diff --git a/src/core/evaluator.js b/src/core/evaluator.js index ec7e9dcfa..6c4d33fe9 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -920,9 +920,26 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { items: [], styles: Object.create(null) }; - var bidiTexts = textContent.items; + var textContentItem = { + initialized: false, + str: [], + width: 0, + height: 0, + vertical: false, + lastAdvanceWidth: 0, + lastAdvanceHeight: 0, + textAdvanceScale: 0, + spaceWidth: 0, + fakeSpaceMin: Infinity, + fakeMultiSpaceMin: Infinity, + fakeMultiSpaceMax: -0, + textRunBreakAllowed: false, + transform: null, + fontName: null + }; var SPACE_FACTOR = 0.3; var MULTI_SPACE_FACTOR = 1.5; + var MULTI_SPACE_FACTOR_MAX = 4; var self = this; var xref = this.xref; @@ -937,7 +954,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var textState; - function newTextChunk() { + function ensureTextContentItem() { + if (textContentItem.initialized) { + return textContentItem; + } var font = textState.font; if (!(font.loadedName in textContent.styles)) { textContent.styles[font.loadedName] = { @@ -947,24 +967,79 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { vertical: font.vertical }; } - return { - // |str| is initially an array which we push individual chars to, and - // then runBidi() overwrites it with the final string. - str: [], - dir: null, - width: 0, - height: 0, - transform: null, - fontName: font.loadedName - }; + textContentItem.fontName = font.loadedName; + + // 9.4.4 Text Space Details + var tsm = [textState.fontSize * textState.textHScale, 0, + 0, textState.fontSize, + 0, textState.textRise]; + + if (font.isType3Font && + textState.fontMatrix !== FONT_IDENTITY_MATRIX && + textState.fontSize === 1) { + var glyphHeight = font.bbox[3] - font.bbox[1]; + if (glyphHeight > 0) { + glyphHeight = glyphHeight * textState.fontMatrix[3]; + tsm[3] *= glyphHeight; + } + } + + var trm = Util.transform(textState.ctm, + Util.transform(textState.textMatrix, tsm)); + textContentItem.transform = trm; + if (!font.vertical) { + textContentItem.width = 0; + textContentItem.height = Math.sqrt(trm[2] * trm[2] + trm[3] * trm[3]); + textContentItem.vertical = false; + } else { + textContentItem.width = Math.sqrt(trm[0] * trm[0] + trm[1] * trm[1]); + textContentItem.height = 0; + textContentItem.vertical = true; + } + + var a = textState.textLineMatrix[0]; + var b = textState.textLineMatrix[1]; + var scaleLineX = Math.sqrt(a * a + b * b); + a = textState.ctm[0]; + b = textState.ctm[1]; + var scaleCtmX = Math.sqrt(a * a + b * b); + textContentItem.textAdvanceScale = scaleCtmX * scaleLineX; + textContentItem.lastAdvanceWidth = 0; + textContentItem.lastAdvanceHeight = 0; + + var spaceWidth = font.spaceWidth / 1000 * textState.fontSize; + if (spaceWidth) { + textContentItem.spaceWidth = spaceWidth; + textContentItem.fakeSpaceMin = spaceWidth * SPACE_FACTOR; + textContentItem.fakeMultiSpaceMin = spaceWidth * MULTI_SPACE_FACTOR; + textContentItem.fakeMultiSpaceMax = + spaceWidth * MULTI_SPACE_FACTOR_MAX; + // It's okay for monospace fonts to fake as much space as needed. + textContentItem.textRunBreakAllowed = !font.isMonospace; + } else { + textContentItem.spaceWidth = 0; + textContentItem.fakeSpaceMin = Infinity; + textContentItem.fakeMultiSpaceMin = Infinity; + textContentItem.fakeMultiSpaceMax = 0; + textContentItem.textRunBreakAllowed = false; + } + + + textContentItem.initialized = true; + return textContentItem; } - function runBidi(textChunk) { + function runBidiTransform(textChunk) { var str = textChunk.str.join(''); - var bidiResult = PDFJS.bidi(str, -1, textState.font.vertical); - textChunk.str = bidiResult.str; - textChunk.dir = bidiResult.dir; - return textChunk; + var bidiResult = PDFJS.bidi(str, -1, textChunk.vertical); + return { + str: bidiResult.str, + dir: bidiResult.dir, + width: textChunk.width, + height: textChunk.height, + transform: textChunk.transform, + fontName: textChunk.fontName + }; } function handleSetFont(fontName, fontRef) { @@ -976,33 +1051,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { }); } - function buildTextGeometry(chars, textChunk) { + function buildTextContentItem(chars) { var font = textState.font; - textChunk = textChunk || newTextChunk(); - if (!textChunk.transform) { - // 9.4.4 Text Space Details - var tsm = [textState.fontSize * textState.textHScale, 0, - 0, textState.fontSize, - 0, textState.textRise]; - - if (font.isType3Font && - textState.fontMatrix !== FONT_IDENTITY_MATRIX && - textState.fontSize === 1) { - var glyphHeight = font.bbox[3] - font.bbox[1]; - if (glyphHeight > 0) { - glyphHeight = glyphHeight * textState.fontMatrix[3]; - tsm[3] *= glyphHeight; - } - } - - var trm = textChunk.transform = Util.transform(textState.ctm, - Util.transform(textState.textMatrix, tsm)); - if (!font.vertical) { - textChunk.height = Math.sqrt(trm[2] * trm[2] + trm[3] * trm[3]); - } else { - textChunk.width = Math.sqrt(trm[0] * trm[0] + trm[1] * trm[1]); - } - } + var textChunk = ensureTextContentItem(); var width = 0; var height = 0; var glyphs = font.charsToGlyphs(chars); @@ -1049,8 +1100,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var wordSpacing = textState.wordSpacing; charSpacing += wordSpacing; if (wordSpacing > 0) { - addFakeSpaces(wordSpacing * 1000 / textState.fontSize, - textChunk.str); + addFakeSpaces(wordSpacing, textChunk.str); } } @@ -1071,36 +1121,41 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { textChunk.str.push(glyphUnicode); } - var a = textState.textLineMatrix[0]; - var b = textState.textLineMatrix[1]; - var scaleLineX = Math.sqrt(a * a + b * b); - a = textState.ctm[0]; - b = textState.ctm[1]; - var scaleCtmX = Math.sqrt(a * a + b * b); if (!font.vertical) { - textChunk.width += width * scaleCtmX * scaleLineX; + textChunk.lastAdvanceWidth = width; + textChunk.width += width * textChunk.textAdvanceScale; } else { - textChunk.height += Math.abs(height * scaleCtmX * scaleLineX); + textChunk.lastAdvanceHeight = height; + textChunk.height += Math.abs(height * textChunk.textAdvanceScale); } + return textChunk; } function addFakeSpaces(width, strBuf) { - var spaceWidth = textState.font.spaceWidth; - if (spaceWidth <= 0) { + if (width < textContentItem.fakeSpaceMin) { return; } - var fakeSpaces = width / spaceWidth; - if (fakeSpaces > MULTI_SPACE_FACTOR) { - fakeSpaces = Math.round(fakeSpaces); - while (fakeSpaces--) { - strBuf.push(' '); - } - } else if (fakeSpaces > SPACE_FACTOR) { + if (width < textContentItem.fakeMultiSpaceMin) { + strBuf.push(' '); + return; + } + var fakeSpaces = Math.round(width / textContentItem.spaceWidth); + while (fakeSpaces-- > 0) { strBuf.push(' '); } } + function flushTextContentItem() { + if (!textContentItem.initialized) { + return; + } + textContent.items.push(runBidiTransform(textContentItem)); + + textContentItem.initialized = false; + textContentItem.str.length = 0; + } + var timeSlotManager = new TimeSlotManager(); return new Promise(function next(resolve, reject) { @@ -1119,35 +1174,62 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { textState = stateManager.state; var fn = operation.fn; args = operation.args; + var advance; switch (fn | 0) { case OPS.setFont: + flushTextContentItem(); textState.fontSize = args[1]; return handleSetFont(args[0].name).then(function() { next(resolve, reject); }, reject); case OPS.setTextRise: + flushTextContentItem(); textState.textRise = args[0]; break; case OPS.setHScale: + flushTextContentItem(); textState.textHScale = args[0] / 100; break; case OPS.setLeading: + flushTextContentItem(); textState.leading = args[0]; break; case OPS.moveText: + // Optimization to treat same line movement as advance + var isSameTextLine = !textState.font ? false : + ((textState.font.vertical ? args[0] : args[1]) === 0); + advance = args[0] - args[1]; + if (isSameTextLine && textContentItem.initialized && + advance > 0 && + advance <= textContentItem.fakeMultiSpaceMax) { + textState.translateTextLineMatrix(args[0], args[1]); + textContentItem.width += + (args[0] - textContentItem.lastAdvanceWidth); + textContentItem.height += + (args[1] - textContentItem.lastAdvanceHeight); + var diff = (args[0] - textContentItem.lastAdvanceWidth) - + (args[1] - textContentItem.lastAdvanceHeight); + addFakeSpaces(diff, textContentItem.str); + break; + } + + flushTextContentItem(); textState.translateTextLineMatrix(args[0], args[1]); textState.textMatrix = textState.textLineMatrix.slice(); break; case OPS.setLeadingMoveText: + flushTextContentItem(); textState.leading = -args[1]; textState.translateTextLineMatrix(args[0], args[1]); textState.textMatrix = textState.textLineMatrix.slice(); break; case OPS.nextLine: + flushTextContentItem(); textState.carriageReturn(); break; case OPS.setTextMatrix: + flushTextContentItem(); textState.setTextMatrix(args[0], args[1], args[2], args[3], args[4], args[5]); textState.setTextLineMatrix(args[0], args[1], args[2], args[3], @@ -1160,17 +1242,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { textState.wordSpacing = args[0]; break; case OPS.beginText: + flushTextContentItem(); textState.textMatrix = IDENTITY_MATRIX.slice(); textState.textLineMatrix = IDENTITY_MATRIX.slice(); break; case OPS.showSpacedText: var items = args[0]; - var textChunk = newTextChunk(); var offset; for (var j = 0, jj = items.length; j < jj; j++) { if (typeof items[j] === 'string') { - buildTextGeometry(items[j], textChunk); + buildTextContentItem(items[j]); } else { + ensureTextContentItem(); + // PDF Specification 5.3.2 states: // The number is expressed in thousandths of a unit of text // space. @@ -1179,45 +1263,57 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { // In the default coordinate system, a positive adjustment // has the effect of moving the next glyph painted either to // the left or down by the given amount. - var advance = items[j]; - var val = advance * textState.fontSize / 1000; + advance = items[j] * textState.fontSize / 1000; + var breakTextRun = false; if (textState.font.vertical) { - offset = val * + offset = advance * (textState.textHScale * textState.textMatrix[2] + textState.textMatrix[3]); - textState.translateTextMatrix(0, val); - // Value needs to be added to height to paint down. - textChunk.height += offset; + textState.translateTextMatrix(0, advance); + breakTextRun = textContentItem.textRunBreakAllowed && + advance > textContentItem.fakeMultiSpaceMax; + if (!breakTextRun) { + // Value needs to be added to height to paint down. + textContentItem.height += offset; + } } else { - offset = val * ( + advance = -advance; + offset = advance * ( textState.textHScale * textState.textMatrix[0] + textState.textMatrix[1]); - textState.translateTextMatrix(-val, 0); - // Value needs to be subtracted from width to paint left. - textChunk.width -= offset; - advance = -advance; + textState.translateTextMatrix(advance, 0); + breakTextRun = textContentItem.textRunBreakAllowed && + advance > textContentItem.fakeMultiSpaceMax; + if (!breakTextRun) { + // Value needs to be subtracted from width to paint left. + textContentItem.width += offset; + } } - if (advance > 0) { - addFakeSpaces(advance, textChunk.str); + if (breakTextRun) { + flushTextContentItem(); + } else if (advance > 0) { + addFakeSpaces(advance, textContentItem.str); } } } - bidiTexts.push(runBidi(textChunk)); break; case OPS.showText: - bidiTexts.push(runBidi(buildTextGeometry(args[0]))); + buildTextContentItem(args[0]); break; case OPS.nextLineShowText: + flushTextContentItem(); textState.carriageReturn(); - bidiTexts.push(runBidi(buildTextGeometry(args[0]))); + buildTextContentItem(args[0]); break; case OPS.nextLineSetSpacingShowText: + flushTextContentItem(); textState.wordSpacing = args[0]; textState.charSpacing = args[1]; textState.carriageReturn(); - bidiTexts.push(runBidi(buildTextGeometry(args[2]))); + buildTextContentItem(args[2]); break; case OPS.paintXObject: + flushTextContentItem(); if (args[0].code) { break; } @@ -1229,7 +1325,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var name = args[0].name; if (xobjsCache.key === name) { if (xobjsCache.texts) { - Util.appendToArray(bidiTexts, xobjsCache.texts.items); + Util.appendToArray(textContent.items, xobjsCache.texts.items); Util.extendObj(textContent.styles, xobjsCache.texts.styles); } break; @@ -1260,7 +1356,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { return self.getTextContent(xobj, task, xobj.dict.get('Resources') || resources, stateManager). then(function (formTextContent) { - Util.appendToArray(bidiTexts, formTextContent.items); + Util.appendToArray(textContent.items, formTextContent.items); Util.extendObj(textContent.styles, formTextContent.styles); stateManager.restore(); @@ -1270,6 +1366,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { next(resolve, reject); }, reject); case OPS.setGState: + flushTextContentItem(); var dictName = args[0]; var extGState = resources.get('ExtGState'); @@ -1300,6 +1397,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { }, reject); return; } + flushTextContentItem(); resolve(textContent); }); }, diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index f0155bea8..ee5c9c3a2 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -137,6 +137,7 @@ !vertical.pdf !ZapfDingbats.pdf !bug878026.pdf +!issue1045.pdf !issue5010.pdf !issue4934.pdf !issue4650.pdf diff --git a/test/pdfs/issue1045.pdf b/test/pdfs/issue1045.pdf new file mode 100644 index 000000000..ca7f5b7ae Binary files /dev/null and b/test/pdfs/issue1045.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index 5d1c0fb82..5cd954227 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -607,6 +607,13 @@ "link": false, "type": "load" }, + { "id": "issue1045", + "file": "pdfs/issue1045.pdf", + "md5": "61d7e9bfbc03cd457dcefeab3e78a687", + "rounds": 1, + "link": false, + "type": "text" + }, { "id": "issue5677", "file": "pdfs/issue5677.pdf", "md5": "c9101578fcb806269145132724d24ac1",