diff --git a/src/core/evaluator.js b/src/core/evaluator.js index d7bccc401..a0b119777 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -1512,6 +1512,17 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { textContentItem.str.length = 0; } + function isIdenticalSetFont(name, size) { + return (textState.font && + name === textState.fontName && size === textState.fontSize); + } + + function handleBeginText() { + flushTextContentItem(); + textState.textMatrix = IDENTITY_MATRIX.slice(); + textState.textLineMatrix = IDENTITY_MATRIX.slice(); + } + function enqueueChunk() { let length = textContent.items.length; if (length > 0) { @@ -1537,6 +1548,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { task.ensureNotTerminated(); timeSlotManager.reset(); var stop, operation = {}, args = []; + let pendingBeginText = false; while (!(stop = timeSlotManager.check())) { // The arguments parsed by read() are not used beyond this loop, so // we can reuse the same array on every iteration, thus avoiding @@ -1547,16 +1559,30 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { break; } textState = stateManager.state; - var fn = operation.fn; + var fn = operation.fn | 0; args = operation.args; var advance, diff; - switch (fn | 0) { + if (pendingBeginText) { + if (fn === OPS.setFont) { + const fontNameArg = args[0].name, fontSizeArg = args[1]; + // For multiple identical Tf (setFont) commands, first check if + // the following command is Tm (setTextMatrix) before continuing. + if (isIdenticalSetFont(fontNameArg, fontSizeArg)) { + continue; + } + } + if (fn !== OPS.setTextMatrix) { + handleBeginText(); + } + pendingBeginText = false; + } + + switch (fn) { case OPS.setFont: // Optimization to ignore multiple identical Tf commands. var fontNameArg = args[0].name, fontSizeArg = args[1]; - if (textState.font && fontNameArg === textState.fontName && - fontSizeArg === textState.fontSize) { + if (isIdenticalSetFont(fontNameArg, fontSizeArg)) { break; } @@ -1644,9 +1670,15 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { textState.wordSpacing = args[0]; break; case OPS.beginText: - flushTextContentItem(); - textState.textMatrix = IDENTITY_MATRIX.slice(); - textState.textLineMatrix = IDENTITY_MATRIX.slice(); + // Optimization to attempt to combine separate BT/ET sequences, + // by checking the next operator(s) before flushing text content + // and resetting the text/textLine matrices (see above). + if (combineTextItems) { + pendingBeginText = true; + break; + } + + handleBeginText(); break; case OPS.showSpacedText: var items = args[0]; diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index a281a9837..59475f4e8 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -72,6 +72,7 @@ !issue9458.pdf !issue9915_reduced.pdf !issue9940.pdf +!issue9984.pdf !bad-PageLabels.pdf !decodeACSuccessive.pdf !filled-background.pdf diff --git a/test/pdfs/issue9984.pdf b/test/pdfs/issue9984.pdf new file mode 100644 index 000000000..21ba81606 Binary files /dev/null and b/test/pdfs/issue9984.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index e97557e79..e146e3f44 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -1352,6 +1352,13 @@ "link": false, "type": "eq" }, + { "id": "issue9984-text", + "file": "pdfs/issue9984.pdf", + "md5": "41be5f1b43f61892978cfc57c74ccf4c", + "rounds": 1, + "link": false, + "type": "text" + }, { "id": "issue8570", "file": "pdfs/issue8570.pdf", "md5": "0355731adb72df233eaa10464dcc8c51",