Merge pull request #9986 from Snuffleupagus/issue-9984
Attempt to combine separate beginText/endText sequences in `getTextContent` (issue 9984)
This commit is contained in:
commit
c94df0fef3
@ -1510,6 +1510,17 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
textContentItem.str.length = 0;
|
textContentItem.str.length = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function isIdenticalSetFont(name, size) {
|
||||||
|
return (textState.font &&
|
||||||
|
name === textState.fontName && size === textState.fontSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
function handleBeginText() {
|
||||||
|
flushTextContentItem();
|
||||||
|
textState.textMatrix = IDENTITY_MATRIX.slice();
|
||||||
|
textState.textLineMatrix = IDENTITY_MATRIX.slice();
|
||||||
|
}
|
||||||
|
|
||||||
function enqueueChunk() {
|
function enqueueChunk() {
|
||||||
let length = textContent.items.length;
|
let length = textContent.items.length;
|
||||||
if (length > 0) {
|
if (length > 0) {
|
||||||
@ -1535,6 +1546,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
task.ensureNotTerminated();
|
task.ensureNotTerminated();
|
||||||
timeSlotManager.reset();
|
timeSlotManager.reset();
|
||||||
var stop, operation = {}, args = [];
|
var stop, operation = {}, args = [];
|
||||||
|
let pendingBeginText = false;
|
||||||
while (!(stop = timeSlotManager.check())) {
|
while (!(stop = timeSlotManager.check())) {
|
||||||
// The arguments parsed by read() are not used beyond this loop, so
|
// The arguments parsed by read() are not used beyond this loop, so
|
||||||
// we can reuse the same array on every iteration, thus avoiding
|
// we can reuse the same array on every iteration, thus avoiding
|
||||||
@ -1545,16 +1557,30 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
textState = stateManager.state;
|
textState = stateManager.state;
|
||||||
var fn = operation.fn;
|
var fn = operation.fn | 0;
|
||||||
args = operation.args;
|
args = operation.args;
|
||||||
var advance, diff;
|
var advance, diff;
|
||||||
|
|
||||||
switch (fn | 0) {
|
if (pendingBeginText) {
|
||||||
|
if (fn === OPS.setFont) {
|
||||||
|
const fontNameArg = args[0].name, fontSizeArg = args[1];
|
||||||
|
// For multiple identical Tf (setFont) commands, first check if
|
||||||
|
// the following command is Tm (setTextMatrix) before continuing.
|
||||||
|
if (isIdenticalSetFont(fontNameArg, fontSizeArg)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (fn !== OPS.setTextMatrix) {
|
||||||
|
handleBeginText();
|
||||||
|
}
|
||||||
|
pendingBeginText = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (fn) {
|
||||||
case OPS.setFont:
|
case OPS.setFont:
|
||||||
// Optimization to ignore multiple identical Tf commands.
|
// Optimization to ignore multiple identical Tf commands.
|
||||||
var fontNameArg = args[0].name, fontSizeArg = args[1];
|
var fontNameArg = args[0].name, fontSizeArg = args[1];
|
||||||
if (textState.font && fontNameArg === textState.fontName &&
|
if (isIdenticalSetFont(fontNameArg, fontSizeArg)) {
|
||||||
fontSizeArg === textState.fontSize) {
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1642,9 +1668,15 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
textState.wordSpacing = args[0];
|
textState.wordSpacing = args[0];
|
||||||
break;
|
break;
|
||||||
case OPS.beginText:
|
case OPS.beginText:
|
||||||
flushTextContentItem();
|
// Optimization to attempt to combine separate BT/ET sequences,
|
||||||
textState.textMatrix = IDENTITY_MATRIX.slice();
|
// by checking the next operator(s) before flushing text content
|
||||||
textState.textLineMatrix = IDENTITY_MATRIX.slice();
|
// and resetting the text/textLine matrices (see above).
|
||||||
|
if (combineTextItems) {
|
||||||
|
pendingBeginText = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
handleBeginText();
|
||||||
break;
|
break;
|
||||||
case OPS.showSpacedText:
|
case OPS.showSpacedText:
|
||||||
var items = args[0];
|
var items = args[0];
|
||||||
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -72,6 +72,7 @@
|
|||||||
!issue9458.pdf
|
!issue9458.pdf
|
||||||
!issue9915_reduced.pdf
|
!issue9915_reduced.pdf
|
||||||
!issue9940.pdf
|
!issue9940.pdf
|
||||||
|
!issue9984.pdf
|
||||||
!bad-PageLabels.pdf
|
!bad-PageLabels.pdf
|
||||||
!decodeACSuccessive.pdf
|
!decodeACSuccessive.pdf
|
||||||
!filled-background.pdf
|
!filled-background.pdf
|
||||||
|
BIN
test/pdfs/issue9984.pdf
Normal file
BIN
test/pdfs/issue9984.pdf
Normal file
Binary file not shown.
@ -1359,6 +1359,13 @@
|
|||||||
"link": false,
|
"link": false,
|
||||||
"type": "eq"
|
"type": "eq"
|
||||||
},
|
},
|
||||||
|
{ "id": "issue9984-text",
|
||||||
|
"file": "pdfs/issue9984.pdf",
|
||||||
|
"md5": "41be5f1b43f61892978cfc57c74ccf4c",
|
||||||
|
"rounds": 1,
|
||||||
|
"link": false,
|
||||||
|
"type": "text"
|
||||||
|
},
|
||||||
{ "id": "issue8570",
|
{ "id": "issue8570",
|
||||||
"file": "pdfs/issue8570.pdf",
|
"file": "pdfs/issue8570.pdf",
|
||||||
"md5": "0355731adb72df233eaa10464dcc8c51",
|
"md5": "0355731adb72df233eaa10464dcc8c51",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user