Attempt to ignore multiple identical Tf (setFont) commands in PartialEvaluator_getTextContent (issue 5808)

This patch improves performance for the PDF file in issue 5808, but I'm not sure if it's enough to call the issue fixed. On average, this patch reduces the number of textLayer divs by a factor of 3, and it also reduces the time spent in `getTextContent` by a factor of ~2.

The PDF file was generated by `Scribus PDF`, which, for reasons I cannot understand, places redundant `Tf` commands before *every* showText command.
Note how the PDF file also contains lots of (basically) identical fonts, but with slightly different names, which causes unnecessary font-switching. That font-switching causes some unnecessary breaking of textLayer divs, but it cannot be easily worked around.
This commit is contained in:
Jonas Jenwald 2016-05-31 23:01:35 +02:00
parent 19105f0669
commit 77c6ed5389
4 changed files with 169 additions and 2 deletions

View File

@ -1423,9 +1423,17 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
switch (fn | 0) {
case OPS.setFont:
// Optimization to ignore multiple identical Tf commands.
var fontNameArg = args[0].name, fontSizeArg = args[1];
if (textState.font && fontNameArg === textState.fontName &&
fontSizeArg === textState.fontSize) {
break;
}
flushTextContentItem();
textState.fontSize = args[1];
next(handleSetFont(args[0].name, null));
textState.fontName = fontNameArg;
textState.fontSize = fontSizeArg;
next(handleSetFont(fontNameArg, null));
return;
case OPS.setTextRise:
flushTextContentItem();
@ -1643,6 +1651,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
var gStateFont = gState.get('Font');
if (gStateFont) {
textState.fontName = null;
textState.fontSize = gStateFont[1];
next(handleSetFont(null, gStateFont[0]));
return;
@ -2562,6 +2571,7 @@ var StateManager = (function StateManagerClosure() {
var TextState = (function TextStateClosure() {
function TextState() {
this.ctm = new Float32Array(IDENTITY_MATRIX);
this.fontName = null;
this.fontSize = 0;
this.font = null;
this.fontMatrix = FONT_IDENTITY_MATRIX;

View File

@ -18,6 +18,7 @@
!issue5946.pdf
!issue5972.pdf
!issue5874.pdf
!issue5808.pdf
!issue6204.pdf
!issue6782.pdf
!issue6961.pdf

149
test/pdfs/issue5808.pdf Normal file
View File

@ -0,0 +1,149 @@
%PDF-1.7
%âãÏÓ
1 0 obj
<<
/Pages 2 0 R
/Type /Catalog
>>
endobj
2 0 obj
<<
/Kids [3 0 R]
/Count 1
/Type /Pages
>>
endobj
3 0 obj
<<
/Parent 2 0 R
/MediaBox [0 0 300 50]
/Resources
<<
/Font
<<
/F1 4 0 R
>>
>>
/Contents 5 0 R
/Type /Page
>>
endobj
4 0 obj
<<
/BaseFont /Times-Roman
/Subtype /Type1
/Encoding /WinAnsiEncoding
/Type /Font
>>
endobj
5 0 obj
<<
/Length 729
>>
stream
BT
10 20 TD
/F1 14 Tf
(I) Tj
/F1 14 Tf
(s) Tj
/F1 14 Tf
(s) Tj
/F1 14 Tf
(u) Tj
/F1 14 Tf
(e) Tj
/F1 14 Tf
( ) Tj
/F1 14 Tf
(5) Tj
/F1 14 Tf
(8) Tj
/F1 14 Tf
(0) Tj
/F1 14 Tf
(8) Tj
/F1 14 Tf
( ) Tj
/F1 14 Tf
(-) Tj
/F1 14 Tf
( ) Tj
/F1 14 Tf
(A) Tj
/F1 14 Tf
( ) Tj
/F1 14 Tf
(T) Tj
/F1 14 Tf
(f) Tj
/F1 14 Tf
( ) Tj
/F1 14 Tf
(c) Tj
/F1 14 Tf
(m) Tj
/F1 14 Tf
(d) Tj
/F1 14 Tf
( ) Tj
/F1 14 Tf
(b) Tj
/F1 14 Tf
(e) Tj
/F1 14 Tf
(f) Tj
/F1 14 Tf
(o) Tj
/F1 14 Tf
(r) Tj
/F1 14 Tf
(e) Tj
/F1 14 Tf
( ) Tj
/F1 14 Tf
(e) Tj
/F1 14 Tf
(v) Tj
/F1 14 Tf
(e) Tj
/F1 14 Tf
(r) Tj
/F1 14 Tf
(y) Tj
/F1 14 Tf
( ) Tj
/F1 14 Tf
(T) Tj
/F1 14 Tf
(j) Tj
/F1 14 Tf
( ) Tj
/F1 14 Tf
(c) Tj
/F1 14 Tf
(m) Tj
/F1 14 Tf
(d) Tj
/F1 14 Tf
(.) Tj
ET
endstream
endobj xref
0 6
0000000000 65535 f
0000000015 00000 n
0000000066 00000 n
0000000125 00000 n
0000000254 00000 n
0000000355 00000 n
trailer
<<
/Root 1 0 R
/Size 6
>>
startxref
1137
%%EOF

View File

@ -1161,6 +1161,13 @@
"type": "eq",
"about": "Please note that this file currently renders incorrectly."
},
{ "id": "issue5808-text",
"file": "pdfs/issue5808.pdf",
"md5": "e0584dd540d7859d6c191aa53379692e",
"rounds": 1,
"link": false,
"type": "text"
},
{ "id": "issue6962",
"file": "pdfs/issue6962.pdf",
"md5": "d40e871ecca68baf93114bd28c782148",