From 77c6ed53890fbfc8918e5e03afe4a2f137b73e04 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Tue, 31 May 2016 23:01:35 +0200 Subject: [PATCH] Attempt to ignore multiple identical Tf (setFont) commands in `PartialEvaluator_getTextContent` (issue 5808) This patch improves the performance for the PDF file in issue 5808, but I'm not sure if it's enough to call it fixed. On average, this patch reduces the number of textLayer divs by a factor of 3, and it also reduces the time spent in `getTextContent` by a factor of ~2. The PDF file is generated by `Scribus PDF`, which for reasons I cannot understand is placing redundant `Tf` commands before *every* showText command. Note how the PDF file also contains lots of (basically) identical fonts, but with slightly different names, which causes unnecessary font-switching. This causes some unnecessary breaking of textLayer divs, but this issue cannot be easily worked around. --- src/core/evaluator.js | 14 +++- test/pdfs/.gitignore | 1 + test/pdfs/issue5808.pdf | 149 ++++++++++++++++++++++++++++++++++++++++ test/test_manifest.json | 7 ++ 4 files changed, 169 insertions(+), 2 deletions(-) create mode 100644 test/pdfs/issue5808.pdf diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 904ae6cd2..0165c4ddf 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -1423,9 +1423,17 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { switch (fn | 0) { case OPS.setFont: + // Optimization to ignore multiple identical Tf commands. 
+ var fontNameArg = args[0].name, fontSizeArg = args[1]; + if (textState.font && fontNameArg === textState.fontName && + fontSizeArg === textState.fontSize) { + break; + } + flushTextContentItem(); - textState.fontSize = args[1]; - next(handleSetFont(args[0].name, null)); + textState.fontName = fontNameArg; + textState.fontSize = fontSizeArg; + next(handleSetFont(fontNameArg, null)); return; case OPS.setTextRise: flushTextContentItem(); @@ -1643,6 +1651,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } var gStateFont = gState.get('Font'); if (gStateFont) { + textState.fontName = null; textState.fontSize = gStateFont[1]; next(handleSetFont(null, gStateFont[0])); return; @@ -2562,6 +2571,7 @@ var StateManager = (function StateManagerClosure() { var TextState = (function TextStateClosure() { function TextState() { this.ctm = new Float32Array(IDENTITY_MATRIX); + this.fontName = null; this.fontSize = 0; this.font = null; this.fontMatrix = FONT_IDENTITY_MATRIX; diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 6cbda038d..6a5f3879f 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -18,6 +18,7 @@ !issue5946.pdf !issue5972.pdf !issue5874.pdf +!issue5808.pdf !issue6204.pdf !issue6782.pdf !issue6961.pdf diff --git a/test/pdfs/issue5808.pdf b/test/pdfs/issue5808.pdf new file mode 100644 index 000000000..56823b021 --- /dev/null +++ b/test/pdfs/issue5808.pdf @@ -0,0 +1,149 @@ +%PDF-1.7 +%âãÏÓ +1 0 obj +<< +/Pages 2 0 R +/Type /Catalog +>> +endobj +2 0 obj +<< +/Kids [3 0 R] +/Count 1 +/Type /Pages +>> +endobj +3 0 obj +<< +/Parent 2 0 R +/MediaBox [0 0 300 50] +/Resources +<< +/Font +<< +/F1 4 0 R +>> +>> +/Contents 5 0 R +/Type /Page +>> +endobj +4 0 obj +<< +/BaseFont /Times-Roman +/Subtype /Type1 +/Encoding /WinAnsiEncoding +/Type /Font +>> +endobj +5 0 obj +<< +/Length 729 +>> +stream +BT +10 20 TD +/F1 14 Tf +(I) Tj +/F1 14 Tf +(s) Tj +/F1 14 Tf +(s) Tj +/F1 14 Tf +(u) Tj +/F1 14 Tf +(e) Tj +/F1 14 Tf +( ) Tj +/F1 14 Tf +(5) Tj 
+/F1 14 Tf +(8) Tj +/F1 14 Tf +(0) Tj +/F1 14 Tf +(8) Tj +/F1 14 Tf +( ) Tj +/F1 14 Tf +(-) Tj +/F1 14 Tf +( ) Tj +/F1 14 Tf +(A) Tj +/F1 14 Tf +( ) Tj +/F1 14 Tf +(T) Tj +/F1 14 Tf +(f) Tj +/F1 14 Tf +( ) Tj +/F1 14 Tf +(c) Tj +/F1 14 Tf +(m) Tj +/F1 14 Tf +(d) Tj +/F1 14 Tf +( ) Tj +/F1 14 Tf +(b) Tj +/F1 14 Tf +(e) Tj +/F1 14 Tf +(f) Tj +/F1 14 Tf +(o) Tj +/F1 14 Tf +(r) Tj +/F1 14 Tf +(e) Tj +/F1 14 Tf +( ) Tj +/F1 14 Tf +(e) Tj +/F1 14 Tf +(v) Tj +/F1 14 Tf +(e) Tj +/F1 14 Tf +(r) Tj +/F1 14 Tf +(y) Tj +/F1 14 Tf +( ) Tj +/F1 14 Tf +(T) Tj +/F1 14 Tf +(j) Tj +/F1 14 Tf +( ) Tj +/F1 14 Tf +(c) Tj +/F1 14 Tf +(m) Tj +/F1 14 Tf +(d) Tj +/F1 14 Tf +(.) Tj +ET + +endstream +endobj xref +0 6 +0000000000 65535 f +0000000015 00000 n +0000000066 00000 n +0000000125 00000 n +0000000254 00000 n +0000000355 00000 n +trailer + +<< +/Root 1 0 R +/Size 6 +>> +startxref +1137 +%%EOF diff --git a/test/test_manifest.json b/test/test_manifest.json index 8816c1706..971e9e4e6 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -1161,6 +1161,13 @@ "type": "eq", "about": "Please note that this file currently renders incorrectly." }, + { "id": "issue5808-text", + "file": "pdfs/issue5808.pdf", + "md5": "e0584dd540d7859d6c191aa53379692e", + "rounds": 1, + "link": false, + "type": "text" + }, { "id": "issue6962", "file": "pdfs/issue6962.pdf", "md5": "d40e871ecca68baf93114bd28c782148",