From af71f9b40af1e564729eca8042cc71bf6e71ac1f Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sat, 12 Oct 2019 11:24:37 +0200 Subject: [PATCH 1/2] Inline all the possible type checks in `PartialEvaluator.hasBlendModes` to avoid unnecessary function calls For badly generated PDF documents, with issue 6961 being one example, there's well over one hundred thousand function calls being made in total for just the *two* pages. --- src/core/evaluator.js | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 46c45fd58..52c1e2c7f 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -21,7 +21,7 @@ import { } from '../shared/util'; import { CMapFactory, IdentityCMap } from './cmap'; import { - Cmd, Dict, EOF, isDict, isName, isRef, isStream, Name + Cmd, Dict, EOF, isDict, isName, isRef, isStream, Name, Ref } from './primitives'; import { ErrorFont, Font, FontFlags, getFontType, IdentityToUnicodeMap, ToUnicodeMap @@ -180,7 +180,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { }, hasBlendModes: function PartialEvaluator_hasBlendModes(resources) { - if (!isDict(resources)) { + if (!(resources instanceof Dict)) { return false; } @@ -191,33 +191,32 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var nodes = [resources], xref = this.xref; while (nodes.length) { - var key, i, ii; var node = nodes.shift(); // First check the current resources for blend modes. var graphicStates = node.get('ExtGState'); - if (isDict(graphicStates)) { + if (graphicStates instanceof Dict) { var graphicStatesKeys = graphicStates.getKeys(); - for (i = 0, ii = graphicStatesKeys.length; i < ii; i++) { - key = graphicStatesKeys[i]; + for (let i = 0, ii = graphicStatesKeys.length; i < ii; i++) { + const key = graphicStatesKeys[i]; var graphicState = graphicStates.get(key); var bm = graphicState.get('BM'); - if (isName(bm) && bm.name !== 'Normal') { + if ((bm instanceof Name) && bm.name !== 'Normal') { return true; } } } // Descend into the XObjects to look for more resources and blend modes. var xObjects = node.get('XObject'); - if (!isDict(xObjects)) { + if (!(xObjects instanceof Dict)) { continue; } var xObjectsKeys = xObjects.getKeys(); - for (i = 0, ii = xObjectsKeys.length; i < ii; i++) { - key = xObjectsKeys[i]; + for (let i = 0, ii = xObjectsKeys.length; i < ii; i++) { + const key = xObjectsKeys[i]; var xObject = xObjects.getRaw(key); - if (isRef(xObject)) { + if (xObject instanceof Ref) { if (processed[xObject.toString()]) { // The XObject has already been processed, and by avoiding a // redundant `xref.fetch` we can *significantly* reduce the load @@ -231,14 +230,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } if (xObject.dict.objId) { if (processed[xObject.dict.objId]) { - // stream has objId and is processed already - continue; + continue; // Stream has objId and was processed already. } processed[xObject.dict.objId] = true; } var xResources = xObject.dict.get('Resources'); // Checking objId to detect an infinite loop. - if (isDict(xResources) && + if ((xResources instanceof Dict) && (!xResources.objId || !processed[xResources.objId])) { nodes.push(xResources); if (xResources.objId) { From bfcbf2d78d835133f958eb22303a3f6a16c1542c Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sat, 12 Oct 2019 11:44:07 +0200 Subject: [PATCH 2/2] Cache processed 'ExtGState's in `PartialEvaluator.hasBlendModes` to avoid unnecessary parsing/lookups This simply extends the already existing caching of processed resources to avoid duplicated parsing of 'ExtGState's, which should help with badly generated PDF documents. This patch was tested using the PDF file from issue 6961, i.e. https://github.com/mozilla/pdf.js/files/121712/test.pdf, with the following manifest file: ``` [ { "id": "issue6961", "file": "../web/pdfs/issue6961.pdf", "md5": "", "rounds": 200, "type": "eq" } ] ``` which gave the following *overall* results when comparing this patch against the `master` branch: ``` -- Grouped By browser, stat -- browser | stat | Count | Baseline(ms) | Current(ms) | +/- | % | Result(P<.05) ------- | ------------ | ----- | ------------ | ----------- | --- | ----- | ------------- Firefox | Overall | 400 | 1063 | 1051 | -12 | -1.17 | faster Firefox | Page Request | 400 | 552 | 543 | -9 | -1.69 | faster Firefox | Rendering | 400 | 511 | 508 | -3 | -0.61 | ``` and the following *page-specific* results: ``` -- Grouped By page, stat -- page | stat | Count | Baseline(ms) | Current(ms) | +/- | % | Result(P<.05) ---- | ------------ | ----- | ------------ | ----------- | --- | ----- | ------------- 0 | Overall | 200 | 1122 | 1110 | -12 | -1.03 | 0 | Page Request | 200 | 552 | 544 | -8 | -1.48 | faster 0 | Rendering | 200 | 570 | 566 | -4 | -0.62 | 1 | Overall | 200 | 1005 | 992 | -13 | -1.33 | faster 1 | Page Request | 200 | 552 | 542 | -11 | -1.91 | faster 1 | Rendering | 200 | 452 | 450 | -3 | -0.61 | ``` --- src/core/evaluator.js | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 52c1e2c7f..91d7b610d 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -199,7 +199,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { for (let i = 0, ii = graphicStatesKeys.length; i < ii; i++) { const key = graphicStatesKeys[i]; - var graphicState = graphicStates.get(key); + let graphicState = graphicStates.getRaw(key); + if (graphicState instanceof Ref) { + if (processed[graphicState.toString()]) { + continue; // The ExtGState has already been processed. + } + graphicState = xref.fetch(graphicState); + } + if (!(graphicState instanceof Dict)) { + continue; + } + if (graphicState.objId) { + processed[graphicState.objId] = true; + } var bm = graphicState.get('BM'); if ((bm instanceof Name) && bm.name !== 'Normal') { return true;