From 4ce5e520fb3f078fc732358e05c44cf66b62a178 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Wed, 24 May 2017 17:36:39 +0200 Subject: [PATCH] Add different code-paths to `{CMap, ToUnicodeMap}.charCodeOf` depending on length, since `Array.prototype.indexOf` can be extremely inefficient for very large arrays (issue 8372) Fixes 8372. --- src/core/cmap.js | 22 ++++++--- src/core/fonts.js | 15 +++++- test/pdfs/.gitignore | 1 + test/pdfs/issue8372.pdf | 101 ++++++++++++++++++++++++++++++++++++++++ test/test_manifest.json | 7 +++ 5 files changed, 138 insertions(+), 8 deletions(-) create mode 100644 test/pdfs/issue8372.pdf diff --git a/src/core/cmap.js b/src/core/cmap.js index d6006c887..0c9faa4e7 100644 --- a/src/core/cmap.js +++ b/src/core/cmap.js @@ -283,24 +283,34 @@ var CMap = (function CMapClosure() { // indices in the *billions*. For such tables we use for..in, which isn't // ideal because it stringifies the indices for all present elements, but // it does avoid iterating over every undefined entry. - var map = this._map; - var length = map.length; - var i; + let map = this._map; + let length = map.length; if (length <= 0x10000) { - for (i = 0; i < length; i++) { + for (let i = 0; i < length; i++) { if (map[i] !== undefined) { callback(i, map[i]); } } } else { - for (i in this._map) { + for (let i in map) { callback(i, map[i]); } } }, charCodeOf(value) { - return this._map.indexOf(value); + // `Array.prototype.indexOf` is *extremely* inefficient for arrays which + // are both very sparse and very large (see issue8372.pdf). + let map = this._map; + if (map.length <= 0x10000) { + return map.indexOf(value); + } + for (let charCode in map) { + if (map[charCode] === value) { + return (charCode | 0); + } + } + return -1; }, getMap() { diff --git a/src/core/fonts.js b/src/core/fonts.js index 3e24b30f9..3a2d361b4 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -283,8 +283,19 @@ var ToUnicodeMap = (function ToUnicodeMapClosure() { return this._map[i]; }, - charCodeOf(v) { - return this._map.indexOf(v); + charCodeOf(value) { + // `Array.prototype.indexOf` is *extremely* inefficient for arrays which + // are both very sparse and very large (see issue8372.pdf). + let map = this._map; + if (map.length <= 0x10000) { + return map.indexOf(value); + } + for (let charCode in map) { + if (map[charCode] === value) { + return (charCode | 0); + } + } + return -1; }, amend(map) { diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index c19301013..abaf1da18 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -50,6 +50,7 @@ !issue8061.pdf !issue8088.pdf !issue8125.pdf +!issue8372.pdf !issue8424.pdf !bad-PageLabels.pdf !filled-background.pdf diff --git a/test/pdfs/issue8372.pdf b/test/pdfs/issue8372.pdf new file mode 100644 index 000000000..340639b78 --- /dev/null +++ b/test/pdfs/issue8372.pdf @@ -0,0 +1,101 @@ +%PDF-1.7 +%âãÏÓ +1 0 obj +<< +/BaseFont /AdobeHeitiStd-Regular +/CIDSystemInfo 2 0 R +/Subtype /CIDFontType2 +/FontDescriptor 3 0 R +/Type /Font +>> +endobj +3 0 obj +<< +/FontName /AdobeHeitiStd-Regular +/StemV 56 +/Ascent 967 +/Flags 32 +/Descent -283 +/ItalicAngle 0 +/FontBBox [-163 -283 1087 967] +/Type /FontDescriptor +/CapHeight 763 +>> +endobj +2 0 obj +<< +/Supplement 4 +/Ordering (GB1) +/Registry (Adobe) +>> +endobj +4 0 obj +<< +/Pages 5 0 R +/Type /Catalog +>> +endobj +5 0 obj +<< +/MediaBox [0 0 200 50] +/Kids [6 0 R] +/Count 1 +/Type /Pages +>> +endobj +6 0 obj +<< +/Parent 5 0 R +/MediaBox [0 0 200 50] +/Resources +<< +/Font +<< +/F1 7 0 R +>> +>> +/Contents 8 0 R +/Type /Page +>> +endobj +7 0 obj +<< +/DescendantFonts [1 0 R] +/BaseFont /AdobeHeitiStd-Regular +/Subtype /Type0 +/Encoding /UniGB-UTF16-H +/Type /Font +>> +endobj +8 0 obj +<< +/Length 46 +>> +stream +BT +10 20 TD +/F1 20 Tf +[(vî)0.389893(_U)]TJ +ET + +endstream +endobj xref +0 9 +0000000000 65535 f +0000000015 00000 n +0000000334 00000 n +0000000149 00000 n +0000000405 00000 n +0000000456 00000 n +0000000538 00000 n +0000000667 00000 n +0000000801 00000 n +trailer + +<< +/Root 4 0 R +/Size 9 +>> +startxref +899 +%%EOF diff --git a/test/test_manifest.json b/test/test_manifest.json index 25b4a34c3..2b07ae514 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -2125,6 +2125,13 @@ "link": false, "type": "text" }, + { "id": "issue8372-text", + "file": "pdfs/issue8372.pdf", + "md5": "b02fb07364dd00ad5044bd259860da97", + "rounds": 1, + "link": false, + "type": "text" + }, { "id": "bug894572", "file": "pdfs/bug894572.pdf", "md5": "e54a6b0451939f685ed37e3d46e16158",