Support CMaps that contain only CID characters when building the ToUnicode-map (issue 9367)

In this particular case the `CMap`-data that we create contains only numbers and no strings. The existing decoding loop is bounded by `token.length`, which is `undefined` for a number, so it never runs, and `PartialEvaluator.readToUnicode` ends up creating a ToUnicode-map that contains only empty strings. *Please note:* This is yet another case where I'm not sure that this is the best or most correct solution, but it does fix the referenced issue.
2021-09-18 00:01:24 +02:00 · 2021-09-18 00:01:24 +02:00 · ed73cf6d50
commit ed73cf6d50
parent 7082ff9bf8
3 changed files with 14 additions and 0 deletions
--- a/src/core/evaluator.js
+++ b/src/core/evaluator.js
@@ -3455,6 +3455,11 @@ class PartialEvaluator {
          // NOTE: cmap can be a sparse array, so use forEach instead of
          // `for(;;)` to iterate over all keys.
          cmap.forEach(function (charCode, token) {
+            // Some cmaps contain *only* CID characters (fixes issue9367.pdf).
+            if (typeof token === "number") {
+              map[charCode] = String.fromCodePoint(token);
+              return;
+            }
            const str = [];
            for (let k = 0; k < token.length; k += 2) {
              const w1 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1);
--- a/test/pdfs/issue9367.pdf.link
+++ b/test/pdfs/issue9367.pdf.link
@@ -0,0 +1 @@
+https://github.com/mozilla/pdf.js/files/1634833/E-WG18_105147.pdf
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -2358,6 +2358,14 @@
       "lastPage": 2,
       "type": "eq"
    },
+    {  "id": "issue9367",
+       "file": "pdfs/issue9367.pdf",
+       "md5": "81a2c6f1fe5d1bb00ff0479aa6547155",
+       "rounds": 1,
+       "link": true,
+       "lastPage": 1,
+       "type": "eq"
+    },
    {  "id": "issue10529",
       "file": "pdfs/issue10529.pdf",
       "md5": "1a4d404a137c610ff0c747cbea3b8666",
				`@@ -0,0 +1 @@`
				`https://github.com/mozilla/pdf.js/files/1634833/E-WG18_105147.pdf`