# Patch summary. These '#' lines sit before the first "diff --git" header and
# are meant as leading commentary for a human reader, not as patch content.
#
# src/fonts.js
#   * Adds TODO to the "globals" lint comment.
#   * Registers the '83pv-RKSJ-H' CMap name in CMapConverterList and routes it
#     to the new sjis83pvToUnicode().
#   * The TextDecoder branch of decodeBytes takes a third argument, `fatal`,
#     and passes it to the TextDecoder constructor as {fatal: !!fatal}.
#   * sjis83pvToUnicode(str) converts str with stringToBytes(), then decodes
#     it as 'shift_jis' with fatal = true. If that throws, it reports
#     TODO('Unsupported 83pv character found') and decodes the same bytes
#     again without the flag.
#   * In the `switch (this.type)` inside FontClosure, 'CIDFontType0' no longer
#     falls through into 'CIDFontType2'. It gets its own branch that sets
#     fontCharCode from this.toFontChar (indexed by charcode when
#     this.noUnicodeAdaptation is set, otherwise by cid) and then breaks.
#
# test/
#   * Adds test/pdfs/SFAA_Japanese.pdf.link and an "sfaa_japanese" entry of
#     type "eq" (firstPage 1, lastPage 1) to test/test_manifest.json.
#
# Review notes:
#   * As far as this patch shows, the FileReaderSync branch of decodeBytes
#     still declares only (bytes, encoding), so `fatal` has no effect there.
#   * `var cid` is now declared in both the 'CIDFontType0' and 'CIDFontType2'
#     branches of the same switch.
#   * The line breaks and leading whitespace below were reconstructed from the
#     hunk line counts and a 2-space indent; confirm with `git apply --check`
#     (or apply with --ignore-whitespace) before relying on it.
#
diff --git a/src/fonts.js b/src/fonts.js
index 78e6a8994..10c0819f7 100644
--- a/src/fonts.js
+++ b/src/fonts.js
@@ -17,7 +17,7 @@
 /* globals assert, bytesToString, CIDToUnicodeMaps, error, ExpertCharset,
            ExpertSubsetCharset, FileReaderSync, globalScope, GlyphsUnicode,
            info, isArray, isNum, ISOAdobeCharset, isWorker, PDFJS, Stream,
-           stringToBytes, TextDecoder, warn */
+           stringToBytes, TextDecoder, TODO, warn */
 
 'use strict';
 
@@ -411,6 +411,7 @@ var CMapConverterList = {
   'V': jis7ToUnicode,
   'EUC-H': eucjpToUnicode,
   'EUC-V': eucjpToUnicode,
+  '83pv-RKSJ-H': sjis83pvToUnicode,
   '90ms-RKSJ-H': sjisToUnicode,
   '90ms-RKSJ-V': sjisToUnicode,
   '90msp-RKSJ-H': sjisToUnicode,
@@ -437,8 +438,8 @@ var decodeBytes;
 if (typeof TextDecoder !== 'undefined') {
   // The encodings supported by TextDecoder can be found at:
   // http://encoding.spec.whatwg.org/#concept-encoding-get
-  decodeBytes = function(bytes, encoding) {
-    return new TextDecoder(encoding).decode(bytes);
+  decodeBytes = function(bytes, encoding, fatal) {
+    return new TextDecoder(encoding, {fatal: !!fatal}).decode(bytes);
   };
 } else if (typeof FileReaderSync !== 'undefined') {
   decodeBytes = function(bytes, encoding) {
@@ -466,6 +467,18 @@ function sjisToUnicode(str) {
   return decodeBytes(stringToBytes(str), 'shift_jis');
 }
 
+function sjis83pvToUnicode(str) {
+  var bytes = stringToBytes(str);
+  try {
+    // TODO: 83pv has incompatible mappings in ed40..ee9c range.
+    return decodeBytes(bytes, 'shift_jis', true);
+  } catch (e) {
+    TODO('Unsupported 83pv character found');
+    // Just retry without checking errors for now.
+    return decodeBytes(bytes, 'shift_jis');
+  }
+}
+
 function gbkToUnicode(str) {
   return decodeBytes(stringToBytes(str), 'gbk');
 }
@@ -4464,9 +4477,21 @@ var Font = (function FontClosure() {
 
       switch (this.type) {
         case 'CIDFontType0':
-        case 'CIDFontType2':
           var cid = this.unicodeToCID[charcode] || charcode;
+          if (this.unicodeToCID.length > 0) {
+            width = this.widths[cid];
+            vmetric = this.vmetrics && this.vmetrics[cid];
+          }
           if (this.noUnicodeAdaptation) {
+            fontCharCode = this.toFontChar[charcode] || charcode;
+            break;
+          }
+          // CIDFontType0 is not encoded in Unicode.
+          fontCharCode = this.toFontChar[cid] || cid;
+          break;
+        case 'CIDFontType2':
+          if (this.unicodeToCID.length > 0) {
+            var cid = this.unicodeToCID[charcode] || charcode;
             width = this.widths[cid];
             vmetric = this.vmetrics && this.vmetrics[cid];
           }
diff --git a/test/pdfs/SFAA_Japanese.pdf.link b/test/pdfs/SFAA_Japanese.pdf.link
new file mode 100644
index 000000000..32abe3dbe
--- /dev/null
+++ b/test/pdfs/SFAA_Japanese.pdf.link
@@ -0,0 +1 @@
+http://www.project2061.org/publications/sfaa/SFAA_Japanese.pdf
diff --git a/test/test_manifest.json b/test/test_manifest.json
index ef9d9f0c7..2d71a2158 100644
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -930,6 +930,15 @@
       "rounds": 1,
       "type": "eq"
     },
+    {  "id": "sfaa_japanese",
+       "file": "pdfs/SFAA_Japanese.pdf",
+       "md5": "b961bbc0d05bdd6d91041bca60ec8e8b",
+       "rounds": 1,
+       "link": true,
+       "firstPage": 1,
+       "lastPage": 1,
+       "type": "eq"
+    },
     {  "id": "vertical",
        "file": "pdfs/vertical.pdf",
        "md5": "8a74d33504701edcefeef2afd022765e",