From 441326297626af6359554c89e9e1931b210b5148 Mon Sep 17 00:00:00 2001 From: vyv03354 Date: Tue, 26 Feb 2013 21:06:07 +0900 Subject: [PATCH] Implement "83pv" CMap and fix CIDFontType0 handling --- src/fonts.js | 33 ++++++++++++++++++++++++++++---- test/pdfs/SFAA_Japanese.pdf.link | 1 + test/test_manifest.json | 9 +++++++++ 3 files changed, 39 insertions(+), 4 deletions(-) create mode 100644 test/pdfs/SFAA_Japanese.pdf.link diff --git a/src/fonts.js b/src/fonts.js index 0e7424d17..d2fed266b 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -17,7 +17,7 @@ /* globals assert, bytesToString, CIDToUnicodeMaps, error, ExpertCharset, ExpertSubsetCharset, FileReaderSync, globalScope, GlyphsUnicode, info, isArray, isNum, ISOAdobeCharset, isWorker, PDFJS, Stream, - stringToBytes, TextDecoder, warn */ + stringToBytes, TextDecoder, TODO, warn */ 'use strict'; @@ -411,6 +411,7 @@ var CMapConverterList = { 'V': jis7ToUnicode, 'EUC-H': eucjpToUnicode, 'EUC-V': eucjpToUnicode, + '83pv-RKSJ-H': sjis83pvToUnicode, '90ms-RKSJ-H': sjisToUnicode, '90ms-RKSJ-V': sjisToUnicode, '90msp-RKSJ-H': sjisToUnicode, @@ -435,8 +436,8 @@ var decodeBytes; if (typeof TextDecoder !== 'undefined') { // The encodings supported by TextDecoder can be found at: // http://encoding.spec.whatwg.org/#concept-encoding-get - decodeBytes = function(bytes, encoding) { - return new TextDecoder(encoding).decode(bytes); + decodeBytes = function(bytes, encoding, fatal) { + return new TextDecoder(encoding, {fatal: !!fatal}).decode(bytes); }; } else if (typeof FileReaderSync !== 'undefined') { decodeBytes = function(bytes, encoding) { @@ -464,6 +465,18 @@ function sjisToUnicode(str) { return decodeBytes(stringToBytes(str), 'shift_jis'); } +function sjis83pvToUnicode(str) { + var bytes = stringToBytes(str); + try { + // TODO: 83pv has incompatible mappings in ed40..ee9c range. + return decodeBytes(bytes, 'shift_jis', true); + } catch (e) { + TODO('Unsupported 83pv character found'); + // Just retry without checking errors for now. + return decodeBytes(bytes, 'shift_jis'); + } +} + function gbkToUnicode(str) { return decodeBytes(stringToBytes(str), 'gbk'); } @@ -4458,9 +4471,21 @@ var Font = (function FontClosure() { switch (this.type) { case 'CIDFontType0': - case 'CIDFontType2': var cid = this.unicodeToCID[charcode] || charcode; + if (this.unicodeToCID.length > 0) { + width = this.widths[cid]; + vmetric = this.vmetrics && this.vmetrics[cid]; + } if (this.noUnicodeAdaptation) { + fontCharCode = this.toFontChar[charcode] || charcode; + break; + } + // CIDFontType0 is not encoded in Unicode. + fontCharCode = this.toFontChar[cid] || cid; + break; + case 'CIDFontType2': + if (this.unicodeToCID.length > 0) { + var cid = this.unicodeToCID[charcode] || charcode; width = this.widths[cid]; vmetric = this.vmetrics && this.vmetrics[cid]; } diff --git a/test/pdfs/SFAA_Japanese.pdf.link b/test/pdfs/SFAA_Japanese.pdf.link new file mode 100644 index 000000000..32abe3dbe --- /dev/null +++ b/test/pdfs/SFAA_Japanese.pdf.link @@ -0,0 +1 @@ +http://www.project2061.org/publications/sfaa/SFAA_Japanese.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index a455b116c..1eae289bf 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -923,6 +923,15 @@ "rounds": 1, "type": "eq" }, + { "id": "sfaa_japanese", + "file": "pdfs/SFAA_Japanese.pdf", + "md5": "b961bbc0d05bdd6d91041bca60ec8e8b", + "rounds": 1, + "link": true, + "firstPage": 1, + "lastPage": 1, + "type": "eq" + }, { "id": "vertical", "file": "pdfs/vertical.pdf", "md5": "8a74d33504701edcefeef2afd022765e",