From 441326297626af6359554c89e9e1931b210b5148 Mon Sep 17 00:00:00 2001
From: vyv03354 <VYV03354@nifty.ne.jp>
Date: Tue, 26 Feb 2013 21:06:07 +0900
Subject: [PATCH] Implement "83pv" CMap and fix CIDFontType0 handling

---
 src/fonts.js                     | 33 ++++++++++++++++++++++++++++----
 test/pdfs/SFAA_Japanese.pdf.link |  1 +
 test/test_manifest.json          |  9 +++++++++
 3 files changed, 39 insertions(+), 4 deletions(-)
 create mode 100644 test/pdfs/SFAA_Japanese.pdf.link

diff --git a/src/fonts.js b/src/fonts.js
index 0e7424d17..d2fed266b 100644
--- a/src/fonts.js
+++ b/src/fonts.js
@@ -17,7 +17,7 @@
 /* globals assert, bytesToString, CIDToUnicodeMaps, error, ExpertCharset,
            ExpertSubsetCharset, FileReaderSync, globalScope, GlyphsUnicode,
            info, isArray, isNum, ISOAdobeCharset, isWorker, PDFJS, Stream,
-           stringToBytes, TextDecoder, warn */
+           stringToBytes, TextDecoder, TODO, warn */
 
 'use strict';
 
@@ -411,6 +411,7 @@ var CMapConverterList = {
   'V': jis7ToUnicode,
   'EUC-H': eucjpToUnicode,
   'EUC-V': eucjpToUnicode,
+  '83pv-RKSJ-H': sjis83pvToUnicode,
   '90ms-RKSJ-H': sjisToUnicode,
   '90ms-RKSJ-V': sjisToUnicode,
   '90msp-RKSJ-H': sjisToUnicode,
@@ -435,8 +436,8 @@ var decodeBytes;
 if (typeof TextDecoder !== 'undefined') {
   // The encodings supported by TextDecoder can be found at:
   // http://encoding.spec.whatwg.org/#concept-encoding-get
-  decodeBytes = function(bytes, encoding) {
-    return new TextDecoder(encoding).decode(bytes);
+  decodeBytes = function(bytes, encoding, fatal) {
+    return new TextDecoder(encoding, {fatal: !!fatal}).decode(bytes);
   };
 } else if (typeof FileReaderSync !== 'undefined') {
   decodeBytes = function(bytes, encoding) {
@@ -464,6 +465,18 @@ function sjisToUnicode(str) {
   return decodeBytes(stringToBytes(str), 'shift_jis');
 }
 
+function sjis83pvToUnicode(str) {
+  var bytes = stringToBytes(str);
+  try {
+    // TODO: 83pv has incompatible mappings in the 0xed40..0xee9c range.
+    return decodeBytes(bytes, 'shift_jis', true);
+  } catch (e) {
+    TODO('Unsupported 83pv character found');
+    // Fall back to decoding without the fatal flag for now.
+    return decodeBytes(bytes, 'shift_jis');
+  }
+}
+
 function gbkToUnicode(str) {
   return decodeBytes(stringToBytes(str), 'gbk');
 }
@@ -4458,9 +4471,21 @@ var Font = (function FontClosure() {
 
       switch (this.type) {
         case 'CIDFontType0':
-        case 'CIDFontType2':
           var cid = this.unicodeToCID[charcode] || charcode;
+          if (this.unicodeToCID.length > 0) {
+            width = this.widths[cid];
+            vmetric = this.vmetrics && this.vmetrics[cid];
+          }
           if (this.noUnicodeAdaptation) {
+            fontCharCode = this.toFontChar[charcode] || charcode;
+            break;
+          }
+          // CIDFontType0 is not encoded in Unicode.
+          fontCharCode = this.toFontChar[cid] || cid;
+          break;
+        case 'CIDFontType2':
+          if (this.unicodeToCID.length > 0) {
+            var cid = this.unicodeToCID[charcode] || charcode;
             width = this.widths[cid];
             vmetric = this.vmetrics && this.vmetrics[cid];
           }
diff --git a/test/pdfs/SFAA_Japanese.pdf.link b/test/pdfs/SFAA_Japanese.pdf.link
new file mode 100644
index 000000000..32abe3dbe
--- /dev/null
+++ b/test/pdfs/SFAA_Japanese.pdf.link
@@ -0,0 +1 @@
+http://www.project2061.org/publications/sfaa/SFAA_Japanese.pdf
diff --git a/test/test_manifest.json b/test/test_manifest.json
index a455b116c..1eae289bf 100644
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -923,6 +923,15 @@
       "rounds": 1,
       "type": "eq"
     },
+    {  "id": "sfaa_japanese",
+      "file": "pdfs/SFAA_Japanese.pdf",
+      "md5": "b961bbc0d05bdd6d91041bca60ec8e8b",
+      "rounds": 1,
+      "link": true,
+      "firstPage": 1,
+      "lastPage": 1,
+      "type": "eq"
+    },
     {  "id": "vertical",
       "file": "pdfs/vertical.pdf",
       "md5": "8a74d33504701edcefeef2afd022765e",