From 7c7d05e7a350447139fe62227f03283ff0c81a36 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 6 Mar 2015 15:01:26 +0100 Subject: [PATCH] Attempt to infer if a CMap file actually contains just a standard `Identity-H`/`Identity-V` map --- src/core/cmap.js | 36 ++++++++++++++++++++++++++++++++++-- src/core/evaluator.js | 6 +++++- test/unit/cmap_spec.js | 23 +++++++++++++++++++++-- 3 files changed, 60 insertions(+), 5 deletions(-) diff --git a/src/core/cmap.js b/src/core/cmap.js index 5d69d17cb..d97c34d74 100644 --- a/src/core/cmap.js +++ b/src/core/cmap.js @@ -204,6 +204,7 @@ var CMap = (function CMapClosure() { // - bf chars are variable-length byte sequences, stored as strings, with // one byte per character. this._map = []; + this.name = ''; this.vertical = false; this.useCMap = null; this.builtInCMap = builtInCMap; @@ -303,13 +304,28 @@ var CMap = (function CMapClosure() { } out.charcode = 0; out.length = 1; + }, + + get isIdentityCMap() { + if (!(this.name === 'Identity-H' || this.name === 'Identity-V')) { + return false; + } + if (this._map.length !== 0x10000) { + return false; + } + for (var i = 0; i < 0x10000; i++) { + if (this._map[i] !== i) { + return false; + } + } + return true; } }; return CMap; })(); // A special case of CMap, where the _map array implicitly has a length of -// 65535 and each element is equal to its index. +// 65536 and each element is equal to its index. var IdentityCMap = (function IdentityCMapClosure() { function IdentityCMap(vertical, n) { CMap.call(this); @@ -364,7 +380,11 @@ var IdentityCMap = (function IdentityCMapClosure() { return map; }, - readCharCode: CMap.prototype.readCharCode + readCharCode: CMap.prototype.readCharCode, + + get isIdentityCMap() { + error('should not access .isIdentityCMap'); + } }; return IdentityCMap; @@ -829,6 +849,13 @@ var CMapFactory = (function CMapFactoryClosure() { } } + function parseCMapName(cMap, lexer) { + var obj = lexer.getObj(); + if (isName(obj) && isString(obj.name)) { + cMap.name = obj.name; + } + } + function parseCMap(cMap, lexer, builtInCMapParams, useCMap) { var previous; var embededUseCMap; @@ -839,6 +866,8 @@ var CMapFactory = (function CMapFactoryClosure() { } else if (isName(obj)) { if (obj.name === 'WMode') { parseWMode(cMap, lexer); + } else if (obj.name === 'CMapName') { + parseCMapName(cMap, lexer); } previous = obj; } else if (isCmd(obj)) { @@ -948,6 +977,9 @@ var CMapFactory = (function CMapFactoryClosure() { } catch (e) { warn('Invalid CMap data. ' + e); } + if (cMap.isIdentityCMap) { + return createBuiltInCMap(cMap.name, builtInCMapParams); + } return cMap; } error('Encoding required.'); diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 418ef0d1b..8bd7959b8 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -1339,7 +1339,11 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { return new ToUnicodeMap(cmap.getMap()); } else if (isStream(cmapObj)) { cmap = CMapFactory.create(cmapObj, - { url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null).getMap(); + { url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null); + if (cmap instanceof IdentityCMap) { + return new IdentityToUnicodeMap(0, 0xFFFF); + } + cmap = cmap.getMap(); // Convert UTF-16BE // NOTE: cmap can be a sparse array, so use forEach instead of for(;;) // to iterate over all keys. diff --git a/test/unit/cmap_spec.js b/test/unit/cmap_spec.js index 8da30000f..f83f92a90 100644 --- a/test/unit/cmap_spec.js +++ b/test/unit/cmap_spec.js @@ -1,6 +1,7 @@ /* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */ -/* globals expect, it, describe, StringStream, CMapFactory, Name */ +/* globals expect, it, describe, StringStream, CMapFactory, Name, CMap, + IdentityCMap */ 'use strict'; @@ -92,8 +93,16 @@ describe('cmap', function() { var stream = new StringStream(str); var cmap = CMapFactory.create(stream, { url: cMapUrl, packed: cMapPacked }, null); + expect(cmap instanceof CMap).toEqual(true); expect(cmap.useCMap).not.toBeNull(); - expect(cmap.builtInCMap).toBeUndefined(); + expect(cmap.builtInCMap).toBeFalsy(); + expect(cmap.isIdentityCMap).toEqual(false); + }); + it('parses cmapname', function() { + var str = '/CMapName /Identity-H def\n'; + var stream = new StringStream(str); + var cmap = CMapFactory.create(stream); + expect(cmap.name).toEqual('Identity-H'); }); it('parses wmode', function() { var str = '/WMode 1 def\n'; @@ -104,7 +113,17 @@ describe('cmap', function() { it('loads built in cmap', function() { var cmap = CMapFactory.create(new Name('Adobe-Japan1-1'), { url: cMapUrl, packed: cMapPacked }, null); + expect(cmap instanceof CMap).toEqual(true); expect(cmap.useCMap).toBeNull(); expect(cmap.builtInCMap).toBeTruthy(); + expect(cmap.isIdentityCMap).toEqual(false); + }); + it('loads built in identity cmap', function() { + var cmap = CMapFactory.create(new Name('Identity-H'), + { url: cMapUrl, packed: cMapPacked }, null); + expect(cmap instanceof IdentityCMap).toEqual(true); + expect(cmap.vertical).toEqual(false); + expect(function() { return cmap.isIdentityCMap; }).toThrow( + new Error('should not access .isIdentityCMap')); }); });