Catch errors and continue parsing in parseCMap (issue 7492)

After PR 7039, the PDF file in issue 7492 no longer renders at all, but note that text selection wasn't working correctly previously.

The problem with the PDF file in issue 7492 is that the `cMap`, in the `toUnicode` entry in the font, contains an invalid name:
```
/CMapName /-usr-share-fonts-truetype-Panton-Panton Family-Fontfabric - Panton.otf,000-UTF16 def
```
When we parse that line, things obviously break because there are spaces present in the wrong places.
To avoid that issue, the patch simply lets `parseCMap` continue when errors are encountered, to try and recover usable data. Note that by not aborting immediately when an error is encountered, we are also able to fix the text selection.

Obviously, it could be argued that we should just immediately reject a corrupt `cMap`. But given that they usually are correct, it seems that trying to recover as much data as possible from a corrupt one can only be a good thing for both glyph mapping and text selection.

Fixes 7492.
This commit is contained in:
Jonas Jenwald 2016-07-18 16:01:02 +02:00
parent 0da97ad2a0
commit 90d19de935
4 changed files with 59 additions and 37 deletions

View File

@ -31,9 +31,11 @@
var Util = sharedUtil.Util;
var assert = sharedUtil.assert;
var warn = sharedUtil.warn;
var error = sharedUtil.error;
var isInt = sharedUtil.isInt;
var isString = sharedUtil.isString;
var MissingDataException = sharedUtil.MissingDataException;
var isName = corePrimitives.isName;
var isCmd = corePrimitives.isCmd;
var isStream = corePrimitives.isStream;
@ -881,41 +883,49 @@ var CMapFactory = (function CMapFactoryClosure() {
var previous;
var embededUseCMap;
objLoop: while (true) {
var obj = lexer.getObj();
if (isEOF(obj)) {
break;
} else if (isName(obj)) {
if (obj.name === 'WMode') {
parseWMode(cMap, lexer);
} else if (obj.name === 'CMapName') {
parseCMapName(cMap, lexer);
try {
var obj = lexer.getObj();
if (isEOF(obj)) {
break;
} else if (isName(obj)) {
if (obj.name === 'WMode') {
parseWMode(cMap, lexer);
} else if (obj.name === 'CMapName') {
parseCMapName(cMap, lexer);
}
previous = obj;
} else if (isCmd(obj)) {
switch (obj.cmd) {
case 'endcmap':
break objLoop;
case 'usecmap':
if (isName(previous)) {
embededUseCMap = previous.name;
}
break;
case 'begincodespacerange':
parseCodespaceRange(cMap, lexer);
break;
case 'beginbfchar':
parseBfChar(cMap, lexer);
break;
case 'begincidchar':
parseCidChar(cMap, lexer);
break;
case 'beginbfrange':
parseBfRange(cMap, lexer);
break;
case 'begincidrange':
parseCidRange(cMap, lexer);
break;
}
}
previous = obj;
} else if (isCmd(obj)) {
switch (obj.cmd) {
case 'endcmap':
break objLoop;
case 'usecmap':
if (isName(previous)) {
embededUseCMap = previous.name;
}
break;
case 'begincodespacerange':
parseCodespaceRange(cMap, lexer);
break;
case 'beginbfchar':
parseBfChar(cMap, lexer);
break;
case 'begincidchar':
parseCidChar(cMap, lexer);
break;
case 'beginbfrange':
parseBfRange(cMap, lexer);
break;
case 'begincidrange':
parseCidRange(cMap, lexer);
break;
} catch (ex) {
if (ex instanceof MissingDataException) {
throw ex;
}
warn('Invalid cMap data: ' + ex);
continue;
}
}
@ -926,9 +936,8 @@ var CMapFactory = (function CMapFactoryClosure() {
}
if (useCMap) {
return extendCMap(cMap, builtInCMapParams, useCMap);
} else {
return Promise.resolve(cMap);
}
return Promise.resolve(cMap);
}
function extendCMap(cMap, builtInCMapParams, useCMap) {
@ -990,8 +999,6 @@ var CMapFactory = (function CMapFactoryClosure() {
parseCMap(cMap, lexer, builtInCMapParams, null).then(
function (parsedCMap) {
resolve(parsedCMap);
}).catch(function (e) {
reject(new Error({ message: 'Invalid CMap data', error: e }));
});
} else {
reject(new Error('Unable to get cMap at: ' + url));

View File

@ -28,6 +28,7 @@
!issue7200.pdf
!issue7229.pdf
!issue7439.pdf
!issue7492.pdf
!filled-background.pdf
!ArabicCIDTrueType.pdf
!ThuluthFeatures.pdf

BIN
test/pdfs/issue7492.pdf Normal file

Binary file not shown.

View File

@ -1124,6 +1124,20 @@
"link": false,
"type": "eq"
},
{ "id": "issue7492-eq",
"file": "pdfs/issue7492.pdf",
"md5": "7b0b28919c1088a2a5a0aeedbaa4c3ca",
"rounds": 1,
"link": false,
"type": "eq"
},
{ "id": "issue7492-text",
"file": "pdfs/issue7492.pdf",
"md5": "7b0b28919c1088a2a5a0aeedbaa4c3ca",
"rounds": 1,
"link": false,
"type": "text"
},
{ "id": "ShowText-ShadingPattern",
"file": "pdfs/ShowText-ShadingPattern.pdf",
"md5": "fe683725db037ffe19d390969610a652",