Use `stringToPDFString` to sanitize bad "Prefix" entries in Page Label dictionaries

It seems that certain bad PDF generators can create badly encoded "Prefix" entries for Page Labels, one example being http://ukjewishfilm.org/wp-content/uploads/2015/09/Jewish-Film-Festival-Programme-ONLINE.pdf.

Unfortunately, I didn't come across such a PDF file while adding the API support for Page Labels, but now that they are being used in the viewer, I just found this issue. With this patch, we now display the Page Labels in the same way as Adobe Reader.
This commit is contained in:
Jonas Jenwald 2016-11-03 19:48:08 +01:00
parent 9f8d67475e
commit 2d8d8b5e53
4 changed files with 15 additions and 3 deletions

View File

@ -302,8 +302,9 @@ var Catalog = (function CatalogClosure() {
assert(!s || isName(s), 'Invalid style in PageLabel dictionary.');
style = (s ? s.name : null);
prefix = labelDict.get('P') || '';
assert(isString(prefix), 'Invalid prefix in PageLabel dictionary.');
var p = labelDict.get('P') || '';
assert(isString(p), 'Invalid prefix in PageLabel dictionary.');
prefix = stringToPDFString(p);
start = labelDict.get('St') || 1;
assert(isInt(start), 'Invalid start in PageLabel dictionary.');

View File

@ -40,6 +40,7 @@
!issue7544.pdf
!issue7598.pdf
!issue7665.pdf
!bad-PageLabels.pdf
!filled-background.pdf
!ArabicCIDTrueType.pdf
!ThuluthFeatures.pdf

Binary file not shown.

View File

@ -503,14 +503,24 @@ describe('api', function() {
return pdfDoc.getPageLabels();
});
Promise.all([promise0, promise1, promise2]).then(function (pageLabels) {
// PageLabels with bad "Prefix" entries.
var url3 = new URL('../pdfs/bad-PageLabels.pdf', window.location).href;
var loadingTask3 = new PDFJS.getDocument(url3);
var promise3 = loadingTask3.promise.then(function (pdfDoc) {
return pdfDoc.getPageLabels();
});
Promise.all([promise0, promise1, promise2, promise3]).then(
function (pageLabels) {
expect(pageLabels[0]).toEqual(['i', 'ii', 'iii', '1']);
expect(pageLabels[1]).toEqual(['Front Page1']);
expect(pageLabels[2]).toEqual(['1', '2']);
expect(pageLabels[3]).toEqual(['X1']);
loadingTask0.destroy();
loadingTask1.destroy();
loadingTask2.destroy();
loadingTask3.destroy();
done();
}).catch(function (reason) {
done.fail(reason);