From 2d8d8b5e538544ec3509f72ec6b91323f73cc5d5 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 3 Nov 2016 19:48:08 +0100 Subject: [PATCH 1/2] Use `stringToPDFString` to sanitize bad "Prefix" entries in Page Label dictionaries It seems that certain bad PDF generators can create badly encoded "Prefix" entries for Page Labels, one example being http://ukjewishfilm.org/wp-content/uploads/2015/09/Jewish-Film-Festival-Programme-ONLINE.pdf. Unfortunately I didn't come across such a PDF file while adding the API support for Page Labels, but with them now being used in the viewer I just found this issue. With this patch, we now display the Page Labels in the same way as Adobe Reader. --- src/core/obj.js | 5 +++-- test/pdfs/.gitignore | 1 + test/pdfs/bad-PageLabels.pdf | Bin 0 -> 792 bytes test/unit/api_spec.js | 12 +++++++++++- 4 files changed, 15 insertions(+), 3 deletions(-) create mode 100644 test/pdfs/bad-PageLabels.pdf diff --git a/src/core/obj.js b/src/core/obj.js index 4d3c18735..7196797ba 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -302,8 +302,9 @@ var Catalog = (function CatalogClosure() { assert(!s || isName(s), 'Invalid style in PageLabel dictionary.'); style = (s ? 
s.name : null); - prefix = labelDict.get('P') || ''; - assert(isString(prefix), 'Invalid prefix in PageLabel dictionary.'); + var p = labelDict.get('P') || ''; + assert(isString(p), 'Invalid prefix in PageLabel dictionary.'); + prefix = stringToPDFString(p); start = labelDict.get('St') || 1; assert(isInt(start), 'Invalid start in PageLabel dictionary.'); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index f07e5921f..ecfb1700d 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -40,6 +40,7 @@ !issue7544.pdf !issue7598.pdf !issue7665.pdf +!bad-PageLabels.pdf !filled-background.pdf !ArabicCIDTrueType.pdf !ThuluthFeatures.pdf diff --git a/test/pdfs/bad-PageLabels.pdf b/test/pdfs/bad-PageLabels.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8cafbcd102f135c177b7da52818b56bc58c30283 GIT binary patch literal 792 zcmZWn!A{#i5bb%sVlI*3K-QbYNstf{A;|^WM!`~%5Qj~?35()gUG#Cii$ZV97m*OIO%R$=!OQL~^@;Ve0 znJKhCS-L`55I6Oz3fVaXWtrKv%Z=Tl_+jjY^=1$3V{(ml6Lt`rTx+|n9&ng4!&jwN z1xzxKoDs7rh`B`B!)yabv@6Wrx-NJuKMko?Dn}k7Hmm6#WA+HM40sn!EsSg7wZ*RA z>Vnpi;E0ED-!{icq%H91_3g{;F@;7kZPA`fy@3SBki>@~r2P~(7%@rH Date: Thu, 3 Nov 2016 20:08:06 +0100 Subject: [PATCH 2/2] Add a bit more validation to `Catalog_readPageLabels`, to ensure that the Page Labels are well formed --- src/core/obj.js | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/core/obj.js b/src/core/obj.js index 7196797ba..19de8dffa 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -283,7 +283,6 @@ var Catalog = (function CatalogClosure() { var pageLabels = new Array(this.numPages); var style = null; var prefix = ''; - var start = 1; var numberTree = new NumberTree(obj, this.xref); var nums = numberTree.getAll(); @@ -300,15 +299,16 @@ var Catalog = (function CatalogClosure() { var s = labelDict.get('S'); assert(!s || isName(s), 'Invalid style in PageLabel dictionary.'); - style = (s ? s.name : null); + style = s ? 
s.name : null; - var p = labelDict.get('P') || ''; - assert(isString(p), 'Invalid prefix in PageLabel dictionary.'); - prefix = stringToPDFString(p); + var p = labelDict.get('P'); + assert(!p || isString(p), 'Invalid prefix in PageLabel dictionary.'); + prefix = p ? stringToPDFString(p) : ''; - start = labelDict.get('St') || 1; - assert(isInt(start), 'Invalid start in PageLabel dictionary.'); - currentIndex = start; + var st = labelDict.get('St'); + assert(!st || (isInt(st) && st >= 1), + 'Invalid start in PageLabel dictionary.'); + currentIndex = st || 1; } switch (style) {