diff --git a/src/core/obj.js b/src/core/obj.js
index b64899f75..923cf18b5 100644
--- a/src/core/obj.js
+++ b/src/core/obj.js
@@ -263,6 +263,118 @@ var Catalog = (function CatalogClosure() {
       }
       return dest;
     },
+
+    /**
+     * The page labels of the document; the result is stored on the instance
+     * through `shadow`.
+     * @return {Array|null} The value produced by `readPageLabels`, or null
+     *   when reading the labels failed.
+     */
+    get pageLabels() {
+      var obj = null;
+      try {
+        obj = this.readPageLabels();
+      } catch (ex) {
+        // Re-throw MissingDataException unchanged; any other failure only
+        // disables the page labels (a warning is logged, null is returned).
+        if (ex instanceof MissingDataException) {
+          throw ex;
+        }
+        warn('Unable to read page labels.');
+      }
+      return shadow(this, 'pageLabels', obj);
+    },
+    /**
+     * Builds the page labels from the /PageLabels entry of the catalog.
+     * @return {Array|null} null when there is no /PageLabels entry; an empty
+     *   Array when every label equals the standard page number; otherwise
+     *   an Array holding one label string per page index.
+     */
+    readPageLabels: function Catalog_readPageLabels() {
+      var obj = this.catDict.getRaw('PageLabels');
+      if (!obj) {
+        return null;
+      }
+      var pageLabels = new Array(this.numPages);
+      var style = null;
+      var prefix = '';
+      var start = 1;
+
+      var numberTree = new NumberTree(obj, this.xref);
+      var nums = numberTree.getAll();
+      var currentLabel = '', currentIndex = 1;
+
+      for (var i = 0, ii = this.numPages; i < ii; i++) {
+        // A dictionary at page index i starts a new labelling range; its
+        // style/prefix stay in effect until the next dictionary is found.
+        if (nums.hasOwnProperty(i)) {
+          var labelDict = nums[i];
+          assert(isDict(labelDict), 'The PageLabel is not a dictionary.');
+
+          var type = labelDict.get('Type');
+          assert(!type || (isName(type) && type.name === 'PageLabel'),
+                 'Invalid type in PageLabel dictionary.');
+
+          var s = labelDict.get('S');
+          assert(!s || isName(s), 'Invalid style in PageLabel dictionary.');
+          style = (s ? s.name : null);
+
+          prefix = labelDict.get('P') || '';
+          assert(isString(prefix), 'Invalid prefix in PageLabel dictionary.');
+
+          start = labelDict.get('St') || 1;
+          // The start value must be >= 1; smaller values would yield negative
+          // decimal labels and characters outside of A--Z/a--z below.
+          assert(isInt(start) && start >= 1,
+                 'Invalid start in PageLabel dictionary.');
+          currentIndex = start;
+        }
+
+        switch (style) {
+          case 'D':
+            currentLabel = currentIndex;
+            break;
+          case 'R':
+          case 'r':
+            currentLabel = Util.toRoman(currentIndex, style === 'r');
+            break;
+          case 'A':
+          case 'a':
+            var LIMIT = 26; // Use only the characters A--Z, or a--z.
+            var A_UPPER_CASE = 0x41, A_LOWER_CASE = 0x61;
+
+            var baseCharCode = (style === 'a' ? A_LOWER_CASE : A_UPPER_CASE);
+            var letterIndex = currentIndex - 1;
+            var character = String.fromCharCode(baseCharCode +
+                                                (letterIndex % LIMIT));
+            // Emit the letter once, plus once more per full alphabet cycle,
+            // i.e. A, ..., Z, AA, ..., ZZ, AAA, ...
+            var charBuf = [];
+            for (var j = 0, jj = (letterIndex / LIMIT) | 0; j <= jj; j++) {
+              charBuf.push(character);
+            }
+            currentLabel = charBuf.join('');
+            break;
+          default:
+            // No style: the label consists of the prefix only.
+            assert(!style,
+                   'Invalid style "' + style + '" in PageLabel dictionary.');
+        }
+        pageLabels[i] = prefix + currentLabel;
+
+        currentLabel = '';
+        currentIndex++;
+      }
+
+      // Ignore PageLabels if they correspond to standard page numbering.
+      for (i = 0, ii = this.numPages; i < ii; i++) {
+        if (pageLabels[i] !== (i + 1).toString()) {
+          break;
+        }
+      }
+      return (i === ii ? [] : pageLabels);
+    },
+
     get attachments() {
       var xref = this.xref;
       var attachments = null, nameTreeRef;
diff --git a/src/core/worker.js b/src/core/worker.js
index 37f2fc756..f71e42b93 100644
--- a/src/core/worker.js
+++ b/src/core/worker.js
@@ -451,6 +451,12 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = {
       }
     );
 
+    handler.on('GetPageLabels',
+      function wphSetupGetPageLabels(data) {
+        return pdfManager.ensureCatalog('pageLabels');
+      }
+    );
+
     handler.on('GetAttachments',
       function wphSetupGetAttachments(data) {
         return pdfManager.ensureCatalog('attachments');
diff --git a/src/display/api.js b/src/display/api.js
index 9a1ad451a..d94a4bd3a 100644
--- a/src/display/api.js
+++ b/src/display/api.js
@@ -690,6 +690,16 @@ var PDFDocumentProxy = (function PDFDocumentProxyClosure() {
     getDestination: function PDFDocumentProxy_getDestination(id) {
       return this.transport.getDestination(id);
     },
+    /**
+     * @return {Promise} A promise that is resolved with: an Array containing
+     *   the pageLabels that correspond to the pageIndexes; or null, when no
+     *   pageLabels are present in the PDF file.
+     *   NOTE: If the pageLabels are all identical to standard page numbering,
+     *   i.e. [1, 2, 3, ...], the promise is resolved with an empty Array.
+     */
+    getPageLabels: function PDFDocumentProxy_getPageLabels() {
+      return this.transport.getPageLabels();
+    },
     /**
      * @return {Promise} A promise that is resolved with a lookup table for
      * mapping named attachments to their content.
@@ -1804,6 +1814,10 @@ var WorkerTransport = (function WorkerTransportClosure() {
       return this.messageHandler.sendWithPromise('GetDestination', { id: id });
     },
 
+    getPageLabels: function WorkerTransport_getPageLabels() {
+      return this.messageHandler.sendWithPromise('GetPageLabels', null);
+    },
+
     getAttachments: function WorkerTransport_getAttachments() {
       return this.messageHandler.sendWithPromise('GetAttachments', null);
     },
diff --git a/src/shared/util.js b/src/shared/util.js
index 23b359227..9a9926667 100644
--- a/src/shared/util.js
+++ b/src/shared/util.js
@@ -808,6 +808,44 @@ var Util = PDFJS.Util = (function UtilClosure() {
     return num < 0 ? -1 : 1;
   };
 
+  // Three groups of ten entries (hundreds, tens, ones); the entry at offset
+  // `digit` within a group is the Roman representation of that digit.
+  var ROMAN_NUMBER_MAP = [
+    '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM',
+    '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC',
+    '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX'
+  ];
+  /**
+   * Converts positive integers to (upper case) Roman numerals.
+   * @param {integer} number - The number that should be converted.
+   * @param {boolean} lowerCase - Indicates if the result should be converted
+   *   to lower case letters. The default is false.
+   * @return {string} The resulting Roman numeral.
+   */
+  Util.toRoman = function Util_toRoman(number, lowerCase) {
+    assert(isInt(number) && number > 0,
+           'The number should be a positive integer.');
+    var pos, romanBuf = [];
+    // Thousands
+    while (number >= 1000) {
+      number -= 1000;
+      romanBuf.push('M');
+    }
+    // Hundreds
+    pos = (number / 100) | 0;
+    number %= 100;
+    romanBuf.push(ROMAN_NUMBER_MAP[pos]);
+    // Tens
+    pos = (number / 10) | 0;
+    number %= 10;
+    romanBuf.push(ROMAN_NUMBER_MAP[10 + pos]);
+    // Ones
+    romanBuf.push(ROMAN_NUMBER_MAP[20 + number]);
+
+    var romanStr = romanBuf.join('');
+    return (lowerCase ? romanStr.toLowerCase() : romanStr);
+  };
+
   Util.appendToArray = function Util_appendToArray(arr1, arr2) {
     Array.prototype.push.apply(arr1, arr2);
   };
diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore
index ad15775ac..d1c35a4b7 100644
--- a/test/pdfs/.gitignore
+++ b/test/pdfs/.gitignore
@@ -24,6 +24,7 @@
 !arial_unicode_ab_cidfont.pdf
 !arial_unicode_en_cidfont.pdf
 !asciihexdecode.pdf
+!bug793632.pdf
 !bug1020858.pdf
 !bug1050040.pdf
 !bug1200096.pdf
diff --git a/test/pdfs/bug793632.pdf b/test/pdfs/bug793632.pdf
new file mode 100644
index 000000000..e17e69b6b
Binary files /dev/null and b/test/pdfs/bug793632.pdf differ
diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js
index a1213bec0..466d035e1 100644
--- a/test/unit/api_spec.js
+++ b/test/unit/api_spec.js
@@ -329,6 +329,36 @@ describe('api', function() {
         expect(data).toEqual(null);
       });
     });
+    it('gets non-existent page labels', function () {
+      var promise = doc.getPageLabels();
+      waitsForPromiseResolved(promise, function (data) {
+        expect(data).toEqual(null);
+      });
+    });
+    it('gets page labels', function () {
+      // PageLabels with Roman/Arabic numerals.
+      var url0 = combineUrl(window.location.href, '../pdfs/bug793632.pdf');
+      var promise0 = PDFJS.getDocument(url0).promise.then(function (pdfDoc) {
+        return pdfDoc.getPageLabels();
+      });
+      // PageLabels with only a label prefix.
+      var url1 = combineUrl(window.location.href, '../pdfs/issue1453.pdf');
+      var promise1 = PDFJS.getDocument(url1).promise.then(function (pdfDoc) {
+        return pdfDoc.getPageLabels();
+      });
+      // PageLabels identical to standard page numbering.
+      var url2 = combineUrl(window.location.href, '../pdfs/rotation.pdf');
+      var promise2 = PDFJS.getDocument(url2).promise.then(function (pdfDoc) {
+        return pdfDoc.getPageLabels();
+      });
+
+      waitsForPromiseResolved(Promise.all([promise0, promise1, promise2]),
+          function (pageLabels) {
+        expect(pageLabels[0]).toEqual(['i', 'ii', 'iii', '1']);
+        expect(pageLabels[1]).toEqual(['Front Page1']);
+        expect(pageLabels[2]).toEqual([]);
+      });
+    });
     it('gets attachments', function() {
       var promise = doc.getAttachments();
       waitsForPromiseResolved(promise, function (data) {