From 3705e5e45986d66c5bb644f22b17de8e341839c9 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 24 Mar 2017 17:24:30 +0100 Subject: [PATCH 1/2] Use a proper `MessageHandler` for `PartialEvaluator.getTextContent` to avoid errors for fonts relying on built-in CMap files (PR 8064 follow-up) *My apologies for inadvertently breaking this in PR 8064; apparently we don't have any tests that cover this use-case :(* Without this patch `getTextContent` will fail if called before `getOperatorList`, since loading of fonts during text-extraction may require fetching of built-in CMap files. *Please note:* The `text` test added here, which uses an already existing PDF file, fails without this patch. --- src/core/document.js | 7 +------ src/core/worker.js | 2 +- test/test_manifest.json | 9 ++++++++- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index 716b9a21d..0cb7dfe0e 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -310,14 +310,9 @@ var Page = (function PageClosure() { }); }, - extractTextContent: function Page_extractTextContent(task, + extractTextContent: function Page_extractTextContent(handler, task, normalizeWhitespace, combineTextItems) { - var handler = { - on: function nullHandlerOn() {}, - send: function nullHandlerSend() {} - }; - var self = this; var pdfManager = this.pdfManager; diff --git a/src/core/worker.js b/src/core/worker.js index 756c9e8a6..ec8aa4b1c 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -906,7 +906,7 @@ var WorkerMessageHandler = { startWorkerTask(task); var pageNum = pageIndex + 1; var start = Date.now(); - return page.extractTextContent(task, normalizeWhitespace, + return page.extractTextContent(handler, task, normalizeWhitespace, combineTextItems).then( function(textContent) { finishWorkerTask(task); diff --git a/test/test_manifest.json b/test/test_manifest.json index a266aea45..e99a638f6 100644 --- a/test/test_manifest.json +++ 
b/test/test_manifest.json @@ -2757,10 +2757,17 @@ "md5": "797093d67c4d4d4231ac6e1fb66bf6c3", "rounds": 1, "link": true, - "firstPage": 1, "lastPage": 1, "type": "eq" }, + { "id": "mao-text", + "file": "pdfs/mao.pdf", + "md5": "797093d67c4d4d4231ac6e1fb66bf6c3", + "rounds": 1, + "link": true, + "lastPage": 1, + "type": "text" + }, { "id": "noembed-identity", "file": "pdfs/noembed-identity.pdf", "md5": "05d3803b6c22451e18cb60d8d8c75c0c", From 5c0c122a7da425f84b675ed665dc14bd9b7d166e Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sat, 25 Mar 2017 17:43:51 +0100 Subject: [PATCH 2/2] Ensure that the `XMLHttpRequest` is `open`ed before attempting to set the `responseType` in the `DOMCMapReaderFactory`, since IE fails otherwise (issue 8193) I really cannot understand why this change is necessary, since modern browsers such as Firefox and Chrome work just fine with the old code. Hence this patch is yet another "hack" that's needed just because IE apparently cannot just work like you'd expect. For consistency, the Node factory used in the CMap unit-tests is changed as well. Fixes 8193. --- src/display/dom_utils.js | 13 +++++++------ test/unit/test_utils.js | 12 +++++------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/display/dom_utils.js b/src/display/dom_utils.js index 185303a28..37ea54643 100644 --- a/src/display/dom_utils.js +++ b/src/display/dom_utils.js @@ -76,15 +76,17 @@ var DOMCMapReaderFactory = (function DOMCMapReaderFactoryClosure() { DOMCMapReaderFactory.prototype = { fetch: function(params) { - if (!params.name) { + var name = params.name; + if (!name) { return Promise.reject(new Error('CMap name must be specified.')); } return new Promise(function (resolve, reject) { - var url = this.baseUrl + params.name; + var url = this.baseUrl + name + (this.isCompressed ? 
'.bcmap' : ''); var request = new XMLHttpRequest(); + request.open('GET', url, true); + if (this.isCompressed) { - url += '.bcmap'; request.responseType = 'arraybuffer'; } request.onreadystatechange = function () { @@ -105,12 +107,11 @@ var DOMCMapReaderFactory = (function DOMCMapReaderFactoryClosure() { return; } reject(new Error('Unable to load ' + - (this.isCompressed ? 'binary' : '') + - ' CMap at: ' + url)); + (this.isCompressed ? 'binary ' : '') + + 'CMap at: ' + url)); } }.bind(this); - request.open('GET', url, true); request.send(null); }.bind(this)); }, diff --git a/test/unit/test_utils.js b/test/unit/test_utils.js index 7d37c0e21..bf28eeba8 100644 --- a/test/unit/test_utils.js +++ b/test/unit/test_utils.js @@ -35,21 +35,19 @@ var NodeCMapReaderFactory = (function NodeCMapReaderFactoryClosure() { NodeCMapReaderFactory.prototype = { fetch: function(params) { - if (!params.name) { + var name = params.name; + if (!name) { return Promise.reject(new Error('CMap name must be specified.')); } return new Promise(function (resolve, reject) { - var url = this.baseUrl + params.name; + var url = this.baseUrl + name + (this.isCompressed ? '.bcmap' : ''); var fs = require('fs'); - if (this.isCompressed) { - url += '.bcmap'; - } fs.readFile(url, function (error, data) { if (error || !data) { reject(new Error('Unable to load ' + - (this.isCompressed ? 'binary' : '') + - ' CMap at: ' + url)); + (this.isCompressed ? 'binary ' : '') + + 'CMap at: ' + url)); return; } resolve({