Use a proper MessageHandler for PartialEvaluator.getTextContent to avoid errors for fonts relying on built-in CMap files (PR 8064 follow-up)

*My apologies for inadvertently breaking this in PR 8064; apparently we don't have any tests that cover this use-case :(*

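Without this patch, `getTextContent` will fail if it is called before `getOperatorList`, because loading fonts during text extraction may require fetching built-in CMap files, and the placeholder handler that `extractTextContent` previously created (with no-op `on`/`send` methods) cannot service those requests.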

*Please note:* The `text` test added here, which reuses an existing PDF file, fails without this patch.
This commit is contained in:
Jonas Jenwald 2017-03-24 17:24:30 +01:00
parent 68f2bf3bec
commit 3705e5e459
3 changed files with 10 additions and 8 deletions

View File

@ -310,14 +310,9 @@ var Page = (function PageClosure() {
});
},
extractTextContent: function Page_extractTextContent(task,
extractTextContent: function Page_extractTextContent(handler, task,
normalizeWhitespace,
combineTextItems) {
var handler = {
on: function nullHandlerOn() {},
send: function nullHandlerSend() {}
};
var self = this;
var pdfManager = this.pdfManager;

View File

@ -906,7 +906,7 @@ var WorkerMessageHandler = {
startWorkerTask(task);
var pageNum = pageIndex + 1;
var start = Date.now();
return page.extractTextContent(task, normalizeWhitespace,
return page.extractTextContent(handler, task, normalizeWhitespace,
combineTextItems).then(
function(textContent) {
finishWorkerTask(task);

View File

@ -2757,10 +2757,17 @@
"md5": "797093d67c4d4d4231ac6e1fb66bf6c3",
"rounds": 1,
"link": true,
"firstPage": 1,
"lastPage": 1,
"type": "eq"
},
{ "id": "mao-text",
"file": "pdfs/mao.pdf",
"md5": "797093d67c4d4d4231ac6e1fb66bf6c3",
"rounds": 1,
"link": true,
"lastPage": 1,
"type": "text"
},
{ "id": "noembed-identity",
"file": "pdfs/noembed-identity.pdf",
"md5": "05d3803b6c22451e18cb60d8d8c75c0c",