Merge pull request #8488 from mukulmishra18/streams-getTextContent
Streams get text content
This commit is contained in:
commit
e2ca894fec
@ -270,7 +270,7 @@ var Page = (function PageClosure() {
|
|||||||
},
|
},
|
||||||
|
|
||||||
extractTextContent({ handler, task, normalizeWhitespace,
|
extractTextContent({ handler, task, normalizeWhitespace,
|
||||||
combineTextItems, }) {
|
sink, combineTextItems, }) {
|
||||||
var contentStreamPromise = this.pdfManager.ensure(this,
|
var contentStreamPromise = this.pdfManager.ensure(this,
|
||||||
'getContentStream');
|
'getContentStream');
|
||||||
var resourcesPromise = this.loadResources([
|
var resourcesPromise = this.loadResources([
|
||||||
@ -298,6 +298,7 @@ var Page = (function PageClosure() {
|
|||||||
resources: this.resources,
|
resources: this.resources,
|
||||||
normalizeWhitespace,
|
normalizeWhitespace,
|
||||||
combineTextItems,
|
combineTextItems,
|
||||||
|
sink,
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
|
@ -1176,7 +1176,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
},
|
},
|
||||||
|
|
||||||
getTextContent({ stream, task, resources, stateManager = null,
|
getTextContent({ stream, task, resources, stateManager = null,
|
||||||
normalizeWhitespace = false, combineTextItems = false, }) {
|
normalizeWhitespace = false, combineTextItems = false,
|
||||||
|
sink, seenStyles = Object.create(null), }) {
|
||||||
// Ensure that `resources`/`stateManager` is correctly initialized,
|
// Ensure that `resources`/`stateManager` is correctly initialized,
|
||||||
// even if the provided parameter is e.g. `null`.
|
// even if the provided parameter is e.g. `null`.
|
||||||
resources = resources || Dict.empty;
|
resources = resources || Dict.empty;
|
||||||
@ -1214,7 +1215,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
|
|
||||||
// The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd.
|
// The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd.
|
||||||
var xobjs = null;
|
var xobjs = null;
|
||||||
var xobjsCache = Object.create(null);
|
var skipEmptyXObjs = Object.create(null);
|
||||||
|
|
||||||
var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager);
|
var preprocessor = new EvaluatorPreprocessor(stream, xref, stateManager);
|
||||||
|
|
||||||
@ -1225,7 +1226,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
return textContentItem;
|
return textContentItem;
|
||||||
}
|
}
|
||||||
var font = textState.font;
|
var font = textState.font;
|
||||||
if (!(font.loadedName in textContent.styles)) {
|
if (!(font.loadedName in seenStyles)) {
|
||||||
|
seenStyles[font.loadedName] = true;
|
||||||
textContent.styles[font.loadedName] = {
|
textContent.styles[font.loadedName] = {
|
||||||
fontFamily: font.fallbackName,
|
fontFamily: font.fallbackName,
|
||||||
ascent: font.ascent,
|
ascent: font.ascent,
|
||||||
@ -1416,11 +1418,21 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
textContentItem.str.length = 0;
|
textContentItem.str.length = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function enqueueChunk() {
|
||||||
|
let length = textContent.items.length;
|
||||||
|
if (length > 0) {
|
||||||
|
sink.enqueue(textContent, length);
|
||||||
|
textContent.items = [];
|
||||||
|
textContent.styles = Object.create(null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var timeSlotManager = new TimeSlotManager();
|
var timeSlotManager = new TimeSlotManager();
|
||||||
|
|
||||||
return new Promise(function promiseBody(resolve, reject) {
|
return new Promise(function promiseBody(resolve, reject) {
|
||||||
var next = function (promise) {
|
let next = function (promise) {
|
||||||
promise.then(function () {
|
enqueueChunk();
|
||||||
|
Promise.all([promise, sink.ready]).then(function () {
|
||||||
try {
|
try {
|
||||||
promiseBody(resolve, reject);
|
promiseBody(resolve, reject);
|
||||||
} catch (ex) {
|
} catch (ex) {
|
||||||
@ -1615,11 +1627,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var name = args[0].name;
|
var name = args[0].name;
|
||||||
if (xobjsCache.key === name) {
|
if (name in skipEmptyXObjs) {
|
||||||
if (xobjsCache.texts) {
|
|
||||||
Util.appendToArray(textContent.items, xobjsCache.texts.items);
|
|
||||||
Util.extendObj(textContent.styles, xobjsCache.texts.styles);
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1633,8 +1641,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
assert(isName(type), 'XObject should have a Name subtype');
|
assert(isName(type), 'XObject should have a Name subtype');
|
||||||
|
|
||||||
if (type.name !== 'Form') {
|
if (type.name !== 'Form') {
|
||||||
xobjsCache.key = name;
|
skipEmptyXObjs[name] = true;
|
||||||
xobjsCache.texts = null;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1650,6 +1657,26 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
xObjStateManager.transform(matrix);
|
xObjStateManager.transform(matrix);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Enqueue the `textContent` chunk before parsing the /Form
|
||||||
|
// XObject.
|
||||||
|
enqueueChunk();
|
||||||
|
let sinkWrapper = {
|
||||||
|
enqueueInvoked: false,
|
||||||
|
|
||||||
|
enqueue(chunk, size) {
|
||||||
|
this.enqueueInvoked = true;
|
||||||
|
sink.enqueue(chunk, size);
|
||||||
|
},
|
||||||
|
|
||||||
|
get desiredSize() {
|
||||||
|
return sink.desiredSize;
|
||||||
|
},
|
||||||
|
|
||||||
|
get ready() {
|
||||||
|
return sink.ready;
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
next(self.getTextContent({
|
next(self.getTextContent({
|
||||||
stream: xobj,
|
stream: xobj,
|
||||||
task,
|
task,
|
||||||
@ -1657,12 +1684,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
stateManager: xObjStateManager,
|
stateManager: xObjStateManager,
|
||||||
normalizeWhitespace,
|
normalizeWhitespace,
|
||||||
combineTextItems,
|
combineTextItems,
|
||||||
}).then(function (formTextContent) {
|
sink: sinkWrapper,
|
||||||
Util.appendToArray(textContent.items, formTextContent.items);
|
seenStyles,
|
||||||
Util.extendObj(textContent.styles, formTextContent.styles);
|
}).then(function() {
|
||||||
|
if (!sinkWrapper.enqueueInvoked) {
|
||||||
xobjsCache.key = name;
|
skipEmptyXObjs[name] = true;
|
||||||
xobjsCache.texts = formTextContent;
|
}
|
||||||
}));
|
}));
|
||||||
return;
|
return;
|
||||||
case OPS.setGState:
|
case OPS.setGState:
|
||||||
@ -1686,20 +1713,27 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
} // switch
|
} // switch
|
||||||
|
if (textContent.items.length >= sink.desiredSize) {
|
||||||
|
// Wait for ready, if we reach highWaterMark.
|
||||||
|
stop = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
} // while
|
} // while
|
||||||
if (stop) {
|
if (stop) {
|
||||||
next(deferred);
|
next(deferred);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
flushTextContentItem();
|
flushTextContentItem();
|
||||||
resolve(textContent);
|
enqueueChunk();
|
||||||
|
resolve();
|
||||||
}).catch((reason) => {
|
}).catch((reason) => {
|
||||||
if (this.options.ignoreErrors) {
|
if (this.options.ignoreErrors) {
|
||||||
// Error(s) in the TextContent -- allow text-extraction to continue.
|
// Error(s) in the TextContent -- allow text-extraction to continue.
|
||||||
warn('getTextContent - ignoring errors during task: ' + task.name);
|
warn('getTextContent - ignoring errors during task: ' + task.name);
|
||||||
|
|
||||||
flushTextContentItem();
|
flushTextContentItem();
|
||||||
return textContent;
|
enqueueChunk();
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
throw reason;
|
throw reason;
|
||||||
});
|
});
|
||||||
|
@ -874,30 +874,35 @@ var WorkerMessageHandler = {
|
|||||||
});
|
});
|
||||||
}, this);
|
}, this);
|
||||||
|
|
||||||
handler.on('GetTextContent', function wphExtractText(data) {
|
handler.on('GetTextContent', function wphExtractText(data, sink) {
|
||||||
var pageIndex = data.pageIndex;
|
var pageIndex = data.pageIndex;
|
||||||
return pdfManager.getPage(pageIndex).then(function(page) {
|
sink.onPull = function (desiredSize) { };
|
||||||
|
sink.onCancel = function (reason) { };
|
||||||
|
|
||||||
|
pdfManager.getPage(pageIndex).then(function(page) {
|
||||||
var task = new WorkerTask('GetTextContent: page ' + pageIndex);
|
var task = new WorkerTask('GetTextContent: page ' + pageIndex);
|
||||||
startWorkerTask(task);
|
startWorkerTask(task);
|
||||||
|
|
||||||
var pageNum = pageIndex + 1;
|
var pageNum = pageIndex + 1;
|
||||||
var start = Date.now();
|
var start = Date.now();
|
||||||
return page.extractTextContent({
|
page.extractTextContent({
|
||||||
handler,
|
handler,
|
||||||
task,
|
task,
|
||||||
|
sink,
|
||||||
normalizeWhitespace: data.normalizeWhitespace,
|
normalizeWhitespace: data.normalizeWhitespace,
|
||||||
combineTextItems: data.combineTextItems,
|
combineTextItems: data.combineTextItems,
|
||||||
}).then(function(textContent) {
|
}).then(function() {
|
||||||
finishWorkerTask(task);
|
finishWorkerTask(task);
|
||||||
|
|
||||||
info('text indexing: page=' + pageNum + ' - time=' +
|
info('text indexing: page=' + pageNum + ' - time=' +
|
||||||
(Date.now() - start) + 'ms');
|
(Date.now() - start) + 'ms');
|
||||||
return textContent;
|
sink.close();
|
||||||
}, function (reason) {
|
}, function (reason) {
|
||||||
finishWorkerTask(task);
|
finishWorkerTask(task);
|
||||||
if (task.terminated) {
|
if (task.terminated) {
|
||||||
return; // ignoring errors from the terminated thread
|
return; // ignoring errors from the terminated thread
|
||||||
}
|
}
|
||||||
|
sink.error(reason);
|
||||||
throw reason;
|
throw reason;
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
@ -954,6 +954,24 @@ var PDFPageProxy = (function PDFPageProxyClosure() {
|
|||||||
return intentState.opListReadCapability.promise;
|
return intentState.opListReadCapability.promise;
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {getTextContentParameters} params - getTextContent parameters.
|
||||||
|
* @return {ReadableStream} ReadableStream to read textContent chunks.
|
||||||
|
*/
|
||||||
|
streamTextContent(params = {}) {
|
||||||
|
const TEXT_CONTENT_CHUNK_SIZE = 100;
|
||||||
|
return this.transport.messageHandler.sendWithStream('GetTextContent', {
|
||||||
|
pageIndex: this.pageNumber - 1,
|
||||||
|
normalizeWhitespace: (params.normalizeWhitespace === true),
|
||||||
|
combineTextItems: (params.disableCombineTextItems !== true),
|
||||||
|
}, {
|
||||||
|
highWaterMark: TEXT_CONTENT_CHUNK_SIZE,
|
||||||
|
size(textContent) {
|
||||||
|
return textContent.items.length;
|
||||||
|
},
|
||||||
|
});
|
||||||
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param {getTextContentParameters} params - getTextContent parameters.
|
* @param {getTextContentParameters} params - getTextContent parameters.
|
||||||
* @return {Promise} That is resolved with a {@link TextContent}
|
* @return {Promise} That is resolved with a {@link TextContent}
|
||||||
@ -961,10 +979,28 @@ var PDFPageProxy = (function PDFPageProxyClosure() {
|
|||||||
*/
|
*/
|
||||||
getTextContent: function PDFPageProxy_getTextContent(params) {
|
getTextContent: function PDFPageProxy_getTextContent(params) {
|
||||||
params = params || {};
|
params = params || {};
|
||||||
return this.transport.messageHandler.sendWithPromise('GetTextContent', {
|
let readableStream = this.streamTextContent(params);
|
||||||
pageIndex: this.pageNumber - 1,
|
|
||||||
normalizeWhitespace: (params.normalizeWhitespace === true),
|
return new Promise(function(resolve, reject) {
|
||||||
combineTextItems: (params.disableCombineTextItems !== true),
|
function pump() {
|
||||||
|
reader.read().then(function({ value, done, }) {
|
||||||
|
if (done) {
|
||||||
|
resolve(textContent);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
Util.extendObj(textContent.styles, value.styles);
|
||||||
|
Util.appendToArray(textContent.items, value.items);
|
||||||
|
pump();
|
||||||
|
}, reject);
|
||||||
|
}
|
||||||
|
|
||||||
|
let reader = readableStream.getReader();
|
||||||
|
let textContent = {
|
||||||
|
items: [],
|
||||||
|
styles: Object.create(null),
|
||||||
|
};
|
||||||
|
|
||||||
|
pump();
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@ -20,14 +20,20 @@ import { CustomStyle, getDefaultSetting } from './dom_utils';
|
|||||||
* Text layer render parameters.
|
* Text layer render parameters.
|
||||||
*
|
*
|
||||||
* @typedef {Object} TextLayerRenderParameters
|
* @typedef {Object} TextLayerRenderParameters
|
||||||
* @property {TextContent} textContent - Text content to render (the object is
|
* @property {TextContent} textContent - (optional) Text content to render
|
||||||
* returned by the page's getTextContent() method).
|
* (the object is returned by the page's getTextContent() method).
|
||||||
|
* @property {ReadableStream} textContentStream - (optional) Text content
|
||||||
|
* stream to render (the stream is returned by the page's
|
||||||
|
* streamTextContent() method).
|
||||||
* @property {HTMLElement} container - HTML element that will contain text runs.
|
* @property {HTMLElement} container - HTML element that will contain text runs.
|
||||||
* @property {PageViewport} viewport - The target viewport to properly
|
* @property {PageViewport} viewport - The target viewport to properly
|
||||||
* layout the text runs.
|
* layout the text runs.
|
||||||
* @property {Array} textDivs - (optional) HTML elements that correspond to
|
* @property {Array} textDivs - (optional) HTML elements that correspond to
|
||||||
* the text items of the textContent input. This is output and shall
|
* the text items of the textContent input. This is output and shall
|
||||||
* initially be set to an empty array.
|
* initially be set to an empty array.
|
||||||
|
* @property {Array} textContentItemsStr - (optional) Strings that correspond
|
||||||
|
* to the `str` property of the text items of the textContent input. This is
|
||||||
|
* output and shall initially be set to an empty array.
|
||||||
* @property {number} timeout - (optional) Delay in milliseconds before
|
* @property {number} timeout - (optional) Delay in milliseconds before
|
||||||
* rendering of the text runs occurs.
|
* rendering of the text runs occurs.
|
||||||
* @property {boolean} enhanceTextSelection - (optional) Whether to turn on the
|
* @property {boolean} enhanceTextSelection - (optional) Whether to turn on the
|
||||||
@ -122,6 +128,9 @@ var renderTextLayer = (function renderTextLayerClosure() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
task._textDivProperties.set(textDiv, textDivProperties);
|
task._textDivProperties.set(textDiv, textDivProperties);
|
||||||
|
if (task._textContentStream) {
|
||||||
|
task._layoutText(textDiv);
|
||||||
|
}
|
||||||
|
|
||||||
if (task._enhanceTextSelection) {
|
if (task._enhanceTextSelection) {
|
||||||
var angleCos = 1, angleSin = 0;
|
var angleCos = 1, angleSin = 0;
|
||||||
@ -157,7 +166,6 @@ var renderTextLayer = (function renderTextLayerClosure() {
|
|||||||
if (task._canceled) {
|
if (task._canceled) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
var textLayerFrag = task._container;
|
|
||||||
var textDivs = task._textDivs;
|
var textDivs = task._textDivs;
|
||||||
var capability = task._capability;
|
var capability = task._capability;
|
||||||
var textDivsLength = textDivs.length;
|
var textDivsLength = textDivs.length;
|
||||||
@ -170,50 +178,12 @@ var renderTextLayer = (function renderTextLayerClosure() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The temporary canvas is used to measure text length in the DOM.
|
if (!task._textContentStream) {
|
||||||
var canvas = document.createElement('canvas');
|
for (var i = 0; i < textDivsLength; i++) {
|
||||||
if (typeof PDFJSDev === 'undefined' ||
|
task._layoutText(textDivs[i]);
|
||||||
PDFJSDev.test('FIREFOX || MOZCENTRAL || GENERIC')) {
|
}
|
||||||
canvas.mozOpaque = true;
|
|
||||||
}
|
}
|
||||||
var ctx = canvas.getContext('2d', { alpha: false, });
|
|
||||||
|
|
||||||
var lastFontSize;
|
|
||||||
var lastFontFamily;
|
|
||||||
for (var i = 0; i < textDivsLength; i++) {
|
|
||||||
var textDiv = textDivs[i];
|
|
||||||
var textDivProperties = task._textDivProperties.get(textDiv);
|
|
||||||
if (textDivProperties.isWhitespace) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
var fontSize = textDiv.style.fontSize;
|
|
||||||
var fontFamily = textDiv.style.fontFamily;
|
|
||||||
|
|
||||||
// Only build font string and set to context if different from last.
|
|
||||||
if (fontSize !== lastFontSize || fontFamily !== lastFontFamily) {
|
|
||||||
ctx.font = fontSize + ' ' + fontFamily;
|
|
||||||
lastFontSize = fontSize;
|
|
||||||
lastFontFamily = fontFamily;
|
|
||||||
}
|
|
||||||
|
|
||||||
var width = ctx.measureText(textDiv.textContent).width;
|
|
||||||
textLayerFrag.appendChild(textDiv);
|
|
||||||
|
|
||||||
var transform = '';
|
|
||||||
if (textDivProperties.canvasWidth !== 0 && width > 0) {
|
|
||||||
textDivProperties.scale = textDivProperties.canvasWidth / width;
|
|
||||||
transform = 'scaleX(' + textDivProperties.scale + ')';
|
|
||||||
}
|
|
||||||
if (textDivProperties.angle !== 0) {
|
|
||||||
transform = 'rotate(' + textDivProperties.angle + 'deg) ' + transform;
|
|
||||||
}
|
|
||||||
if (transform !== '') {
|
|
||||||
textDivProperties.originalTransform = transform;
|
|
||||||
CustomStyle.setProp('transform', textDiv, transform);
|
|
||||||
}
|
|
||||||
task._textDivProperties.set(textDiv, textDivProperties);
|
|
||||||
}
|
|
||||||
task._renderingDone = true;
|
task._renderingDone = true;
|
||||||
capability.resolve();
|
capability.resolve();
|
||||||
}
|
}
|
||||||
@ -499,19 +469,27 @@ var renderTextLayer = (function renderTextLayerClosure() {
|
|||||||
* @param {boolean} enhanceTextSelection
|
* @param {boolean} enhanceTextSelection
|
||||||
* @private
|
* @private
|
||||||
*/
|
*/
|
||||||
function TextLayerRenderTask(textContent, container, viewport, textDivs,
|
function TextLayerRenderTask({ textContent, textContentStream, container,
|
||||||
enhanceTextSelection) {
|
viewport, textDivs, textContentItemsStr,
|
||||||
|
enhanceTextSelection, }) {
|
||||||
this._textContent = textContent;
|
this._textContent = textContent;
|
||||||
|
this._textContentStream = textContentStream;
|
||||||
this._container = container;
|
this._container = container;
|
||||||
this._viewport = viewport;
|
this._viewport = viewport;
|
||||||
this._textDivs = textDivs || [];
|
this._textDivs = textDivs || [];
|
||||||
|
this._textContentItemsStr = textContentItemsStr || [];
|
||||||
|
this._enhanceTextSelection = !!enhanceTextSelection;
|
||||||
|
|
||||||
|
this._reader = null;
|
||||||
|
this._layoutTextLastFontSize = null;
|
||||||
|
this._layoutTextLastFontFamily = null;
|
||||||
|
this._layoutTextCtx = null;
|
||||||
this._textDivProperties = new WeakMap();
|
this._textDivProperties = new WeakMap();
|
||||||
this._renderingDone = false;
|
this._renderingDone = false;
|
||||||
this._canceled = false;
|
this._canceled = false;
|
||||||
this._capability = createPromiseCapability();
|
this._capability = createPromiseCapability();
|
||||||
this._renderTimer = null;
|
this._renderTimer = null;
|
||||||
this._bounds = [];
|
this._bounds = [];
|
||||||
this._enhanceTextSelection = !!enhanceTextSelection;
|
|
||||||
}
|
}
|
||||||
TextLayerRenderTask.prototype = {
|
TextLayerRenderTask.prototype = {
|
||||||
get promise() {
|
get promise() {
|
||||||
@ -519,6 +497,10 @@ var renderTextLayer = (function renderTextLayerClosure() {
|
|||||||
},
|
},
|
||||||
|
|
||||||
cancel: function TextLayer_cancel() {
|
cancel: function TextLayer_cancel() {
|
||||||
|
if (this._reader) {
|
||||||
|
this._reader.cancel();
|
||||||
|
this._reader = null;
|
||||||
|
}
|
||||||
this._canceled = true;
|
this._canceled = true;
|
||||||
if (this._renderTimer !== null) {
|
if (this._renderTimer !== null) {
|
||||||
clearTimeout(this._renderTimer);
|
clearTimeout(this._renderTimer);
|
||||||
@ -527,21 +509,100 @@ var renderTextLayer = (function renderTextLayerClosure() {
|
|||||||
this._capability.reject('canceled');
|
this._capability.reject('canceled');
|
||||||
},
|
},
|
||||||
|
|
||||||
_render: function TextLayer_render(timeout) {
|
_processItems(items, styleCache) {
|
||||||
var textItems = this._textContent.items;
|
for (let i = 0, len = items.length; i < len; i++) {
|
||||||
var textStyles = this._textContent.styles;
|
this._textContentItemsStr.push(items[i].str);
|
||||||
for (var i = 0, len = textItems.length; i < len; i++) {
|
appendText(this, items[i], styleCache);
|
||||||
appendText(this, textItems[i], textStyles);
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
_layoutText(textDiv) {
|
||||||
|
let textLayerFrag = this._container;
|
||||||
|
|
||||||
|
let textDivProperties = this._textDivProperties.get(textDiv);
|
||||||
|
if (textDivProperties.isWhitespace) {
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!timeout) { // Render right away
|
let fontSize = textDiv.style.fontSize;
|
||||||
render(this);
|
let fontFamily = textDiv.style.fontFamily;
|
||||||
} else { // Schedule
|
|
||||||
this._renderTimer = setTimeout(() => {
|
// Only build font string and set to context if different from last.
|
||||||
render(this);
|
if (fontSize !== this._layoutTextLastFontSize ||
|
||||||
this._renderTimer = null;
|
fontFamily !== this._layoutTextLastFontFamily) {
|
||||||
}, timeout);
|
this._layoutTextCtx.font = fontSize + ' ' + fontFamily;
|
||||||
|
this._lastFontSize = fontSize;
|
||||||
|
this._lastFontFamily = fontFamily;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let width = this._layoutTextCtx.measureText(textDiv.textContent).width;
|
||||||
|
|
||||||
|
let transform = '';
|
||||||
|
if (textDivProperties.canvasWidth !== 0 && width > 0) {
|
||||||
|
textDivProperties.scale = textDivProperties.canvasWidth / width;
|
||||||
|
transform = 'scaleX(' + textDivProperties.scale + ')';
|
||||||
|
}
|
||||||
|
if (textDivProperties.angle !== 0) {
|
||||||
|
transform = 'rotate(' + textDivProperties.angle + 'deg) ' + transform;
|
||||||
|
}
|
||||||
|
if (transform !== '') {
|
||||||
|
textDivProperties.originalTransform = transform;
|
||||||
|
CustomStyle.setProp('transform', textDiv, transform);
|
||||||
|
}
|
||||||
|
this._textDivProperties.set(textDiv, textDivProperties);
|
||||||
|
textLayerFrag.appendChild(textDiv);
|
||||||
|
},
|
||||||
|
|
||||||
|
_render: function TextLayer_render(timeout) {
|
||||||
|
let capability = createPromiseCapability();
|
||||||
|
let styleCache = Object.create(null);
|
||||||
|
|
||||||
|
// The temporary canvas is used to measure text length in the DOM.
|
||||||
|
let canvas = document.createElement('canvas');
|
||||||
|
if (typeof PDFJSDev === 'undefined' ||
|
||||||
|
PDFJSDev.test('FIREFOX || MOZCENTRAL || GENERIC')) {
|
||||||
|
canvas.mozOpaque = true;
|
||||||
|
}
|
||||||
|
this._layoutTextCtx = canvas.getContext('2d', { alpha: false, });
|
||||||
|
|
||||||
|
if (this._textContent) {
|
||||||
|
let textItems = this._textContent.items;
|
||||||
|
let textStyles = this._textContent.styles;
|
||||||
|
this._processItems(textItems, textStyles);
|
||||||
|
capability.resolve();
|
||||||
|
} else if (this._textContentStream) {
|
||||||
|
let pump = () => {
|
||||||
|
this._reader.read().then(({ value, done, }) => {
|
||||||
|
if (done) {
|
||||||
|
capability.resolve();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Util.extendObj(styleCache, value.styles);
|
||||||
|
this._processItems(value.items, styleCache);
|
||||||
|
pump();
|
||||||
|
|
||||||
|
}, capability.reject);
|
||||||
|
};
|
||||||
|
|
||||||
|
this._reader = this._textContentStream.getReader();
|
||||||
|
pump();
|
||||||
|
} else {
|
||||||
|
throw new Error('Neither "textContent" nor "textContentStream"' +
|
||||||
|
' parameters specified.');
|
||||||
|
}
|
||||||
|
|
||||||
|
capability.promise.then(() => {
|
||||||
|
styleCache = null;
|
||||||
|
if (!timeout) { // Render right away
|
||||||
|
render(this);
|
||||||
|
} else { // Schedule
|
||||||
|
this._renderTimer = setTimeout(() => {
|
||||||
|
render(this);
|
||||||
|
this._renderTimer = null;
|
||||||
|
}, timeout);
|
||||||
|
}
|
||||||
|
}, this._capability.reject);
|
||||||
},
|
},
|
||||||
|
|
||||||
expandTextDivs: function TextLayer_expandTextDivs(expandDivs) {
|
expandTextDivs: function TextLayer_expandTextDivs(expandDivs) {
|
||||||
@ -610,11 +671,15 @@ var renderTextLayer = (function renderTextLayerClosure() {
|
|||||||
* @returns {TextLayerRenderTask}
|
* @returns {TextLayerRenderTask}
|
||||||
*/
|
*/
|
||||||
function renderTextLayer(renderParameters) {
|
function renderTextLayer(renderParameters) {
|
||||||
var task = new TextLayerRenderTask(renderParameters.textContent,
|
var task = new TextLayerRenderTask({
|
||||||
renderParameters.container,
|
textContent: renderParameters.textContent,
|
||||||
renderParameters.viewport,
|
textContentStream: renderParameters.textContentStream,
|
||||||
renderParameters.textDivs,
|
container: renderParameters.container,
|
||||||
renderParameters.enhanceTextSelection);
|
viewport: renderParameters.viewport,
|
||||||
|
textDivs: renderParameters.textDivs,
|
||||||
|
textContentItemsStr: renderParameters.textContentItemsStr,
|
||||||
|
enhanceTextSelection: renderParameters.enhanceTextSelection,
|
||||||
|
});
|
||||||
task._render(renderParameters.timeout);
|
task._render(renderParameters.timeout);
|
||||||
return task;
|
return task;
|
||||||
}
|
}
|
||||||
|
@ -1378,6 +1378,7 @@ MessageHandler.prototype = {
|
|||||||
this.streamControllers[streamId] = {
|
this.streamControllers[streamId] = {
|
||||||
controller,
|
controller,
|
||||||
startCall: startCapability,
|
startCall: startCapability,
|
||||||
|
isClosed: false,
|
||||||
};
|
};
|
||||||
this.postMessage({
|
this.postMessage({
|
||||||
sourceName,
|
sourceName,
|
||||||
@ -1409,6 +1410,7 @@ MessageHandler.prototype = {
|
|||||||
cancel: (reason) => {
|
cancel: (reason) => {
|
||||||
let cancelCapability = createPromiseCapability();
|
let cancelCapability = createPromiseCapability();
|
||||||
this.streamControllers[streamId].cancelCall = cancelCapability;
|
this.streamControllers[streamId].cancelCall = cancelCapability;
|
||||||
|
this.streamControllers[streamId].isClosed = true;
|
||||||
this.postMessage({
|
this.postMessage({
|
||||||
sourceName,
|
sourceName,
|
||||||
targetName,
|
targetName,
|
||||||
@ -1532,9 +1534,15 @@ MessageHandler.prototype = {
|
|||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
case 'enqueue':
|
case 'enqueue':
|
||||||
this.streamControllers[data.streamId].controller.enqueue(data.chunk);
|
if (!this.streamControllers[data.streamId].isClosed) {
|
||||||
|
this.streamControllers[data.streamId].controller.enqueue(data.chunk);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case 'close':
|
case 'close':
|
||||||
|
if (this.streamControllers[data.streamId].isClosed) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
this.streamControllers[data.streamId].isClosed = true;
|
||||||
this.streamControllers[data.streamId].controller.close();
|
this.streamControllers[data.streamId].controller.close();
|
||||||
deleteStreamController();
|
deleteStreamController();
|
||||||
break;
|
break;
|
||||||
@ -1548,6 +1556,9 @@ MessageHandler.prototype = {
|
|||||||
deleteStreamController();
|
deleteStreamController();
|
||||||
break;
|
break;
|
||||||
case 'cancel':
|
case 'cancel':
|
||||||
|
if (!this.streamSinks[data.streamId]) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
resolveCall(this.streamSinks[data.streamId].onCancel,
|
resolveCall(this.streamSinks[data.streamId].onCancel,
|
||||||
[data.reason]).then(() => {
|
[data.reason]).then(() => {
|
||||||
sendStreamResponse({ stream: 'cancel_complete', success: true, });
|
sendStreamResponse({ stream: 'cancel_complete', success: true, });
|
||||||
|
@ -24,8 +24,6 @@ import {
|
|||||||
import { getGlobalEventBus } from './dom_events';
|
import { getGlobalEventBus } from './dom_events';
|
||||||
import { RenderingStates } from './pdf_rendering_queue';
|
import { RenderingStates } from './pdf_rendering_queue';
|
||||||
|
|
||||||
const TEXT_LAYER_RENDER_DELAY = 200; // ms
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @typedef {Object} PDFPageViewOptions
|
* @typedef {Object} PDFPageViewOptions
|
||||||
* @property {HTMLDivElement} container - The viewer element.
|
* @property {HTMLDivElement} container - The viewer element.
|
||||||
@ -444,12 +442,11 @@ class PDFPageView {
|
|||||||
let resultPromise = paintTask.promise.then(function() {
|
let resultPromise = paintTask.promise.then(function() {
|
||||||
return finishPaintTask(null).then(function () {
|
return finishPaintTask(null).then(function () {
|
||||||
if (textLayer) {
|
if (textLayer) {
|
||||||
pdfPage.getTextContent({
|
let readableStream = pdfPage.streamTextContent({
|
||||||
normalizeWhitespace: true,
|
normalizeWhitespace: true,
|
||||||
}).then(function textContentResolved(textContent) {
|
|
||||||
textLayer.setTextContent(textContent);
|
|
||||||
textLayer.render(TEXT_LAYER_RENDER_DELAY);
|
|
||||||
});
|
});
|
||||||
|
textLayer.setTextContentStream(readableStream);
|
||||||
|
textLayer.render();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}, function(reason) {
|
}, function(reason) {
|
||||||
|
@ -41,6 +41,8 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
|
|||||||
this.textLayerDiv = options.textLayerDiv;
|
this.textLayerDiv = options.textLayerDiv;
|
||||||
this.eventBus = options.eventBus || getGlobalEventBus();
|
this.eventBus = options.eventBus || getGlobalEventBus();
|
||||||
this.textContent = null;
|
this.textContent = null;
|
||||||
|
this.textContentItemsStr = [];
|
||||||
|
this.textContentStream = null;
|
||||||
this.renderingDone = false;
|
this.renderingDone = false;
|
||||||
this.pageIdx = options.pageIndex;
|
this.pageIdx = options.pageIndex;
|
||||||
this.pageNumber = this.pageIdx + 1;
|
this.pageNumber = this.pageIdx + 1;
|
||||||
@ -79,7 +81,7 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
|
|||||||
* for specified amount of ms.
|
* for specified amount of ms.
|
||||||
*/
|
*/
|
||||||
render: function TextLayerBuilder_render(timeout) {
|
render: function TextLayerBuilder_render(timeout) {
|
||||||
if (!this.textContent || this.renderingDone) {
|
if (!(this.textContent || this.textContentStream) || this.renderingDone) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
this.cancel();
|
this.cancel();
|
||||||
@ -88,9 +90,11 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
|
|||||||
var textLayerFrag = document.createDocumentFragment();
|
var textLayerFrag = document.createDocumentFragment();
|
||||||
this.textLayerRenderTask = renderTextLayer({
|
this.textLayerRenderTask = renderTextLayer({
|
||||||
textContent: this.textContent,
|
textContent: this.textContent,
|
||||||
|
textContentStream: this.textContentStream,
|
||||||
container: textLayerFrag,
|
container: textLayerFrag,
|
||||||
viewport: this.viewport,
|
viewport: this.viewport,
|
||||||
textDivs: this.textDivs,
|
textDivs: this.textDivs,
|
||||||
|
textContentItemsStr: this.textContentItemsStr,
|
||||||
timeout,
|
timeout,
|
||||||
enhanceTextSelection: this.enhanceTextSelection,
|
enhanceTextSelection: this.enhanceTextSelection,
|
||||||
});
|
});
|
||||||
@ -113,6 +117,11 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
setTextContentStream(readableStream) {
|
||||||
|
this.cancel();
|
||||||
|
this.textContentStream = readableStream;
|
||||||
|
},
|
||||||
|
|
||||||
setTextContent: function TextLayerBuilder_setTextContent(textContent) {
|
setTextContent: function TextLayerBuilder_setTextContent(textContent) {
|
||||||
this.cancel();
|
this.cancel();
|
||||||
this.textContent = textContent;
|
this.textContent = textContent;
|
||||||
@ -122,8 +131,8 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
|
|||||||
matchesLength) {
|
matchesLength) {
|
||||||
var i = 0;
|
var i = 0;
|
||||||
var iIndex = 0;
|
var iIndex = 0;
|
||||||
var bidiTexts = this.textContent.items;
|
let textContentItemsStr = this.textContentItemsStr;
|
||||||
var end = bidiTexts.length - 1;
|
var end = textContentItemsStr.length - 1;
|
||||||
var queryLen = (this.findController === null ?
|
var queryLen = (this.findController === null ?
|
||||||
0 : this.findController.state.query.length);
|
0 : this.findController.state.query.length);
|
||||||
var ret = [];
|
var ret = [];
|
||||||
@ -135,12 +144,13 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
|
|||||||
var matchIdx = matches[m];
|
var matchIdx = matches[m];
|
||||||
|
|
||||||
// Loop over the divIdxs.
|
// Loop over the divIdxs.
|
||||||
while (i !== end && matchIdx >= (iIndex + bidiTexts[i].str.length)) {
|
while (i !== end && matchIdx >=
|
||||||
iIndex += bidiTexts[i].str.length;
|
(iIndex + textContentItemsStr[i].length)) {
|
||||||
|
iIndex += textContentItemsStr[i].length;
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i === bidiTexts.length) {
|
if (i === textContentItemsStr.length) {
|
||||||
console.error('Could not find a matching mapping');
|
console.error('Could not find a matching mapping');
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -160,8 +170,9 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
|
|||||||
|
|
||||||
// Somewhat the same array as above, but use > instead of >= to get
|
// Somewhat the same array as above, but use > instead of >= to get
|
||||||
// the end position right.
|
// the end position right.
|
||||||
while (i !== end && matchIdx > (iIndex + bidiTexts[i].str.length)) {
|
while (i !== end && matchIdx >
|
||||||
iIndex += bidiTexts[i].str.length;
|
(iIndex + textContentItemsStr[i].length)) {
|
||||||
|
iIndex += textContentItemsStr[i].length;
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -181,7 +192,7 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
var bidiTexts = this.textContent.items;
|
let textContentItemsStr = this.textContentItemsStr;
|
||||||
var textDivs = this.textDivs;
|
var textDivs = this.textDivs;
|
||||||
var prevEnd = null;
|
var prevEnd = null;
|
||||||
var pageIdx = this.pageIdx;
|
var pageIdx = this.pageIdx;
|
||||||
@ -204,7 +215,8 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
|
|||||||
|
|
||||||
function appendTextToDiv(divIdx, fromOffset, toOffset, className) {
|
function appendTextToDiv(divIdx, fromOffset, toOffset, className) {
|
||||||
var div = textDivs[divIdx];
|
var div = textDivs[divIdx];
|
||||||
var content = bidiTexts[divIdx].str.substring(fromOffset, toOffset);
|
var content =
|
||||||
|
textContentItemsStr[divIdx].substring(fromOffset, toOffset);
|
||||||
var node = document.createTextNode(content);
|
var node = document.createTextNode(content);
|
||||||
if (className) {
|
if (className) {
|
||||||
var span = document.createElement('span');
|
var span = document.createElement('span');
|
||||||
@ -277,7 +289,7 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
|
|||||||
// Clear all matches.
|
// Clear all matches.
|
||||||
var matches = this.matches;
|
var matches = this.matches;
|
||||||
var textDivs = this.textDivs;
|
var textDivs = this.textDivs;
|
||||||
var bidiTexts = this.textContent.items;
|
let textContentItemsStr = this.textContentItemsStr;
|
||||||
var clearedUntilDivIdx = -1;
|
var clearedUntilDivIdx = -1;
|
||||||
|
|
||||||
// Clear all current matches.
|
// Clear all current matches.
|
||||||
@ -286,7 +298,7 @@ var TextLayerBuilder = (function TextLayerBuilderClosure() {
|
|||||||
var begin = Math.max(clearedUntilDivIdx, match.begin.divIdx);
|
var begin = Math.max(clearedUntilDivIdx, match.begin.divIdx);
|
||||||
for (var n = begin, end = match.end.divIdx; n <= end; n++) {
|
for (var n = begin, end = match.end.divIdx; n <= end; n++) {
|
||||||
var div = textDivs[n];
|
var div = textDivs[n];
|
||||||
div.textContent = bidiTexts[n].str;
|
div.textContent = textContentItemsStr[n];
|
||||||
div.className = '';
|
div.className = '';
|
||||||
}
|
}
|
||||||
clearedUntilDivIdx = match.end.divIdx + 1;
|
clearedUntilDivIdx = match.end.divIdx + 1;
|
||||||
|
Loading…
Reference in New Issue
Block a user