Make getTextContent return the text together with an offset array, and improve
the algorithm. Make the search parts in viewer.js work again.
This commit is contained in:
parent
e13846821c
commit
a38c4bc729
@ -505,7 +505,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
return queue;
|
return queue;
|
||||||
},
|
},
|
||||||
|
|
||||||
getTextContent: function partialEvaluatorGetIRQueue(stream, resources) {
|
getTextContent: function partialEvaluatorGetIRQueue(stream, resources, state) {
|
||||||
|
if (!state) {
|
||||||
|
state = {
|
||||||
|
text: '',
|
||||||
|
mapping: []
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
var self = this;
|
var self = this;
|
||||||
var xref = this.xref;
|
var xref = this.xref;
|
||||||
@ -515,18 +521,22 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
resources = xref.fetchIfRef(resources) || new Dict();
|
resources = xref.fetchIfRef(resources) || new Dict();
|
||||||
|
// The xobjs are fetched only if needed, e.g. if there is a `Do` cmd.
|
||||||
|
var xobjs = null;
|
||||||
|
|
||||||
var parser = new Parser(new Lexer(stream), false);
|
var parser = new Parser(new Lexer(stream), false);
|
||||||
var res = resources;
|
var res = resources;
|
||||||
var args = [], obj;
|
var args = [], obj;
|
||||||
|
|
||||||
var text = '';
|
var text = state.text;
|
||||||
var chunk = '';
|
var chunk = '';
|
||||||
|
var commandOffset = state.mapping;
|
||||||
var font = null;
|
var font = null;
|
||||||
while (!isEOF(obj = parser.getObj())) {
|
while (!isEOF(obj = parser.getObj())) {
|
||||||
if (isCmd(obj)) {
|
if (isCmd(obj)) {
|
||||||
var cmd = obj.cmd;
|
var cmd = obj.cmd;
|
||||||
switch (cmd) {
|
switch (cmd) {
|
||||||
|
// TODO: Add support for SAVE/RESTORE and XFORM here.
|
||||||
case 'Tf':
|
case 'Tf':
|
||||||
font = handleSetFont(args[0].name).translated;
|
font = handleSetFont(args[0].name).translated;
|
||||||
break;
|
break;
|
||||||
@ -536,9 +546,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
if (typeof items[j] === 'string') {
|
if (typeof items[j] === 'string') {
|
||||||
chunk += fontCharsToUnicode(items[j], font);
|
chunk += fontCharsToUnicode(items[j], font);
|
||||||
} else if (items[j] < 0) {
|
} else if (items[j] < 0) {
|
||||||
// making all negative offsets a space - better to have
|
|
||||||
// a space in incorrect place than not have them at all
|
|
||||||
chunk += ' ';
|
chunk += ' ';
|
||||||
|
} else if (items[j] < 0 && font.spacedWidth > 0) {
|
||||||
|
var numFakeSpaces = Math.round(-e / font.spacedWidth);
|
||||||
|
if (numFakeSpaces > 0) {
|
||||||
|
chunk += ' ';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -551,8 +564,49 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
case '"':
|
case '"':
|
||||||
chunk += fontCharsToUnicode(args[2], font) + ' ';
|
chunk += fontCharsToUnicode(args[2], font) + ' ';
|
||||||
break;
|
break;
|
||||||
|
case 'Do':
|
||||||
|
// Set the chunk such that the following if won't add something
|
||||||
|
// to the state.
|
||||||
|
chunk = '';
|
||||||
|
|
||||||
|
if (args[0].code) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!xobjs) {
|
||||||
|
xobjs = resources.get('XObject') || new Dict();
|
||||||
|
}
|
||||||
|
|
||||||
|
var name = args[0].name;
|
||||||
|
var xobj = xobjs.get(name);
|
||||||
|
if (!xobj)
|
||||||
|
break;
|
||||||
|
assertWellFormed(isStream(xobj), 'XObject should be a stream');
|
||||||
|
|
||||||
|
var type = xobj.dict.get('Subtype');
|
||||||
|
assertWellFormed(
|
||||||
|
isName(type),
|
||||||
|
'XObject should have a Name subtype'
|
||||||
|
);
|
||||||
|
|
||||||
|
if ('Form' !== type.name)
|
||||||
|
break;
|
||||||
|
|
||||||
|
// Add some spacing between the text here and the text of the
|
||||||
|
// xForm.
|
||||||
|
text = text + ' ';
|
||||||
|
|
||||||
|
state.text = text;
|
||||||
|
state = this.getTextContent(
|
||||||
|
xobj,
|
||||||
|
xobj.dict.get('Resources') || resources,
|
||||||
|
state
|
||||||
|
);
|
||||||
|
text = state.text;
|
||||||
|
break;
|
||||||
} // switch
|
} // switch
|
||||||
if (chunk !== '') {
|
if (chunk !== '') {
|
||||||
|
commandOffset.push(text.length);
|
||||||
text += chunk;
|
text += chunk;
|
||||||
chunk = '';
|
chunk = '';
|
||||||
}
|
}
|
||||||
@ -564,7 +618,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return text;
|
return {
|
||||||
|
text: text,
|
||||||
|
mapping: commandOffset
|
||||||
|
};
|
||||||
},
|
},
|
||||||
|
|
||||||
extractDataStructures: function
|
extractDataStructures: function
|
||||||
|
@ -88,7 +88,7 @@ limitations under the License.
|
|||||||
<button id="viewOutline" class="toolbarButton group" title="Show Document Outline" tabindex="2" data-l10n-id="outline">
|
<button id="viewOutline" class="toolbarButton group" title="Show Document Outline" tabindex="2" data-l10n-id="outline">
|
||||||
<span data-l10n-id="outline_label">Document Outline</span>
|
<span data-l10n-id="outline_label">Document Outline</span>
|
||||||
</button>
|
</button>
|
||||||
<button id="viewSearch" class="toolbarButton group hidden" title="Search Document" tabindex="3" data-l10n-id="search_panel">
|
<button id="viewSearch" class="toolbarButton group" title="Search Document" tabindex="3" data-l10n-id="search_panel">
|
||||||
<span data-l10n-id="search_panel_label">Search Document</span>
|
<span data-l10n-id="search_panel_label">Search Document</span>
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
@ -1043,7 +1043,7 @@ var PDFView = {
|
|||||||
function extractPageText(pageIndex) {
|
function extractPageText(pageIndex) {
|
||||||
self.pages[pageIndex].pdfPage.getTextContent().then(
|
self.pages[pageIndex].pdfPage.getTextContent().then(
|
||||||
function textContentResolved(textContent) {
|
function textContentResolved(textContent) {
|
||||||
self.pageText[pageIndex] = textContent;
|
self.pageText[pageIndex] = textContent.text;
|
||||||
self.search();
|
self.search();
|
||||||
if ((pageIndex + 1) < self.pages.length)
|
if ((pageIndex + 1) < self.pages.length)
|
||||||
extractPageText(pageIndex + 1);
|
extractPageText(pageIndex + 1);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user