Merge pull request #2127 from jviereck/text-algo-3

Use the text extracted in the getTextContent function for the divs of the textLayer.
This commit is contained in:
Julian Viereck 2012-09-25 05:52:46 -07:00
commit e98eba1b11
7 changed files with 233 additions and 96 deletions

View File

@ -138,11 +138,16 @@ var bidi = PDFJS.bidi = (function bidiClosure() {
}
}
function bidi(text, startLevel) {
var str = text.str;
// Value object returned by bidi(): `str` holds the resulting string and
// `ltr` is true when the text was classified as left-to-right.
function BidiResult(str, isLTR) {
this.str = str;
this.ltr = isLTR;
}
function bidi(str, startLevel) {
var isLTR = true;
var strLength = str.length;
if (strLength == 0)
return str;
return new BidiResult(str, ltr);
// get types, fill arrays
@ -176,16 +181,16 @@ var bidi = PDFJS.bidi = (function bidiClosure() {
// if less than 30% chars are rtl then string is primarily ltr
// if more than 30% chars are rtl then string is primarily rtl
if (numBidi == 0) {
text.direction = 'ltr';
return str;
isLTR = true;
return new BidiResult(str, isLTR);
}
if (startLevel == -1) {
if ((strLength / numBidi) < 0.3) {
text.direction = 'ltr';
isLTR = true;
startLevel = 0;
} else {
text.direction = 'rtl';
isLTR = false;
startLevel = 1;
}
}
@ -438,7 +443,8 @@ var bidi = PDFJS.bidi = (function bidiClosure() {
if (ch != '<' && ch != '>')
result += ch;
}
return result;
return new BidiResult(result, isLTR);
}
return bidi;

View File

@ -677,9 +677,10 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
var textHScale2 = textHScale * fontMatrix[0];
var glyphsLength = glyphs.length;
var textLayer = this.textLayer;
var text = {str: '', length: 0, canvasWidth: 0, geom: {}};
var geom;
var textSelection = textLayer && !skipTextSelection ? true : false;
var textRenderingMode = current.textRenderingMode;
var canvasWidth = 0.0;
// Type3 fonts - each glyph is a "mini-PDF"
if (font.coded) {
@ -692,7 +693,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
if (textSelection) {
this.save();
ctx.scale(1, -1);
text.geom = this.getTextGeometry();
geom = this.getTextGeometry();
this.restore();
}
for (var i = 0; i < glyphsLength; ++i) {
@ -718,9 +719,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
ctx.translate(width, 0);
current.x += width * textHScale;
text.str += glyph.unicode;
text.length++;
text.canvasWidth += width;
canvasWidth += width;
}
ctx.restore();
} else {
@ -735,7 +734,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
lineWidth /= scale;
if (textSelection)
text.geom = this.getTextGeometry();
geom = this.getTextGeometry();
if (fontSizeScale != 1.0) {
ctx.scale(fontSizeScale, fontSizeScale);
@ -784,17 +783,19 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
var glyphUnicode = glyph.unicode === ' ' ? '\u00A0' : glyph.unicode;
if (glyphUnicode in NormalizedUnicodes)
glyphUnicode = NormalizedUnicodes[glyphUnicode];
text.str += reverseIfRtl(glyphUnicode);
text.canvasWidth += charWidth;
canvasWidth += charWidth;
}
current.x += x * textHScale2;
ctx.restore();
}
if (textSelection)
this.textLayer.appendText(text, font.fallbackName, fontSize);
if (textSelection) {
geom.canvasWidth = canvasWidth;
this.textLayer.appendText(font.fallbackName, fontSize, geom);
}
return text;
return canvasWidth;
},
showSpacedText: function CanvasGraphics_showSpacedText(arr) {
var ctx = this.ctx;
@ -806,7 +807,8 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
textHScale *= (current.fontMatrix || IDENTITY_MATRIX)[0];
var arrLength = arr.length;
var textLayer = this.textLayer;
var text = {str: '', length: 0, canvasWidth: 0, geom: {}};
var geom;
var canvasWidth = 0.0;
var textSelection = textLayer ? true : false;
if (textSelection) {
@ -819,7 +821,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
ctx.scale(textHScale, 1);
} else
this.applyTextTransforms();
text.geom = this.getTextGeometry();
geom = this.getTextGeometry();
ctx.restore();
}
@ -829,34 +831,22 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
var spacingLength = -e * 0.001 * fontSize * textHScale;
current.x += spacingLength;
if (textSelection) {
// Emulate precise spacing via HTML spaces
text.canvasWidth += spacingLength;
if (e < 0 && text.geom.spaceWidth > 0) { // avoid div by zero
var numFakeSpaces = Math.round(-e / text.geom.spaceWidth);
if (numFakeSpaces > 0) {
text.str += '\u00A0';
}
}
}
if (textSelection)
canvasWidth += spacingLength;
} else if (isString(e)) {
var shownText = this.showText(e, true);
var shownCanvasWidth = this.showText(e, true);
if (textSelection) {
if (shownText.str === ' ') {
text.str += '\u00A0';
} else {
text.str += shownText.str;
}
text.canvasWidth += shownText.canvasWidth;
}
if (textSelection)
canvasWidth += shownCanvasWidth;
} else {
error('TJ array element ' + e + ' is not string or num');
}
}
if (textSelection)
this.textLayer.appendText(text, font.fallbackName, fontSize);
if (textSelection) {
geom.canvasWidth = canvasWidth;
this.textLayer.appendText(font.fallbackName, fontSize, geom);
}
},
nextLineShowText: function CanvasGraphics_nextLineShowText(text) {
this.nextLine();

View File

@ -164,6 +164,21 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
translated = { error: e };
}
font.translated = translated;
var data = translated;
if (data.loadCharProcs) {
delete data.loadCharProcs;
var charProcs = font.get('CharProcs').getAll();
var fontResources = font.get('Resources') || resources;
var charProcOperatorList = {};
for (var key in charProcs) {
var glyphStream = charProcs[key];
charProcOperatorList[key] =
this.getOperatorList(glyphStream, fontResources, dependency);
}
data.charProcOperatorList = charProcOperatorList;
}
}
return font;
},
@ -195,19 +210,6 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var loadedName = font.loadedName;
if (!font.sent) {
var data = font.translated;
if (data.loadCharProcs) {
delete data.loadCharProcs;
var charProcs = font.get('CharProcs').getAll();
var fontResources = font.get('Resources') || resources;
var charProcOperatorList = {};
for (var key in charProcs) {
var glyphStream = charProcs[key];
charProcOperatorList[key] =
self.getOperatorList(glyphStream, fontResources, dependency);
}
data.charProcOperatorList = charProcOperatorList;
}
if (data instanceof Font)
data = data.exportData();
@ -505,7 +507,18 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
return queue;
},
getTextContent: function partialEvaluatorGetIRQueue(stream, resources) {
getTextContent: function partialEvaluatorGetIRQueue(
stream, resources, state) {
var bidiTexts;
if (!state) {
bidiTexts = [];
state = {
bidiTexts: bidiTexts
};
} else {
bidiTexts = state.bidiTexts;
}
var self = this;
var xref = this.xref;
@ -515,18 +528,20 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
resources = xref.fetchIfRef(resources) || new Dict();
// The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd.
var xobjs = null;
var parser = new Parser(new Lexer(stream), false);
var res = resources;
var args = [], obj;
var text = '';
var chunk = '';
var font = null;
while (!isEOF(obj = parser.getObj())) {
if (isCmd(obj)) {
var cmd = obj.cmd;
switch (cmd) {
// TODO: Add support for SAVE/RESTORE and XFORM here.
case 'Tf':
font = handleSetFont(args[0].name).translated;
break;
@ -535,10 +550,11 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
for (var j = 0, jj = items.length; j < jj; j++) {
if (typeof items[j] === 'string') {
chunk += fontCharsToUnicode(items[j], font);
} else if (items[j] < 0) {
// making all negative offsets a space - better to have
// a space in incorrect place than not have them at all
chunk += ' ';
} else if (items[j] < 0 && font.spaceWidth > 0) {
var numFakeSpaces = Math.round(-items[j] / font.spaceWidth);
if (numFakeSpaces > 0) {
chunk += ' ';
}
}
}
break;
@ -546,14 +562,69 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
chunk += fontCharsToUnicode(args[0], font);
break;
case "'":
chunk += fontCharsToUnicode(args[0], font) + ' ';
// For search, adding an extra whitespace for line breaks would be
// better here, but that causes too many spaces in the
// text-selection divs.
chunk += fontCharsToUnicode(args[0], font);
break;
case '"':
chunk += fontCharsToUnicode(args[2], font) + ' ';
// See the comment in the "'" case above.
chunk += fontCharsToUnicode(args[2], font);
break;
case 'Do':
// Set the chunk such that the following if won't add something
// to the state.
chunk = '';
if (args[0].code) {
break;
}
if (!xobjs) {
xobjs = resources.get('XObject') || new Dict();
}
var name = args[0].name;
var xobj = xobjs.get(name);
if (!xobj)
break;
assertWellFormed(isStream(xobj), 'XObject should be a stream');
var type = xobj.dict.get('Subtype');
assertWellFormed(
isName(type),
'XObject should have a Name subtype'
);
if ('Form' !== type.name)
break;
state = this.getTextContent(
xobj,
xobj.dict.get('Resources') || resources,
state
);
break;
case 'gs':
var dictName = args[0];
var extGState = resources.get('ExtGState');
if (!isDict(extGState) || !extGState.has(dictName.name))
break;
var gsState = extGState.get(dictName.name);
for (var i = 0; i < gsState.length; i++) {
if (gsState[i] === 'Font') {
font = handleSetFont(args[0].name).translated;
}
}
break;
} // switch
if (chunk !== '') {
text += chunk;
bidiTexts.push(PDFJS.bidi(chunk, -1));
chunk = '';
}
@ -562,9 +633,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
assertWellFormed(args.length <= 33, 'Too many arguments');
args.push(obj);
}
}
} // while
return text;
return state;
},
extractDataStructures: function

View File

@ -3886,6 +3886,10 @@ var Font = (function FontClosure() {
},
get spaceWidth() {
if ('_shadowWidth' in this) {
return this._shadowWidth;
}
// trying to estimate space character width
var possibleSpaceReplacements = ['space', 'minus', 'one', 'i'];
var width;
@ -3913,7 +3917,10 @@ var Font = (function FontClosure() {
break; // the non-zero width found
}
width = (width || this.defaultWidth) * this.widthMultiplier;
return shadow(this, 'spaceWidth', width);
// Do not shadow the property here. See discussion:
// https://github.com/mozilla/pdf.js/pull/2127#discussion_r1662280
this._shadowWidth = width;
return width;
},
charToGlyph: function Font_charToGlyph(charcode) {

View File

@ -159,6 +159,7 @@ NullTextLayerBuilder.prototype = {
// Text layer builder used by the test driver: it draws onto the given canvas
// context instead of creating DOM nodes.
function SimpleTextLayerBuilder(ctx, viewport) {
this.ctx = ctx;
this.viewport = viewport;
// Index of the next textContent.bidiTexts entry to draw; appendText
// increments it once per call.
this.textCounter = 0;
}
SimpleTextLayerBuilder.prototype = {
beginLayout: function SimpleTextLayerBuilder_BeginLayout() {
@ -167,27 +168,31 @@ SimpleTextLayerBuilder.prototype = {
// Called when layout is finished; restores the saved canvas context state.
endLayout: function SimpleTextLayerBuilder_EndLayout() {
this.ctx.restore();
},
appendText: function SimpleTextLayerBuilder_AppendText(text, fontName,
fontSize) {
appendText: function SimpleTextLayerBuilder_AppendText(fontName, fontSize,
geom) {
var ctx = this.ctx, viewport = this.viewport;
// vScale and hScale already contain the scaling to pixel units
var fontHeight = fontSize * text.geom.vScale;
var fontHeight = fontSize * geom.vScale;
ctx.beginPath();
ctx.strokeStyle = 'red';
ctx.fillStyle = 'yellow';
ctx.rect(text.geom.x, text.geom.y - fontHeight,
text.canvasWidth * text.geom.hScale, fontHeight);
ctx.rect(geom.x, geom.y - fontHeight,
geom.canvasWidth * geom.hScale, fontHeight);
ctx.stroke();
ctx.fill();
var textContent = bidi(text, -1);
var textContent = this.textContent.bidiTexts[this.textCounter].str;
ctx.font = fontHeight + 'px ' + fontName;
ctx.fillStyle = 'black';
ctx.fillText(textContent, text.geom.x, text.geom.y);
ctx.fillText(textContent, geom.x, geom.y);
this.textCounter++;
},
// Stores the extracted text content of the page. appendText reads the
// strings to draw from textContent.bidiTexts, indexed by textCounter.
setTextContent: function SimpleTextLayerBuilder_SetTextContent(textContent) {
this.textContent = textContent;
}
};
function nextPage(task, loadError) {
var failure = loadError || '';
@ -245,6 +250,10 @@ function nextPage(task, loadError) {
drawContext = dummyCanvas.getContext('2d');
// ... text builder will draw its content on the test canvas
textLayerBuilder = new SimpleTextLayerBuilder(ctx, viewport);
page.getTextContent().then(function(textContent) {
textLayerBuilder.setTextContent(textContent);
});
} else {
drawContext = ctx;
textLayerBuilder = new NullTextLayerBuilder();

View File

@ -121,7 +121,7 @@ html[dir='rtl'] .innerCenter {
-o-transition-timing-function: ease;
transition-duration: 200ms;
transition-timing-function: ease;
}
html[dir='ltr'] #sidebarContainer {
-webkit-transition-property: left;
@ -629,7 +629,7 @@ html[dir='rtl'] .toolbarButton:first-child {
display: inline-block;
content: url(images/toolbarButton-sidebarToggle.png);
}
html[dir='ltr'] .toolbarButton.pageUp::before {
display: inline-block;
content: url(images/toolbarButton-pageUp.png);
@ -639,7 +639,7 @@ html[dir='rtl'] .toolbarButton.pageUp::before {
display: inline-block;
content: url(images/toolbarButton-pageUp-rtl.png);
}
html[dir='ltr'] .toolbarButton.pageDown::before {
display: inline-block;
content: url(images/toolbarButton-pageDown.png);
@ -654,7 +654,7 @@ html[dir='rtl'] .toolbarButton.pageDown::before {
display: inline-block;
content: url(images/toolbarButton-zoomOut.png);
}
.toolbarButton.zoomIn::before {
display: inline-block;
content: url(images/toolbarButton-zoomIn.png);
@ -691,12 +691,12 @@ html[dir='rtl'] .toolbarButton.pageDown::before {
.toolbarButton.bookmark::before {
content: url(images/toolbarButton-bookmark.png);
}
#viewThumbnail.toolbarButton::before {
display: inline-block;
content: url(images/toolbarButton-viewThumbnail.png);
}
#viewOutline.toolbarButton::before {
display: inline-block;
content: url(images/toolbarButton-viewOutline.png);
@ -797,7 +797,7 @@ html[dir='rtl'] .toolbarButton.pageDown::before {
padding: 7px;
-moz-transition-duration: 150ms;
}
a:focus > .thumbnail > .thumbnailSelectionRing > .thumbnailImage,
.thumbnail:hover > .thumbnailSelectionRing > .thumbnailImage {
opacity: .9;
@ -1016,7 +1016,7 @@ canvas {
background: -moz-linear-gradient(top, #b2b2b2 0%,#898989 100%);
background: -ms-linear-gradient(top, #b2b2b2 0%,#898989 100%);
background: -o-linear-gradient(top, #b2b2b2 0%,#898989 100%);
background: linear-gradient(top, #b2b2b2 0%,#898989 100%);
background: linear-gradient(top, #b2b2b2 0%,#898989 100%);
border-top-left-radius: 2px;
border-bottom-left-radius: 2px;
@ -1066,6 +1066,7 @@ canvas {
color: transparent;
position: absolute;
line-height:1.3;
white-space:pre;
}
/* TODO: file FF bug to support ::-moz-selection:window-inactive
@ -1202,7 +1203,7 @@ canvas {
@page {
margin: 0;
}
}
#printContainer {
display: none;

View File

@ -1040,7 +1040,7 @@ var PDFView = {
function extractPageText(pageIndex) {
self.pages[pageIndex].pdfPage.getTextContent().then(
function textContentResolved(textContent) {
self.pageText[pageIndex] = textContent;
self.pageText[pageIndex] = textContent.join('');
self.search();
if ((pageIndex + 1) < self.pages.length)
extractPageText(pageIndex + 1);
@ -1228,6 +1228,8 @@ var PageView = function pageView(container, pdfPage, id, scale,
this.renderingState = RenderingStates.INITIAL;
this.resume = null;
this.textContent = null;
var anchor = document.createElement('a');
anchor.name = '' + this.id;
@ -1448,6 +1450,13 @@ var PageView = function pageView(container, pdfPage, id, scale,
}, 0);
};
// Returns the text content handle for this page. The request to
// pdfPage.getTextContent() is issued only on the first call; the stored
// value is handed back on every later call.
this.getTextContent = function pageviewGetTextContent() {
  return this.textContent ||
         (this.textContent = this.pdfPage.getTextContent());
};
this.draw = function pageviewDraw(callback) {
if (this.renderingState !== RenderingStates.INITIAL)
error('Must be in new state before drawing');
@ -1528,6 +1537,14 @@ var PageView = function pageView(container, pdfPage, id, scale,
}
);
if (textLayer) {
this.getTextContent().then(
function textContentResolved(textContent) {
textLayer.setTextContent(textContent);
}
);
}
setupAnnotations(this.pdfPage, this.viewport);
div.setAttribute('data-loaded', true);
};
@ -1820,12 +1837,19 @@ var CustomStyle = (function CustomStyleClosure() {
var TextLayerBuilder = function textLayerBuilder(textLayerDiv) {
var textLayerFrag = document.createDocumentFragment();
this.textLayerDiv = textLayerDiv;
this.layoutDone = false;
this.divContentDone = false;
this.beginLayout = function textLayerBuilderBeginLayout() {
this.textDivs = [];
this.textLayerQueue = [];
};
// Marks the layout pass as finished and tries to fill in the div contents;
// insertDivContent returns early if the text content has not arrived yet.
this.endLayout = function textLayerBuilderEndLayout() {
  this.layoutDone = true;
  this.insertDivContent();
};
this.renderLayer = function textLayerBuilderRenderLayer() {
var self = this;
var textDivs = this.textDivs;
@ -1857,7 +1881,7 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) {
textLayerDiv.appendChild(textLayerFrag);
};
this.endLayout = function textLayerBuilderEndLayout() {
this.setupRenderLayoutTimer = function textLayerSetupRenderLayoutTimer() {
// Schedule renderLayout() if user has been scrolling, otherwise
// run it right away
var kRenderDelay = 200; // in ms
@ -1870,27 +1894,56 @@ var TextLayerBuilder = function textLayerBuilder(textLayerDiv) {
if (this.renderTimer)
clearTimeout(this.renderTimer);
this.renderTimer = setTimeout(function() {
self.endLayout();
self.setupRenderLayoutTimer();
}, kRenderDelay);
}
}; // endLayout
};
this.appendText = function textLayerBuilderAppendText(text,
fontName, fontSize) {
this.appendText = function textLayerBuilderAppendText(fontName, fontSize,
geom) {
var textDiv = document.createElement('div');
// vScale and hScale already contain the scaling to pixel units
var fontHeight = fontSize * text.geom.vScale;
textDiv.dataset.canvasWidth = text.canvasWidth * text.geom.hScale;
var fontHeight = fontSize * geom.vScale;
textDiv.dataset.canvasWidth = geom.canvasWidth * geom.hScale;
textDiv.dataset.fontName = fontName;
textDiv.style.fontSize = fontHeight + 'px';
textDiv.style.fontFamily = fontName;
textDiv.style.left = text.geom.x + 'px';
textDiv.style.top = (text.geom.y - fontHeight) + 'px';
textDiv.textContent = PDFJS.bidi(text, -1);
textDiv.dir = text.direction;
textDiv.style.left = geom.x + 'px';
textDiv.style.top = (geom.y - fontHeight) + 'px';
// The content of the div is set in the `setTextContent` function.
this.textDivs.push(textDiv);
};
// Copies the extracted text (string and direction) into the divs created by
// appendText, then schedules rendering of the layer. Runs its body at most
// once; both endLayout and setTextContent call it, and whichever comes last
// does the work.
this.insertDivContent = function textLayerUpdateTextContent() {
  // Only set the content of the divs once layout has finished, the content
  // for the divs is available and content is not yet set on the divs.
  if (!this.layoutDone || this.divContentDone || !this.textContent)
    return;

  this.divContentDone = true;

  var textDivs = this.textDivs;
  var bidiTexts = this.textContent.bidiTexts;

  // The divs are produced while drawing and the texts by the separate text
  // extraction, so their counts are not guaranteed to match. Pair up only as
  // many as both sides provide; otherwise a missing div would throw here and
  // the layer would never be rendered.
  var count = Math.min(bidiTexts.length, textDivs.length);
  for (var i = 0; i < count; i++) {
    var bidiText = bidiTexts[i];
    var textDiv = textDivs[i];

    textDiv.textContent = bidiText.str;
    textDiv.dir = bidiText.ltr ? 'ltr' : 'rtl';
  }

  this.setupRenderLayoutTimer();
};
// Receives the extracted text content for the page and tries to fill in the
// div contents; insertDivContent returns early if layout is not done yet.
this.setTextContent = function textLayerBuilderSetTextContent(textContent) {
this.textContent = textContent;
this.insertDivContent();
};
};
document.addEventListener('DOMContentLoaded', function webViewerLoad(evt) {