Combines standalone divs into text groups.

This commit is contained in:
Yury Delendik 2015-11-03 10:12:41 -06:00
parent 9e66625092
commit 376f8bde14

View File

@ -920,7 +920,18 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
items: [], items: [],
styles: Object.create(null) styles: Object.create(null)
}; };
var bidiTexts = textContent.items; var textContentItem = {
initialized: false,
str: [],
width: 0,
height: 0,
vertical: false,
lastAdvanceWidth: 0,
lastAdvanceHeight: 0,
textAdvanceScale: 0,
transform: null,
fontName: null
};
var SPACE_FACTOR = 0.3; var SPACE_FACTOR = 0.3;
var MULTI_SPACE_FACTOR = 1.5; var MULTI_SPACE_FACTOR = 1.5;
@ -937,7 +948,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var textState; var textState;
function newTextChunk() { function ensureTextContentItem() {
if (textContentItem.initialized) {
return textContentItem;
}
var font = textState.font; var font = textState.font;
if (!(font.loadedName in textContent.styles)) { if (!(font.loadedName in textContent.styles)) {
textContent.styles[font.loadedName] = { textContent.styles[font.loadedName] = {
@ -947,39 +961,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
vertical: font.vertical vertical: font.vertical
}; };
} }
return { textContentItem.fontName = font.loadedName;
// |str| is initially an array which we push individual chars to, and
// then runBidi() overwrites it with the final string.
str: [],
dir: null,
width: 0,
height: 0,
transform: null,
fontName: font.loadedName
};
}
function runBidi(textChunk) {
var str = textChunk.str.join('');
var bidiResult = PDFJS.bidi(str, -1, textState.font.vertical);
textChunk.str = bidiResult.str;
textChunk.dir = bidiResult.dir;
return textChunk;
}
function handleSetFont(fontName, fontRef) {
return self.loadFont(fontName, fontRef, xref, resources).
then(function (translated) {
textState.font = translated.font;
textState.fontMatrix = translated.font.fontMatrix ||
FONT_IDENTITY_MATRIX;
});
}
function buildTextGeometry(chars, textChunk) {
var font = textState.font;
textChunk = textChunk || newTextChunk();
if (!textChunk.transform) {
// 9.4.4 Text Space Details // 9.4.4 Text Space Details
var tsm = [textState.fontSize * textState.textHScale, 0, var tsm = [textState.fontSize * textState.textHScale, 0,
0, textState.fontSize, 0, textState.fontSize,
@ -995,14 +978,58 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
} }
} }
var trm = textChunk.transform = Util.transform(textState.ctm, var trm = Util.transform(textState.ctm,
Util.transform(textState.textMatrix, tsm)); Util.transform(textState.textMatrix, tsm));
textContentItem.transform = trm;
if (!font.vertical) { if (!font.vertical) {
textChunk.height = Math.sqrt(trm[2] * trm[2] + trm[3] * trm[3]); textContentItem.width = 0;
textContentItem.height = Math.sqrt(trm[2] * trm[2] + trm[3] * trm[3]);
textContentItem.vertical = false;
} else { } else {
textChunk.width = Math.sqrt(trm[0] * trm[0] + trm[1] * trm[1]); textContentItem.width = Math.sqrt(trm[0] * trm[0] + trm[1] * trm[1]);
textContentItem.height = 0;
textContentItem.vertical = true;
} }
var a = textState.textLineMatrix[0];
var b = textState.textLineMatrix[1];
var scaleLineX = Math.sqrt(a * a + b * b);
a = textState.ctm[0];
b = textState.ctm[1];
var scaleCtmX = Math.sqrt(a * a + b * b);
textContentItem.textAdvanceScale = scaleCtmX * scaleLineX;
textContentItem.lastAdvanceWidth = 0;
textContentItem.lastAdvanceHeight = 0;
textContentItem.initialized = true;
return textContentItem;
} }
// Produces a finished text item from an accumulated text chunk.
//
// The chunk's |str| array is joined into a single string and handed to
// PDFJS.bidi() together with the chunk's |vertical| flag. The |str| and |dir|
// of the bidi result are combined with the chunk's width, height, transform
// and fontName into a brand new object; the chunk itself is not written to.
function runBidiTransform(textChunk) {
  var joinedText = textChunk.str.join('');
  var bidi = PDFJS.bidi(joinedText, -1, textChunk.vertical);

  var item = {};
  item.str = bidi.str;
  item.dir = bidi.dir;
  item.width = textChunk.width;
  item.height = textChunk.height;
  item.transform = textChunk.transform;
  item.fontName = textChunk.fontName;
  return item;
}
// Loads the font identified by |fontName| / |fontRef| through self.loadFont()
// (passing the enclosing |xref| and |resources|) and, once it resolves, stores
// the translated font on the current |textState|. The text state's fontMatrix
// becomes the font's own matrix, or FONT_IDENTITY_MATRIX when the font does
// not provide one. Returns the promise chained off the font load.
function handleSetFont(fontName, fontRef) {
  var fontLoaded = self.loadFont(fontName, fontRef, xref, resources);
  return fontLoaded.then(function onFontTranslated(translated) {
    textState.font = translated.font;
    var ownMatrix = translated.font.fontMatrix;
    textState.fontMatrix = ownMatrix || FONT_IDENTITY_MATRIX;
  });
}
function buildTextContentItem(chars) {
var font = textState.font;
var textChunk = ensureTextContentItem();
var width = 0; var width = 0;
var height = 0; var height = 0;
var glyphs = font.charsToGlyphs(chars); var glyphs = font.charsToGlyphs(chars);
@ -1071,16 +1098,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
textChunk.str.push(glyphUnicode); textChunk.str.push(glyphUnicode);
} }
var a = textState.textLineMatrix[0];
var b = textState.textLineMatrix[1];
var scaleLineX = Math.sqrt(a * a + b * b);
a = textState.ctm[0];
b = textState.ctm[1];
var scaleCtmX = Math.sqrt(a * a + b * b);
if (!font.vertical) { if (!font.vertical) {
textChunk.width += width * scaleCtmX * scaleLineX; textChunk.lastAdvanceWidth = width;
textChunk.width += width * textChunk.textAdvanceScale;
} else { } else {
textChunk.height += Math.abs(height * scaleCtmX * scaleLineX); textChunk.lastAdvanceHeight = height;
textChunk.height += Math.abs(height * textChunk.textAdvanceScale);
} }
return textChunk; return textChunk;
} }
@ -1101,6 +1124,16 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
} }
} }
// Emits the text accumulated in the shared |textContentItem|, if any.
//
// Nothing happens while the item is not initialized. Otherwise the item is run
// through runBidiTransform() and the result is appended to textContent.items;
// afterwards the accumulator is marked uninitialized and its |str| array is
// emptied in place so it can be reused.
function flushTextContentItem() {
  if (textContentItem.initialized) {
    var outputItems = textContent.items;
    outputItems.push(runBidiTransform(textContentItem));

    textContentItem.initialized = false;
    // Truncate rather than replace, keeping the same array object.
    textContentItem.str.length = 0;
  }
}
var timeSlotManager = new TimeSlotManager(); var timeSlotManager = new TimeSlotManager();
return new Promise(function next(resolve, reject) { return new Promise(function next(resolve, reject) {
@ -1119,35 +1152,60 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
textState = stateManager.state; textState = stateManager.state;
var fn = operation.fn; var fn = operation.fn;
args = operation.args; args = operation.args;
var advance;
switch (fn | 0) { switch (fn | 0) {
case OPS.setFont: case OPS.setFont:
flushTextContentItem();
textState.fontSize = args[1]; textState.fontSize = args[1];
return handleSetFont(args[0].name).then(function() { return handleSetFont(args[0].name).then(function() {
next(resolve, reject); next(resolve, reject);
}, reject); }, reject);
case OPS.setTextRise: case OPS.setTextRise:
flushTextContentItem();
textState.textRise = args[0]; textState.textRise = args[0];
break; break;
case OPS.setHScale: case OPS.setHScale:
flushTextContentItem();
textState.textHScale = args[0] / 100; textState.textHScale = args[0] / 100;
break; break;
case OPS.setLeading: case OPS.setLeading:
flushTextContentItem();
textState.leading = args[0]; textState.leading = args[0];
break; break;
case OPS.moveText: case OPS.moveText:
// Optimization to treat same line movement as advance
var isSameTextLine = !textState.font ? false :
((textState.font.vertical ? args[0] : args[1]) === 0);
if (isSameTextLine && textContentItem.initialized) {
textState.translateTextLineMatrix(args[0], args[1]);
textContentItem.width +=
(args[0] - textContentItem.lastAdvanceWidth);
textContentItem.height +=
(args[1] - textContentItem.lastAdvanceHeight);
advance = (args[0] - args[1]) * 1000 / textState.fontSize;
if (advance > 0) {
addFakeSpaces(advance, textContentItem.str);
}
break;
}
flushTextContentItem();
textState.translateTextLineMatrix(args[0], args[1]); textState.translateTextLineMatrix(args[0], args[1]);
textState.textMatrix = textState.textLineMatrix.slice(); textState.textMatrix = textState.textLineMatrix.slice();
break; break;
case OPS.setLeadingMoveText: case OPS.setLeadingMoveText:
flushTextContentItem();
textState.leading = -args[1]; textState.leading = -args[1];
textState.translateTextLineMatrix(args[0], args[1]); textState.translateTextLineMatrix(args[0], args[1]);
textState.textMatrix = textState.textLineMatrix.slice(); textState.textMatrix = textState.textLineMatrix.slice();
break; break;
case OPS.nextLine: case OPS.nextLine:
flushTextContentItem();
textState.carriageReturn(); textState.carriageReturn();
break; break;
case OPS.setTextMatrix: case OPS.setTextMatrix:
flushTextContentItem();
textState.setTextMatrix(args[0], args[1], args[2], args[3], textState.setTextMatrix(args[0], args[1], args[2], args[3],
args[4], args[5]); args[4], args[5]);
textState.setTextLineMatrix(args[0], args[1], args[2], args[3], textState.setTextLineMatrix(args[0], args[1], args[2], args[3],
@ -1160,17 +1218,20 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
textState.wordSpacing = args[0]; textState.wordSpacing = args[0];
break; break;
case OPS.beginText: case OPS.beginText:
flushTextContentItem();
textState.textMatrix = IDENTITY_MATRIX.slice(); textState.textMatrix = IDENTITY_MATRIX.slice();
textState.textLineMatrix = IDENTITY_MATRIX.slice(); textState.textLineMatrix = IDENTITY_MATRIX.slice();
break; break;
case OPS.showSpacedText: case OPS.showSpacedText:
var items = args[0]; var items = args[0];
var textChunk = newTextChunk();
var offset; var offset;
for (var j = 0, jj = items.length; j < jj; j++) { for (var j = 0, jj = items.length; j < jj; j++) {
if (typeof items[j] === 'string') { if (typeof items[j] === 'string') {
buildTextGeometry(items[j], textChunk); buildTextContentItem(items[j]);
} else { } else {
if (j === 0) {
ensureTextContentItem();
}
// PDF Specification 5.3.2 states: // PDF Specification 5.3.2 states:
// The number is expressed in thousandths of a unit of text // The number is expressed in thousandths of a unit of text
// space. // space.
@ -1179,7 +1240,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
// In the default coordinate system, a positive adjustment // In the default coordinate system, a positive adjustment
// has the effect of moving the next glyph painted either to // has the effect of moving the next glyph painted either to
// the left or down by the given amount. // the left or down by the given amount.
var advance = items[j]; advance = items[j];
var val = advance * textState.fontSize / 1000; var val = advance * textState.fontSize / 1000;
if (textState.font.vertical) { if (textState.font.vertical) {
offset = val * offset = val *
@ -1187,37 +1248,39 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
textState.textMatrix[3]); textState.textMatrix[3]);
textState.translateTextMatrix(0, val); textState.translateTextMatrix(0, val);
// Value needs to be added to height to paint down. // Value needs to be added to height to paint down.
textChunk.height += offset; textContentItem.height += offset;
} else { } else {
offset = val * ( offset = val * (
textState.textHScale * textState.textMatrix[0] + textState.textHScale * textState.textMatrix[0] +
textState.textMatrix[1]); textState.textMatrix[1]);
textState.translateTextMatrix(-val, 0); textState.translateTextMatrix(-val, 0);
// Value needs to be subtracted from width to paint left. // Value needs to be subtracted from width to paint left.
textChunk.width -= offset; textContentItem.width -= offset;
advance = -advance; advance = -advance;
} }
if (advance > 0) { if (advance > 0) {
addFakeSpaces(advance, textChunk.str); addFakeSpaces(advance, textContentItem.str);
} }
} }
} }
bidiTexts.push(runBidi(textChunk));
break; break;
case OPS.showText: case OPS.showText:
bidiTexts.push(runBidi(buildTextGeometry(args[0]))); buildTextContentItem(args[0]);
break; break;
case OPS.nextLineShowText: case OPS.nextLineShowText:
flushTextContentItem();
textState.carriageReturn(); textState.carriageReturn();
bidiTexts.push(runBidi(buildTextGeometry(args[0]))); buildTextContentItem(args[0]);
break; break;
case OPS.nextLineSetSpacingShowText: case OPS.nextLineSetSpacingShowText:
flushTextContentItem();
textState.wordSpacing = args[0]; textState.wordSpacing = args[0];
textState.charSpacing = args[1]; textState.charSpacing = args[1];
textState.carriageReturn(); textState.carriageReturn();
bidiTexts.push(runBidi(buildTextGeometry(args[2]))); buildTextContentItem(args[2]);
break; break;
case OPS.paintXObject: case OPS.paintXObject:
flushTextContentItem();
if (args[0].code) { if (args[0].code) {
break; break;
} }
@ -1229,7 +1292,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var name = args[0].name; var name = args[0].name;
if (xobjsCache.key === name) { if (xobjsCache.key === name) {
if (xobjsCache.texts) { if (xobjsCache.texts) {
Util.appendToArray(bidiTexts, xobjsCache.texts.items); Util.appendToArray(textContent.items, xobjsCache.texts.items);
Util.extendObj(textContent.styles, xobjsCache.texts.styles); Util.extendObj(textContent.styles, xobjsCache.texts.styles);
} }
break; break;
@ -1260,7 +1323,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
return self.getTextContent(xobj, task, return self.getTextContent(xobj, task,
xobj.dict.get('Resources') || resources, stateManager). xobj.dict.get('Resources') || resources, stateManager).
then(function (formTextContent) { then(function (formTextContent) {
Util.appendToArray(bidiTexts, formTextContent.items); Util.appendToArray(textContent.items, formTextContent.items);
Util.extendObj(textContent.styles, formTextContent.styles); Util.extendObj(textContent.styles, formTextContent.styles);
stateManager.restore(); stateManager.restore();
@ -1270,6 +1333,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
next(resolve, reject); next(resolve, reject);
}, reject); }, reject);
case OPS.setGState: case OPS.setGState:
flushTextContentItem();
var dictName = args[0]; var dictName = args[0];
var extGState = resources.get('ExtGState'); var extGState = resources.get('ExtGState');
@ -1300,6 +1364,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}, reject); }, reject);
return; return;
} }
flushTextContentItem();
resolve(textContent); resolve(textContent);
}); });
}, },