Merge pull request #6590 from yurydelendik/combinechars

Combines standalone chars into text groups.
This commit is contained in:
Brendan Dahl 2015-11-06 15:06:41 -08:00
commit 9a830a7b62
4 changed files with 189 additions and 83 deletions

View File

@ -920,9 +920,26 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
items: [],
styles: Object.create(null)
};
var bidiTexts = textContent.items;
var textContentItem = {
initialized: false,
str: [],
width: 0,
height: 0,
vertical: false,
lastAdvanceWidth: 0,
lastAdvanceHeight: 0,
textAdvanceScale: 0,
spaceWidth: 0,
fakeSpaceMin: Infinity,
fakeMultiSpaceMin: Infinity,
fakeMultiSpaceMax: -0,
textRunBreakAllowed: false,
transform: null,
fontName: null
};
var SPACE_FACTOR = 0.3;
var MULTI_SPACE_FACTOR = 1.5;
var MULTI_SPACE_FACTOR_MAX = 4;
var self = this;
var xref = this.xref;
@ -937,7 +954,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var textState;
function newTextChunk() {
function ensureTextContentItem() {
if (textContentItem.initialized) {
return textContentItem;
}
var font = textState.font;
if (!(font.loadedName in textContent.styles)) {
textContent.styles[font.loadedName] = {
@ -947,39 +967,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
vertical: font.vertical
};
}
return {
// |str| is initially an array which we push individual chars to, and
// then runBidi() overwrites it with the final string.
str: [],
dir: null,
width: 0,
height: 0,
transform: null,
fontName: font.loadedName
};
}
textContentItem.fontName = font.loadedName;
function runBidi(textChunk) {
var str = textChunk.str.join('');
var bidiResult = PDFJS.bidi(str, -1, textState.font.vertical);
textChunk.str = bidiResult.str;
textChunk.dir = bidiResult.dir;
return textChunk;
}
function handleSetFont(fontName, fontRef) {
return self.loadFont(fontName, fontRef, xref, resources).
then(function (translated) {
textState.font = translated.font;
textState.fontMatrix = translated.font.fontMatrix ||
FONT_IDENTITY_MATRIX;
});
}
function buildTextGeometry(chars, textChunk) {
var font = textState.font;
textChunk = textChunk || newTextChunk();
if (!textChunk.transform) {
// 9.4.4 Text Space Details
var tsm = [textState.fontSize * textState.textHScale, 0,
0, textState.fontSize,
@ -995,14 +984,76 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}
}
var trm = textChunk.transform = Util.transform(textState.ctm,
var trm = Util.transform(textState.ctm,
Util.transform(textState.textMatrix, tsm));
textContentItem.transform = trm;
if (!font.vertical) {
textChunk.height = Math.sqrt(trm[2] * trm[2] + trm[3] * trm[3]);
textContentItem.width = 0;
textContentItem.height = Math.sqrt(trm[2] * trm[2] + trm[3] * trm[3]);
textContentItem.vertical = false;
} else {
textChunk.width = Math.sqrt(trm[0] * trm[0] + trm[1] * trm[1]);
textContentItem.width = Math.sqrt(trm[0] * trm[0] + trm[1] * trm[1]);
textContentItem.height = 0;
textContentItem.vertical = true;
}
var a = textState.textLineMatrix[0];
var b = textState.textLineMatrix[1];
var scaleLineX = Math.sqrt(a * a + b * b);
a = textState.ctm[0];
b = textState.ctm[1];
var scaleCtmX = Math.sqrt(a * a + b * b);
textContentItem.textAdvanceScale = scaleCtmX * scaleLineX;
textContentItem.lastAdvanceWidth = 0;
textContentItem.lastAdvanceHeight = 0;
var spaceWidth = font.spaceWidth / 1000 * textState.fontSize;
if (spaceWidth) {
textContentItem.spaceWidth = spaceWidth;
textContentItem.fakeSpaceMin = spaceWidth * SPACE_FACTOR;
textContentItem.fakeMultiSpaceMin = spaceWidth * MULTI_SPACE_FACTOR;
textContentItem.fakeMultiSpaceMax =
spaceWidth * MULTI_SPACE_FACTOR_MAX;
// It's okay for monospace fonts to fake as much space as needed.
textContentItem.textRunBreakAllowed = !font.isMonospace;
} else {
textContentItem.spaceWidth = 0;
textContentItem.fakeSpaceMin = Infinity;
textContentItem.fakeMultiSpaceMin = Infinity;
textContentItem.fakeMultiSpaceMax = 0;
textContentItem.textRunBreakAllowed = false;
}
textContentItem.initialized = true;
return textContentItem;
}
function runBidiTransform(textChunk) {
var str = textChunk.str.join('');
var bidiResult = PDFJS.bidi(str, -1, textChunk.vertical);
return {
str: bidiResult.str,
dir: bidiResult.dir,
width: textChunk.width,
height: textChunk.height,
transform: textChunk.transform,
fontName: textChunk.fontName
};
}
function handleSetFont(fontName, fontRef) {
return self.loadFont(fontName, fontRef, xref, resources).
then(function (translated) {
textState.font = translated.font;
textState.fontMatrix = translated.font.fontMatrix ||
FONT_IDENTITY_MATRIX;
});
}
function buildTextContentItem(chars) {
var font = textState.font;
var textChunk = ensureTextContentItem();
var width = 0;
var height = 0;
var glyphs = font.charsToGlyphs(chars);
@ -1049,8 +1100,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var wordSpacing = textState.wordSpacing;
charSpacing += wordSpacing;
if (wordSpacing > 0) {
addFakeSpaces(wordSpacing * 1000 / textState.fontSize,
textChunk.str);
addFakeSpaces(wordSpacing, textChunk.str);
}
}
@ -1071,34 +1121,39 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
textChunk.str.push(glyphUnicode);
}
var a = textState.textLineMatrix[0];
var b = textState.textLineMatrix[1];
var scaleLineX = Math.sqrt(a * a + b * b);
a = textState.ctm[0];
b = textState.ctm[1];
var scaleCtmX = Math.sqrt(a * a + b * b);
if (!font.vertical) {
textChunk.width += width * scaleCtmX * scaleLineX;
textChunk.lastAdvanceWidth = width;
textChunk.width += width * textChunk.textAdvanceScale;
} else {
textChunk.height += Math.abs(height * scaleCtmX * scaleLineX);
textChunk.lastAdvanceHeight = height;
textChunk.height += Math.abs(height * textChunk.textAdvanceScale);
}
return textChunk;
}
function addFakeSpaces(width, strBuf) {
var spaceWidth = textState.font.spaceWidth;
if (spaceWidth <= 0) {
if (width < textContentItem.fakeSpaceMin) {
return;
}
var fakeSpaces = width / spaceWidth;
if (fakeSpaces > MULTI_SPACE_FACTOR) {
fakeSpaces = Math.round(fakeSpaces);
while (fakeSpaces--) {
if (width < textContentItem.fakeMultiSpaceMin) {
strBuf.push(' ');
return;
}
var fakeSpaces = Math.round(width / textContentItem.spaceWidth);
while (fakeSpaces-- > 0) {
strBuf.push(' ');
}
} else if (fakeSpaces > SPACE_FACTOR) {
strBuf.push(' ');
}
function flushTextContentItem() {
if (!textContentItem.initialized) {
return;
}
textContent.items.push(runBidiTransform(textContentItem));
textContentItem.initialized = false;
textContentItem.str.length = 0;
}
var timeSlotManager = new TimeSlotManager();
@ -1119,35 +1174,62 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
textState = stateManager.state;
var fn = operation.fn;
args = operation.args;
var advance;
switch (fn | 0) {
case OPS.setFont:
flushTextContentItem();
textState.fontSize = args[1];
return handleSetFont(args[0].name).then(function() {
next(resolve, reject);
}, reject);
case OPS.setTextRise:
flushTextContentItem();
textState.textRise = args[0];
break;
case OPS.setHScale:
flushTextContentItem();
textState.textHScale = args[0] / 100;
break;
case OPS.setLeading:
flushTextContentItem();
textState.leading = args[0];
break;
case OPS.moveText:
// Optimization to treat same line movement as advance
var isSameTextLine = !textState.font ? false :
((textState.font.vertical ? args[0] : args[1]) === 0);
advance = args[0] - args[1];
if (isSameTextLine && textContentItem.initialized &&
advance > 0 &&
advance <= textContentItem.fakeMultiSpaceMax) {
textState.translateTextLineMatrix(args[0], args[1]);
textContentItem.width +=
(args[0] - textContentItem.lastAdvanceWidth);
textContentItem.height +=
(args[1] - textContentItem.lastAdvanceHeight);
var diff = (args[0] - textContentItem.lastAdvanceWidth) -
(args[1] - textContentItem.lastAdvanceHeight);
addFakeSpaces(diff, textContentItem.str);
break;
}
flushTextContentItem();
textState.translateTextLineMatrix(args[0], args[1]);
textState.textMatrix = textState.textLineMatrix.slice();
break;
case OPS.setLeadingMoveText:
flushTextContentItem();
textState.leading = -args[1];
textState.translateTextLineMatrix(args[0], args[1]);
textState.textMatrix = textState.textLineMatrix.slice();
break;
case OPS.nextLine:
flushTextContentItem();
textState.carriageReturn();
break;
case OPS.setTextMatrix:
flushTextContentItem();
textState.setTextMatrix(args[0], args[1], args[2], args[3],
args[4], args[5]);
textState.setTextLineMatrix(args[0], args[1], args[2], args[3],
@ -1160,17 +1242,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
textState.wordSpacing = args[0];
break;
case OPS.beginText:
flushTextContentItem();
textState.textMatrix = IDENTITY_MATRIX.slice();
textState.textLineMatrix = IDENTITY_MATRIX.slice();
break;
case OPS.showSpacedText:
var items = args[0];
var textChunk = newTextChunk();
var offset;
for (var j = 0, jj = items.length; j < jj; j++) {
if (typeof items[j] === 'string') {
buildTextGeometry(items[j], textChunk);
buildTextContentItem(items[j]);
} else {
ensureTextContentItem();
// PDF Specification 5.3.2 states:
// The number is expressed in thousandths of a unit of text
// space.
@ -1179,45 +1263,57 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
// In the default coordinate system, a positive adjustment
// has the effect of moving the next glyph painted either to
// the left or down by the given amount.
var advance = items[j];
var val = advance * textState.fontSize / 1000;
advance = items[j] * textState.fontSize / 1000;
var breakTextRun = false;
if (textState.font.vertical) {
offset = val *
offset = advance *
(textState.textHScale * textState.textMatrix[2] +
textState.textMatrix[3]);
textState.translateTextMatrix(0, val);
textState.translateTextMatrix(0, advance);
breakTextRun = textContentItem.textRunBreakAllowed &&
advance > textContentItem.fakeMultiSpaceMax;
if (!breakTextRun) {
// Value needs to be added to height to paint down.
textChunk.height += offset;
textContentItem.height += offset;
}
} else {
offset = val * (
advance = -advance;
offset = advance * (
textState.textHScale * textState.textMatrix[0] +
textState.textMatrix[1]);
textState.translateTextMatrix(-val, 0);
textState.translateTextMatrix(advance, 0);
breakTextRun = textContentItem.textRunBreakAllowed &&
advance > textContentItem.fakeMultiSpaceMax;
if (!breakTextRun) {
// Value needs to be subtracted from width to paint left.
textChunk.width -= offset;
advance = -advance;
textContentItem.width += offset;
}
if (advance > 0) {
addFakeSpaces(advance, textChunk.str);
}
if (breakTextRun) {
flushTextContentItem();
} else if (advance > 0) {
addFakeSpaces(advance, textContentItem.str);
}
}
}
bidiTexts.push(runBidi(textChunk));
break;
case OPS.showText:
bidiTexts.push(runBidi(buildTextGeometry(args[0])));
buildTextContentItem(args[0]);
break;
case OPS.nextLineShowText:
flushTextContentItem();
textState.carriageReturn();
bidiTexts.push(runBidi(buildTextGeometry(args[0])));
buildTextContentItem(args[0]);
break;
case OPS.nextLineSetSpacingShowText:
flushTextContentItem();
textState.wordSpacing = args[0];
textState.charSpacing = args[1];
textState.carriageReturn();
bidiTexts.push(runBidi(buildTextGeometry(args[2])));
buildTextContentItem(args[2]);
break;
case OPS.paintXObject:
flushTextContentItem();
if (args[0].code) {
break;
}
@ -1229,7 +1325,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
var name = args[0].name;
if (xobjsCache.key === name) {
if (xobjsCache.texts) {
Util.appendToArray(bidiTexts, xobjsCache.texts.items);
Util.appendToArray(textContent.items, xobjsCache.texts.items);
Util.extendObj(textContent.styles, xobjsCache.texts.styles);
}
break;
@ -1260,7 +1356,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
return self.getTextContent(xobj, task,
xobj.dict.get('Resources') || resources, stateManager).
then(function (formTextContent) {
Util.appendToArray(bidiTexts, formTextContent.items);
Util.appendToArray(textContent.items, formTextContent.items);
Util.extendObj(textContent.styles, formTextContent.styles);
stateManager.restore();
@ -1270,6 +1366,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
next(resolve, reject);
}, reject);
case OPS.setGState:
flushTextContentItem();
var dictName = args[0];
var extGState = resources.get('ExtGState');
@ -1300,6 +1397,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}, reject);
return;
}
flushTextContentItem();
resolve(textContent);
});
},

View File

@ -137,6 +137,7 @@
!vertical.pdf
!ZapfDingbats.pdf
!bug878026.pdf
!issue1045.pdf
!issue5010.pdf
!issue4934.pdf
!issue4650.pdf

BIN
test/pdfs/issue1045.pdf Normal file

Binary file not shown.

View File

@ -607,6 +607,13 @@
"link": false,
"type": "load"
},
{ "id": "issue1045",
"file": "pdfs/issue1045.pdf",
"md5": "61d7e9bfbc03cd457dcefeab3e78a687",
"rounds": 1,
"link": false,
"type": "text"
},
{ "id": "issue5677",
"file": "pdfs/issue5677.pdf",
"md5": "c9101578fcb806269145132724d24ac1",