Merge pull request #6590 from yurydelendik/combinechars
Combines standalone chars into text groups.
This commit is contained in:
commit
9a830a7b62
@ -920,9 +920,26 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
items: [],
|
||||
styles: Object.create(null)
|
||||
};
|
||||
var bidiTexts = textContent.items;
|
||||
var textContentItem = {
|
||||
initialized: false,
|
||||
str: [],
|
||||
width: 0,
|
||||
height: 0,
|
||||
vertical: false,
|
||||
lastAdvanceWidth: 0,
|
||||
lastAdvanceHeight: 0,
|
||||
textAdvanceScale: 0,
|
||||
spaceWidth: 0,
|
||||
fakeSpaceMin: Infinity,
|
||||
fakeMultiSpaceMin: Infinity,
|
||||
fakeMultiSpaceMax: -0,
|
||||
textRunBreakAllowed: false,
|
||||
transform: null,
|
||||
fontName: null
|
||||
};
|
||||
var SPACE_FACTOR = 0.3;
|
||||
var MULTI_SPACE_FACTOR = 1.5;
|
||||
var MULTI_SPACE_FACTOR_MAX = 4;
|
||||
|
||||
var self = this;
|
||||
var xref = this.xref;
|
||||
@ -937,7 +954,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
|
||||
var textState;
|
||||
|
||||
function newTextChunk() {
|
||||
function ensureTextContentItem() {
|
||||
if (textContentItem.initialized) {
|
||||
return textContentItem;
|
||||
}
|
||||
var font = textState.font;
|
||||
if (!(font.loadedName in textContent.styles)) {
|
||||
textContent.styles[font.loadedName] = {
|
||||
@ -947,39 +967,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
vertical: font.vertical
|
||||
};
|
||||
}
|
||||
return {
|
||||
// |str| is initially an array which we push individual chars to, and
|
||||
// then runBidi() overwrites it with the final string.
|
||||
str: [],
|
||||
dir: null,
|
||||
width: 0,
|
||||
height: 0,
|
||||
transform: null,
|
||||
fontName: font.loadedName
|
||||
};
|
||||
}
|
||||
textContentItem.fontName = font.loadedName;
|
||||
|
||||
function runBidi(textChunk) {
|
||||
var str = textChunk.str.join('');
|
||||
var bidiResult = PDFJS.bidi(str, -1, textState.font.vertical);
|
||||
textChunk.str = bidiResult.str;
|
||||
textChunk.dir = bidiResult.dir;
|
||||
return textChunk;
|
||||
}
|
||||
|
||||
function handleSetFont(fontName, fontRef) {
|
||||
return self.loadFont(fontName, fontRef, xref, resources).
|
||||
then(function (translated) {
|
||||
textState.font = translated.font;
|
||||
textState.fontMatrix = translated.font.fontMatrix ||
|
||||
FONT_IDENTITY_MATRIX;
|
||||
});
|
||||
}
|
||||
|
||||
function buildTextGeometry(chars, textChunk) {
|
||||
var font = textState.font;
|
||||
textChunk = textChunk || newTextChunk();
|
||||
if (!textChunk.transform) {
|
||||
// 9.4.4 Text Space Details
|
||||
var tsm = [textState.fontSize * textState.textHScale, 0,
|
||||
0, textState.fontSize,
|
||||
@ -995,14 +984,76 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
}
|
||||
}
|
||||
|
||||
var trm = textChunk.transform = Util.transform(textState.ctm,
|
||||
var trm = Util.transform(textState.ctm,
|
||||
Util.transform(textState.textMatrix, tsm));
|
||||
textContentItem.transform = trm;
|
||||
if (!font.vertical) {
|
||||
textChunk.height = Math.sqrt(trm[2] * trm[2] + trm[3] * trm[3]);
|
||||
textContentItem.width = 0;
|
||||
textContentItem.height = Math.sqrt(trm[2] * trm[2] + trm[3] * trm[3]);
|
||||
textContentItem.vertical = false;
|
||||
} else {
|
||||
textChunk.width = Math.sqrt(trm[0] * trm[0] + trm[1] * trm[1]);
|
||||
textContentItem.width = Math.sqrt(trm[0] * trm[0] + trm[1] * trm[1]);
|
||||
textContentItem.height = 0;
|
||||
textContentItem.vertical = true;
|
||||
}
|
||||
|
||||
var a = textState.textLineMatrix[0];
|
||||
var b = textState.textLineMatrix[1];
|
||||
var scaleLineX = Math.sqrt(a * a + b * b);
|
||||
a = textState.ctm[0];
|
||||
b = textState.ctm[1];
|
||||
var scaleCtmX = Math.sqrt(a * a + b * b);
|
||||
textContentItem.textAdvanceScale = scaleCtmX * scaleLineX;
|
||||
textContentItem.lastAdvanceWidth = 0;
|
||||
textContentItem.lastAdvanceHeight = 0;
|
||||
|
||||
var spaceWidth = font.spaceWidth / 1000 * textState.fontSize;
|
||||
if (spaceWidth) {
|
||||
textContentItem.spaceWidth = spaceWidth;
|
||||
textContentItem.fakeSpaceMin = spaceWidth * SPACE_FACTOR;
|
||||
textContentItem.fakeMultiSpaceMin = spaceWidth * MULTI_SPACE_FACTOR;
|
||||
textContentItem.fakeMultiSpaceMax =
|
||||
spaceWidth * MULTI_SPACE_FACTOR_MAX;
|
||||
// It's okay for monospace fonts to fake as much space as needed.
|
||||
textContentItem.textRunBreakAllowed = !font.isMonospace;
|
||||
} else {
|
||||
textContentItem.spaceWidth = 0;
|
||||
textContentItem.fakeSpaceMin = Infinity;
|
||||
textContentItem.fakeMultiSpaceMin = Infinity;
|
||||
textContentItem.fakeMultiSpaceMax = 0;
|
||||
textContentItem.textRunBreakAllowed = false;
|
||||
}
|
||||
|
||||
|
||||
textContentItem.initialized = true;
|
||||
return textContentItem;
|
||||
}
|
||||
|
||||
function runBidiTransform(textChunk) {
|
||||
var str = textChunk.str.join('');
|
||||
var bidiResult = PDFJS.bidi(str, -1, textChunk.vertical);
|
||||
return {
|
||||
str: bidiResult.str,
|
||||
dir: bidiResult.dir,
|
||||
width: textChunk.width,
|
||||
height: textChunk.height,
|
||||
transform: textChunk.transform,
|
||||
fontName: textChunk.fontName
|
||||
};
|
||||
}
|
||||
|
||||
function handleSetFont(fontName, fontRef) {
|
||||
return self.loadFont(fontName, fontRef, xref, resources).
|
||||
then(function (translated) {
|
||||
textState.font = translated.font;
|
||||
textState.fontMatrix = translated.font.fontMatrix ||
|
||||
FONT_IDENTITY_MATRIX;
|
||||
});
|
||||
}
|
||||
|
||||
function buildTextContentItem(chars) {
|
||||
var font = textState.font;
|
||||
var textChunk = ensureTextContentItem();
|
||||
var width = 0;
|
||||
var height = 0;
|
||||
var glyphs = font.charsToGlyphs(chars);
|
||||
@ -1049,8 +1100,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
var wordSpacing = textState.wordSpacing;
|
||||
charSpacing += wordSpacing;
|
||||
if (wordSpacing > 0) {
|
||||
addFakeSpaces(wordSpacing * 1000 / textState.fontSize,
|
||||
textChunk.str);
|
||||
addFakeSpaces(wordSpacing, textChunk.str);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1071,34 +1121,39 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
textChunk.str.push(glyphUnicode);
|
||||
}
|
||||
|
||||
var a = textState.textLineMatrix[0];
|
||||
var b = textState.textLineMatrix[1];
|
||||
var scaleLineX = Math.sqrt(a * a + b * b);
|
||||
a = textState.ctm[0];
|
||||
b = textState.ctm[1];
|
||||
var scaleCtmX = Math.sqrt(a * a + b * b);
|
||||
if (!font.vertical) {
|
||||
textChunk.width += width * scaleCtmX * scaleLineX;
|
||||
textChunk.lastAdvanceWidth = width;
|
||||
textChunk.width += width * textChunk.textAdvanceScale;
|
||||
} else {
|
||||
textChunk.height += Math.abs(height * scaleCtmX * scaleLineX);
|
||||
textChunk.lastAdvanceHeight = height;
|
||||
textChunk.height += Math.abs(height * textChunk.textAdvanceScale);
|
||||
}
|
||||
|
||||
return textChunk;
|
||||
}
|
||||
|
||||
function addFakeSpaces(width, strBuf) {
|
||||
var spaceWidth = textState.font.spaceWidth;
|
||||
if (spaceWidth <= 0) {
|
||||
if (width < textContentItem.fakeSpaceMin) {
|
||||
return;
|
||||
}
|
||||
var fakeSpaces = width / spaceWidth;
|
||||
if (fakeSpaces > MULTI_SPACE_FACTOR) {
|
||||
fakeSpaces = Math.round(fakeSpaces);
|
||||
while (fakeSpaces--) {
|
||||
if (width < textContentItem.fakeMultiSpaceMin) {
|
||||
strBuf.push(' ');
|
||||
return;
|
||||
}
|
||||
var fakeSpaces = Math.round(width / textContentItem.spaceWidth);
|
||||
while (fakeSpaces-- > 0) {
|
||||
strBuf.push(' ');
|
||||
}
|
||||
} else if (fakeSpaces > SPACE_FACTOR) {
|
||||
strBuf.push(' ');
|
||||
}
|
||||
|
||||
function flushTextContentItem() {
|
||||
if (!textContentItem.initialized) {
|
||||
return;
|
||||
}
|
||||
textContent.items.push(runBidiTransform(textContentItem));
|
||||
|
||||
textContentItem.initialized = false;
|
||||
textContentItem.str.length = 0;
|
||||
}
|
||||
|
||||
var timeSlotManager = new TimeSlotManager();
|
||||
@ -1119,35 +1174,62 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
textState = stateManager.state;
|
||||
var fn = operation.fn;
|
||||
args = operation.args;
|
||||
var advance;
|
||||
|
||||
switch (fn | 0) {
|
||||
case OPS.setFont:
|
||||
flushTextContentItem();
|
||||
textState.fontSize = args[1];
|
||||
return handleSetFont(args[0].name).then(function() {
|
||||
next(resolve, reject);
|
||||
}, reject);
|
||||
case OPS.setTextRise:
|
||||
flushTextContentItem();
|
||||
textState.textRise = args[0];
|
||||
break;
|
||||
case OPS.setHScale:
|
||||
flushTextContentItem();
|
||||
textState.textHScale = args[0] / 100;
|
||||
break;
|
||||
case OPS.setLeading:
|
||||
flushTextContentItem();
|
||||
textState.leading = args[0];
|
||||
break;
|
||||
case OPS.moveText:
|
||||
// Optimization to treat same line movement as advance
|
||||
var isSameTextLine = !textState.font ? false :
|
||||
((textState.font.vertical ? args[0] : args[1]) === 0);
|
||||
advance = args[0] - args[1];
|
||||
if (isSameTextLine && textContentItem.initialized &&
|
||||
advance > 0 &&
|
||||
advance <= textContentItem.fakeMultiSpaceMax) {
|
||||
textState.translateTextLineMatrix(args[0], args[1]);
|
||||
textContentItem.width +=
|
||||
(args[0] - textContentItem.lastAdvanceWidth);
|
||||
textContentItem.height +=
|
||||
(args[1] - textContentItem.lastAdvanceHeight);
|
||||
var diff = (args[0] - textContentItem.lastAdvanceWidth) -
|
||||
(args[1] - textContentItem.lastAdvanceHeight);
|
||||
addFakeSpaces(diff, textContentItem.str);
|
||||
break;
|
||||
}
|
||||
|
||||
flushTextContentItem();
|
||||
textState.translateTextLineMatrix(args[0], args[1]);
|
||||
textState.textMatrix = textState.textLineMatrix.slice();
|
||||
break;
|
||||
case OPS.setLeadingMoveText:
|
||||
flushTextContentItem();
|
||||
textState.leading = -args[1];
|
||||
textState.translateTextLineMatrix(args[0], args[1]);
|
||||
textState.textMatrix = textState.textLineMatrix.slice();
|
||||
break;
|
||||
case OPS.nextLine:
|
||||
flushTextContentItem();
|
||||
textState.carriageReturn();
|
||||
break;
|
||||
case OPS.setTextMatrix:
|
||||
flushTextContentItem();
|
||||
textState.setTextMatrix(args[0], args[1], args[2], args[3],
|
||||
args[4], args[5]);
|
||||
textState.setTextLineMatrix(args[0], args[1], args[2], args[3],
|
||||
@ -1160,17 +1242,19 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
textState.wordSpacing = args[0];
|
||||
break;
|
||||
case OPS.beginText:
|
||||
flushTextContentItem();
|
||||
textState.textMatrix = IDENTITY_MATRIX.slice();
|
||||
textState.textLineMatrix = IDENTITY_MATRIX.slice();
|
||||
break;
|
||||
case OPS.showSpacedText:
|
||||
var items = args[0];
|
||||
var textChunk = newTextChunk();
|
||||
var offset;
|
||||
for (var j = 0, jj = items.length; j < jj; j++) {
|
||||
if (typeof items[j] === 'string') {
|
||||
buildTextGeometry(items[j], textChunk);
|
||||
buildTextContentItem(items[j]);
|
||||
} else {
|
||||
ensureTextContentItem();
|
||||
|
||||
// PDF Specification 5.3.2 states:
|
||||
// The number is expressed in thousandths of a unit of text
|
||||
// space.
|
||||
@ -1179,45 +1263,57 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
// In the default coordinate system, a positive adjustment
|
||||
// has the effect of moving the next glyph painted either to
|
||||
// the left or down by the given amount.
|
||||
var advance = items[j];
|
||||
var val = advance * textState.fontSize / 1000;
|
||||
advance = items[j] * textState.fontSize / 1000;
|
||||
var breakTextRun = false;
|
||||
if (textState.font.vertical) {
|
||||
offset = val *
|
||||
offset = advance *
|
||||
(textState.textHScale * textState.textMatrix[2] +
|
||||
textState.textMatrix[3]);
|
||||
textState.translateTextMatrix(0, val);
|
||||
textState.translateTextMatrix(0, advance);
|
||||
breakTextRun = textContentItem.textRunBreakAllowed &&
|
||||
advance > textContentItem.fakeMultiSpaceMax;
|
||||
if (!breakTextRun) {
|
||||
// Value needs to be added to height to paint down.
|
||||
textChunk.height += offset;
|
||||
textContentItem.height += offset;
|
||||
}
|
||||
} else {
|
||||
offset = val * (
|
||||
advance = -advance;
|
||||
offset = advance * (
|
||||
textState.textHScale * textState.textMatrix[0] +
|
||||
textState.textMatrix[1]);
|
||||
textState.translateTextMatrix(-val, 0);
|
||||
textState.translateTextMatrix(advance, 0);
|
||||
breakTextRun = textContentItem.textRunBreakAllowed &&
|
||||
advance > textContentItem.fakeMultiSpaceMax;
|
||||
if (!breakTextRun) {
|
||||
// Value needs to be subtracted from width to paint left.
|
||||
textChunk.width -= offset;
|
||||
advance = -advance;
|
||||
textContentItem.width += offset;
|
||||
}
|
||||
if (advance > 0) {
|
||||
addFakeSpaces(advance, textChunk.str);
|
||||
}
|
||||
if (breakTextRun) {
|
||||
flushTextContentItem();
|
||||
} else if (advance > 0) {
|
||||
addFakeSpaces(advance, textContentItem.str);
|
||||
}
|
||||
}
|
||||
}
|
||||
bidiTexts.push(runBidi(textChunk));
|
||||
break;
|
||||
case OPS.showText:
|
||||
bidiTexts.push(runBidi(buildTextGeometry(args[0])));
|
||||
buildTextContentItem(args[0]);
|
||||
break;
|
||||
case OPS.nextLineShowText:
|
||||
flushTextContentItem();
|
||||
textState.carriageReturn();
|
||||
bidiTexts.push(runBidi(buildTextGeometry(args[0])));
|
||||
buildTextContentItem(args[0]);
|
||||
break;
|
||||
case OPS.nextLineSetSpacingShowText:
|
||||
flushTextContentItem();
|
||||
textState.wordSpacing = args[0];
|
||||
textState.charSpacing = args[1];
|
||||
textState.carriageReturn();
|
||||
bidiTexts.push(runBidi(buildTextGeometry(args[2])));
|
||||
buildTextContentItem(args[2]);
|
||||
break;
|
||||
case OPS.paintXObject:
|
||||
flushTextContentItem();
|
||||
if (args[0].code) {
|
||||
break;
|
||||
}
|
||||
@ -1229,7 +1325,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
var name = args[0].name;
|
||||
if (xobjsCache.key === name) {
|
||||
if (xobjsCache.texts) {
|
||||
Util.appendToArray(bidiTexts, xobjsCache.texts.items);
|
||||
Util.appendToArray(textContent.items, xobjsCache.texts.items);
|
||||
Util.extendObj(textContent.styles, xobjsCache.texts.styles);
|
||||
}
|
||||
break;
|
||||
@ -1260,7 +1356,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
return self.getTextContent(xobj, task,
|
||||
xobj.dict.get('Resources') || resources, stateManager).
|
||||
then(function (formTextContent) {
|
||||
Util.appendToArray(bidiTexts, formTextContent.items);
|
||||
Util.appendToArray(textContent.items, formTextContent.items);
|
||||
Util.extendObj(textContent.styles, formTextContent.styles);
|
||||
stateManager.restore();
|
||||
|
||||
@ -1270,6 +1366,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
next(resolve, reject);
|
||||
}, reject);
|
||||
case OPS.setGState:
|
||||
flushTextContentItem();
|
||||
var dictName = args[0];
|
||||
var extGState = resources.get('ExtGState');
|
||||
|
||||
@ -1300,6 +1397,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
||||
}, reject);
|
||||
return;
|
||||
}
|
||||
flushTextContentItem();
|
||||
resolve(textContent);
|
||||
});
|
||||
},
|
||||
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
@ -137,6 +137,7 @@
|
||||
!vertical.pdf
|
||||
!ZapfDingbats.pdf
|
||||
!bug878026.pdf
|
||||
!issue1045.pdf
|
||||
!issue5010.pdf
|
||||
!issue4934.pdf
|
||||
!issue4650.pdf
|
||||
|
BIN
test/pdfs/issue1045.pdf
Normal file
BIN
test/pdfs/issue1045.pdf
Normal file
Binary file not shown.
@ -607,6 +607,13 @@
|
||||
"link": false,
|
||||
"type": "load"
|
||||
},
|
||||
{ "id": "issue1045",
|
||||
"file": "pdfs/issue1045.pdf",
|
||||
"md5": "61d7e9bfbc03cd457dcefeab3e78a687",
|
||||
"rounds": 1,
|
||||
"link": false,
|
||||
"type": "text"
|
||||
},
|
||||
{ "id": "issue5677",
|
||||
"file": "pdfs/issue5677.pdf",
|
||||
"md5": "c9101578fcb806269145132724d24ac1",
|
||||
|
Loading…
Reference in New Issue
Block a user