[api-minor] Fix the way to chunk the strings (#13257)

- Improve chunking in order to fix some bugs where spaces are missing:
    * track the last position where a glyph has been drawn;
    * when a new glyph (the first glyph in a chunk) is added, compare its position with the last saved one and add a space or a break:
      - there are multiple ways to move the glyphs, and to avoid having to deal with all the different possibilities it's much easier to just compare positions;
      - and so there is now one function (i.e. "compareWithLastPosition") where all the work is done.
  - Add some end-of-line breaks so that the lines of text can be reconstructed;
  - Remove the multiple white spaces:
    * some gaps were filled with several white spaces, which makes it harder to find some sequences of words using the search tool;
    * other PDF readers replace such gaps with a single white space.

Update src/core/evaluator.js

Co-authored-by: Jonas Jenwald <jonas.jenwald@gmail.com>

Co-authored-by: Jonas Jenwald <jonas.jenwald@gmail.com>
This commit is contained in:
calixteman 2021-04-30 14:41:13 +02:00 committed by GitHub
parent e6fcb1e70b
commit af4dc55019
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 428 additions and 272 deletions

View File

@ -2042,7 +2042,7 @@ class PartialEvaluator {
resources = resources || Dict.empty; resources = resources || Dict.empty;
stateManager = stateManager || new StateManager(new TextState()); stateManager = stateManager || new StateManager(new TextState());
var WhitespaceRegexp = /\s/g; const WhitespaceRegexp = /\s/g;
var textContent = { var textContent = {
items: [], items: [],
@ -2051,26 +2051,43 @@ class PartialEvaluator {
var textContentItem = { var textContentItem = {
initialized: false, initialized: false,
str: [], str: [],
totalWidth: 0,
totalHeight: 0,
width: 0, width: 0,
height: 0, height: 0,
vertical: false, vertical: false,
lastAdvanceWidth: 0, lastCharSize: 0,
lastAdvanceHeight: 0, prevTransform: null,
textAdvanceScale: 0, textAdvanceScale: 0,
spaceWidth: 0, spaceWidth: 0,
fakeSpaceMin: Infinity, spaceInFlowMin: 0,
fakeMultiSpaceMin: Infinity, spaceInFlowMax: 0,
fakeMultiSpaceMax: -0, trackingSpaceMin: Infinity,
textRunBreakAllowed: false,
transform: null, transform: null,
fontName: null, fontName: null,
hasEOL: false,
isLastCharWhiteSpace: false,
}; };
var SPACE_FACTOR = 0.3;
var MULTI_SPACE_FACTOR = 1.5; // Used in addFakeSpaces.
var MULTI_SPACE_FACTOR_MAX = 4; // wsw stands for whitespace width.
// A white <= wsw * TRACKING_SPACE_FACTOR is a tracking space
// so it doesn't count as a space.
const TRACKING_SPACE_FACTOR = 0.3;
// A whitespace with a width in [wsw * MIN_FACTOR; wsw * MAX_FACTOR]
// is a space which will be inserted in the current flow of words.
// If the width is outside of this range then the flow is broken
// (which means a new span in the text layer).
// This helps the span in the text layer match, as closely as possible,
// what is displayed in the canvas.
const SPACE_IN_FLOW_MIN_FACTOR = 0.3;
const SPACE_IN_FLOW_MAX_FACTOR = 1.3;
var self = this; var self = this;
var xref = this.xref; var xref = this.xref;
const showSpacedTextBuffer = [];
// The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd. // The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd.
var xobjs = null; var xobjs = null;
@ -2081,26 +2098,10 @@ class PartialEvaluator {
var textState; var textState;
function ensureTextContentItem() { function getCurrentTextTransform() {
if (textContentItem.initialized) {
return textContentItem;
}
const font = textState.font,
loadedName = font.loadedName;
if (!seenStyles.has(loadedName)) {
seenStyles.add(loadedName);
textContent.styles[loadedName] = {
fontFamily: font.fallbackName,
ascent: font.ascent,
descent: font.descent,
vertical: font.vertical,
};
}
textContentItem.fontName = loadedName;
// 9.4.4 Text Space Details // 9.4.4 Text Space Details
var tsm = [ const font = textState.font;
const tsm = [
textState.fontSize * textState.textHScale, textState.fontSize * textState.textHScale,
0, 0,
0, 0,
@ -2120,18 +2121,44 @@ class PartialEvaluator {
} }
} }
var trm = Util.transform( return Util.transform(
textState.ctm, textState.ctm,
Util.transform(textState.textMatrix, tsm) Util.transform(textState.textMatrix, tsm)
); );
textContentItem.transform = trm; }
function ensureTextContentItem() {
if (textContentItem.initialized) {
return textContentItem;
}
const font = textState.font,
loadedName = font.loadedName;
if (!seenStyles.has(loadedName)) {
seenStyles.add(loadedName);
textContent.styles[loadedName] = {
fontFamily: font.fallbackName,
ascent: font.ascent,
descent: font.descent,
vertical: font.vertical,
};
}
textContentItem.fontName = loadedName;
const trm = (textContentItem.transform = getCurrentTextTransform());
if (!font.vertical) { if (!font.vertical) {
textContentItem.width = 0; textContentItem.width = textContentItem.totalWidth = 0;
textContentItem.height = Math.hypot(trm[2], trm[3]); textContentItem.height = textContentItem.totalHeight = Math.hypot(
trm[2],
trm[3]
);
textContentItem.vertical = false; textContentItem.vertical = false;
} else { } else {
textContentItem.width = Math.hypot(trm[0], trm[1]); textContentItem.width = textContentItem.totalWidth = Math.hypot(
textContentItem.height = 0; trm[0],
trm[1]
);
textContentItem.height = textContentItem.totalHeight = 0;
textContentItem.vertical = true; textContentItem.vertical = true;
} }
@ -2141,29 +2168,53 @@ class PartialEvaluator {
); );
const scaleCtmX = Math.hypot(textState.ctm[0], textState.ctm[1]); const scaleCtmX = Math.hypot(textState.ctm[0], textState.ctm[1]);
textContentItem.textAdvanceScale = scaleCtmX * scaleLineX; textContentItem.textAdvanceScale = scaleCtmX * scaleLineX;
textContentItem.lastAdvanceWidth = 0; textContentItem.lastCharSize = textContentItem.lastCharSize || 0;
textContentItem.lastAdvanceHeight = 0;
var spaceWidth = (font.spaceWidth / 1000) * textState.fontSize; var spaceWidth = (font.spaceWidth / 1000) * textState.fontSize;
if (spaceWidth) { if (spaceWidth) {
textContentItem.spaceWidth = spaceWidth; textContentItem.spaceWidth = spaceWidth;
textContentItem.fakeSpaceMin = spaceWidth * SPACE_FACTOR; textContentItem.trackingSpaceMin = spaceWidth * TRACKING_SPACE_FACTOR;
textContentItem.fakeMultiSpaceMin = spaceWidth * MULTI_SPACE_FACTOR; textContentItem.spaceInFlowMin = spaceWidth * SPACE_IN_FLOW_MIN_FACTOR;
textContentItem.fakeMultiSpaceMax = spaceWidth * MULTI_SPACE_FACTOR_MAX; textContentItem.spaceInFlowMax = spaceWidth * SPACE_IN_FLOW_MAX_FACTOR;
// It's okay for monospace fonts to fake as much space as needed.
textContentItem.textRunBreakAllowed = !font.isMonospace;
} else { } else {
textContentItem.spaceWidth = 0; textContentItem.spaceWidth = 0;
textContentItem.fakeSpaceMin = Infinity; textContentItem.trackingSpaceMin = Infinity;
textContentItem.fakeMultiSpaceMin = Infinity;
textContentItem.fakeMultiSpaceMax = 0;
textContentItem.textRunBreakAllowed = false;
} }
textContentItem.hasEOL = false;
textContentItem.initialized = true; textContentItem.initialized = true;
return textContentItem; return textContentItem;
} }
// Re-syncs the in-progress text item with the advance scale derived from the
// current text line matrix and CTM. Takes no arguments and returns nothing;
// it only mutates `textContentItem`.
function updateAdvanceScale() {
// Nothing to rescale when no text item is currently being built.
if (!textContentItem.initialized) {
return;
}
// Length of the (m[0], m[1]) vector of the text line matrix.
const scaleLineX = Math.hypot(
textState.textLineMatrix[0],
textState.textLineMatrix[1]
);
// Length of the (m[0], m[1]) vector of the CTM.
const scaleCtmX = Math.hypot(textState.ctm[0], textState.ctm[1]);
const scaleFactor = scaleCtmX * scaleLineX;
// Scale unchanged: the accumulated width/height stays valid as-is.
if (scaleFactor === textContentItem.textAdvanceScale) {
return;
}
// The width/height accumulated so far was measured under the previous scale:
// fold it, multiplied by that previous scale, into the running total and
// restart the unscaled accumulator before switching to the new scale.
if (!textContentItem.vertical) {
textContentItem.totalWidth +=
textContentItem.width * textContentItem.textAdvanceScale;
textContentItem.width = 0;
} else {
textContentItem.totalHeight +=
textContentItem.height * textContentItem.textAdvanceScale;
textContentItem.height = 0;
}
textContentItem.textAdvanceScale = scaleFactor;
}
function replaceWhitespace(str) { function replaceWhitespace(str) {
// Replaces all whitespaces with standard spaces (0x20), to avoid // Replaces all whitespaces with standard spaces (0x20), to avoid
// alignment issues between the textLayer and the canvas if the text // alignment issues between the textLayer and the canvas if the text
@ -2178,17 +2229,19 @@ class PartialEvaluator {
} }
function runBidiTransform(textChunk) { function runBidiTransform(textChunk) {
var str = textChunk.str.join(""); const text = textChunk.str.join("");
var bidiResult = bidi(str, -1, textChunk.vertical); const bidiResult = bidi(text, -1, textChunk.vertical);
const str = normalizeWhitespace
? replaceWhitespace(bidiResult.str)
: bidiResult.str;
return { return {
str: normalizeWhitespace str,
? replaceWhitespace(bidiResult.str)
: bidiResult.str,
dir: bidiResult.dir, dir: bidiResult.dir,
width: textChunk.width, width: textChunk.totalWidth,
height: textChunk.height, height: textChunk.totalHeight,
transform: textChunk.transform, transform: textChunk.transform,
fontName: textChunk.fontName, fontName: textChunk.fontName,
hasEOL: textChunk.hasEOL,
}; };
} }
@ -2202,91 +2255,276 @@ class PartialEvaluator {
}); });
} }
function buildTextContentItem(chars) { function compareWithLastPosition(fontSize) {
var font = textState.font; if (
var textChunk = ensureTextContentItem(); !combineTextItems ||
var width = 0; !textState.font ||
var height = 0; !textContentItem.prevTransform
var glyphs = font.charsToGlyphs(chars); ) {
for (var i = 0; i < glyphs.length; i++) { return;
var glyph = glyphs[i]; }
var glyphWidth = null;
if (font.vertical && glyph.vmetric) { const currentTransform = getCurrentTextTransform();
glyphWidth = glyph.vmetric[0]; const posX = currentTransform[4];
} else { const posY = currentTransform[5];
glyphWidth = glyph.width; const lastPosX = textContentItem.prevTransform[4];
const lastPosY = textContentItem.prevTransform[5];
if (lastPosX === posX && lastPosY === posY) {
return;
}
const advanceX = (posX - lastPosX) / textContentItem.textAdvanceScale;
const advanceY = (posY - lastPosY) / textContentItem.textAdvanceScale;
const HALF_LAST_CHAR = -0.5 * textContentItem.lastCharSize;
if (textState.font.vertical) {
if (
Math.abs(advanceX) >
textContentItem.width /
textContentItem.textAdvanceScale /* not the same column */
) {
appendEOL();
return;
} }
var glyphUnicode = glyph.unicode; if (HALF_LAST_CHAR > advanceY) {
var NormalizedUnicodes = getNormalizedUnicodes(); return;
if (NormalizedUnicodes[glyphUnicode] !== undefined) {
glyphUnicode = NormalizedUnicodes[glyphUnicode];
} }
glyphUnicode = reverseIfRtl(glyphUnicode);
var charSpacing = textState.charSpacing; if (advanceY > textContentItem.trackingSpaceMin) {
if (glyph.isSpace) { textContentItem.height += advanceY;
var wordSpacing = textState.wordSpacing; } else if (!addFakeSpaces(advanceY, 0, textContentItem.prevTransform)) {
charSpacing += wordSpacing; if (textContentItem.str.length === 0) {
if (wordSpacing > 0) { textContent.items.push({
addFakeSpaces(wordSpacing, textChunk.str); str: " ",
dir: "ltr",
width: 0,
height: advanceY,
transform: textContentItem.prevTransform,
fontName: textContentItem.fontName,
hasEOL: false,
});
textContentItem.isLastCharWhiteSpace = true;
} else {
textContentItem.height += advanceY;
} }
} }
var tx = 0; return;
var ty = 0; }
if (!font.vertical) {
var w0 = glyphWidth * textState.fontMatrix[0]; if (
tx = (w0 * textState.fontSize + charSpacing) * textState.textHScale; Math.abs(advanceY) >
width += tx; textContentItem.height /
textContentItem.textAdvanceScale /* not the same line */
) {
appendEOL();
return;
}
if (HALF_LAST_CHAR > advanceX) {
return;
}
if (advanceX <= textContentItem.trackingSpaceMin) {
textContentItem.width += advanceX;
} else if (!addFakeSpaces(advanceX, 0, textContentItem.prevTransform)) {
if (textContentItem.str.length === 0) {
textContent.items.push({
str: " ",
dir: "ltr",
width: advanceX,
height: 0,
transform: textContentItem.prevTransform,
fontName: textContentItem.fontName,
hasEOL: false,
});
textContentItem.isLastCharWhiteSpace = true;
} else { } else {
var w1 = glyphWidth * textState.fontMatrix[0]; textContentItem.width += advanceX;
ty = w1 * textState.fontSize + charSpacing;
height += ty;
} }
textState.translateTextMatrix(tx, ty);
textChunk.str.push(glyphUnicode);
} }
if (!font.vertical) {
textChunk.lastAdvanceWidth = width;
textChunk.width += width;
} else {
textChunk.lastAdvanceHeight = height;
textChunk.height += Math.abs(height);
}
return textChunk;
} }
function addFakeSpaces(width, strBuf) { function buildTextContentItem({ chars, extraSpacing, isFirstChunk }) {
if (width < textContentItem.fakeSpaceMin) { const font = textState.font;
if (!chars) {
// Just move according to the space we have.
const charSpacing = textState.charSpacing + extraSpacing;
if (charSpacing) {
if (!font.vertical) {
textState.translateTextMatrix(
charSpacing * textState.textHScale,
0
);
} else {
textState.translateTextMatrix(0, charSpacing);
}
}
return; return;
} }
if (width < textContentItem.fakeMultiSpaceMin) {
strBuf.push(" "); const NormalizedUnicodes = getNormalizedUnicodes();
return; const glyphs = font.charsToGlyphs(chars);
const scale = textState.fontMatrix[0] * textState.fontSize;
if (isFirstChunk) {
compareWithLastPosition(scale);
} }
var fakeSpaces = Math.round(width / textContentItem.spaceWidth);
while (fakeSpaces-- > 0) { let textChunk = ensureTextContentItem();
strBuf.push(" "); let size = 0;
let lastCharSize = 0;
for (let i = 0, ii = glyphs.length; i < ii; i++) {
const glyph = glyphs[i];
let charSpacing =
textState.charSpacing + (i === ii - 1 ? extraSpacing : 0);
let glyphUnicode = glyph.unicode;
if (glyph.isSpace) {
charSpacing += textState.wordSpacing;
textChunk.isLastCharWhiteSpace = true;
} else {
glyphUnicode = NormalizedUnicodes[glyphUnicode] || glyphUnicode;
glyphUnicode = reverseIfRtl(glyphUnicode);
textChunk.isLastCharWhiteSpace = false;
}
textChunk.str.push(glyphUnicode);
const glyphWidth =
font.vertical && glyph.vmetric ? glyph.vmetric[0] : glyph.width;
let scaledDim = glyphWidth * scale;
if (!font.vertical) {
scaledDim *= textState.textHScale;
textState.translateTextMatrix(scaledDim, 0);
} else {
textState.translateTextMatrix(0, scaledDim);
scaledDim = Math.abs(scaledDim);
}
size += scaledDim;
if (charSpacing) {
if (!font.vertical) {
charSpacing *= textState.textHScale;
}
scaledDim += charSpacing;
const wasSplit =
charSpacing > textContentItem.trackingSpaceMin &&
addFakeSpaces(charSpacing, size);
if (!font.vertical) {
textState.translateTextMatrix(charSpacing, 0);
} else {
textState.translateTextMatrix(0, charSpacing);
}
if (wasSplit) {
textChunk = ensureTextContentItem();
size = 0;
} else {
size += charSpacing;
}
}
lastCharSize = scaledDim;
} }
textChunk.lastCharSize = lastCharSize;
if (!font.vertical) {
textChunk.width += size;
} else {
textChunk.height += size;
}
textChunk.prevTransform = getCurrentTextTransform();
}
// Records an end-of-line in the extracted text content. Takes no arguments
// and returns nothing; it mutates `textContentItem` and `textContent.items`.
function appendEOL() {
if (textContentItem.initialized) {
// A text item is in progress: flag it and emit it.
textContentItem.hasEOL = true;
flushTextContentItem();
} else if (textContent.items.length > 0) {
// No item in progress: flag the most recently emitted item instead.
textContent.items[textContent.items.length - 1].hasEOL = true;
} else {
// Nothing has been emitted yet: push an empty item that only carries the
// end-of-line flag.
// NOTE(review): this dereferences `textState.font`; the `OPS.nextLine`
// call site visible in this diff does not check that a font is set first.
// Confirm `textState.font` cannot be null here.
textContent.items.push({
str: "",
dir: "ltr",
width: 0,
height: 0,
transform: getCurrentTextTransform(),
fontName: textState.font.loadedName,
hasEOL: true,
});
}
// Reset the per-item trailing-character state after the line break.
textContentItem.isLastCharWhiteSpace = false;
textContentItem.lastCharSize = 0;
}
function addFakeSpaces(width, size, transf = null) {
if (
textContentItem.spaceInFlowMin <= width &&
width <= textContentItem.spaceInFlowMax
) {
if (textContentItem.initialized) {
textContentItem.str.push(" ");
textContentItem.isLastCharWhiteSpace = true;
}
return false;
}
const fontName = textContentItem.fontName;
let height = 0;
width *= textContentItem.textAdvanceScale;
if (!textContentItem.vertical) {
textContentItem.width += size;
} else {
textContentItem.height += size;
height = width;
width = 0;
}
flushTextContentItem();
if (textContentItem.isLastCharWhiteSpace) {
return true;
}
textContentItem.isLastCharWhiteSpace = true;
textContent.items.push({
str: " ",
// TODO: check if using the orientation from last chunk is
// better or not.
dir: "ltr",
width,
height,
transform: transf ? transf : getCurrentTextTransform(),
fontName,
hasEOL: false,
});
return true;
} }
function flushTextContentItem() { function flushTextContentItem() {
if (!textContentItem.initialized) { if (!textContentItem.initialized || !textContentItem.str) {
return; return;
} }
// Do final text scaling. // Do final text scaling.
if (!textContentItem.vertical) { if (!textContentItem.vertical) {
textContentItem.width *= textContentItem.textAdvanceScale; textContentItem.totalWidth +=
textContentItem.width * textContentItem.textAdvanceScale;
} else { } else {
textContentItem.height *= textContentItem.textAdvanceScale; textContentItem.totalHeight +=
textContentItem.height * textContentItem.textAdvanceScale;
} }
textContent.items.push(runBidiTransform(textContentItem));
textContent.items.push(runBidiTransform(textContentItem));
textContentItem.initialized = false; textContentItem.initialized = false;
textContentItem.str.length = 0; textContentItem.str.length = 0;
} }
@ -2330,7 +2568,6 @@ class PartialEvaluator {
textState = stateManager.state; textState = stateManager.state;
var fn = operation.fn; var fn = operation.fn;
args = operation.args; args = operation.args;
var advance, diff;
switch (fn | 0) { switch (fn | 0) {
case OPS.setFont: case OPS.setFont:
@ -2363,32 +2600,6 @@ class PartialEvaluator {
textState.leading = args[0]; textState.leading = args[0];
break; break;
case OPS.moveText: case OPS.moveText:
// Optimization to treat same line movement as advance
var isSameTextLine = !textState.font
? false
: (textState.font.vertical ? args[0] : args[1]) === 0;
advance = args[0] - args[1];
if (
combineTextItems &&
isSameTextLine &&
textContentItem.initialized &&
advance > 0 &&
advance <= textContentItem.fakeMultiSpaceMax
) {
textState.translateTextLineMatrix(args[0], args[1]);
textContentItem.width +=
args[0] - textContentItem.lastAdvanceWidth;
textContentItem.height +=
args[1] - textContentItem.lastAdvanceHeight;
diff =
args[0] -
textContentItem.lastAdvanceWidth -
(args[1] - textContentItem.lastAdvanceHeight);
addFakeSpaces(diff, textContentItem.str);
break;
}
flushTextContentItem();
textState.translateTextLineMatrix(args[0], args[1]); textState.translateTextLineMatrix(args[0], args[1]);
textState.textMatrix = textState.textLineMatrix.slice(); textState.textMatrix = textState.textLineMatrix.slice();
break; break;
@ -2399,40 +2610,10 @@ class PartialEvaluator {
textState.textMatrix = textState.textLineMatrix.slice(); textState.textMatrix = textState.textLineMatrix.slice();
break; break;
case OPS.nextLine: case OPS.nextLine:
flushTextContentItem(); appendEOL();
textState.carriageReturn(); textState.carriageReturn();
break; break;
case OPS.setTextMatrix: case OPS.setTextMatrix:
// Optimization to treat same line movement as advance.
advance = textState.calcTextLineMatrixAdvance(
args[0],
args[1],
args[2],
args[3],
args[4],
args[5]
);
if (
combineTextItems &&
advance !== null &&
textContentItem.initialized &&
advance.value > 0 &&
advance.value <= textContentItem.fakeMultiSpaceMax
) {
textState.translateTextLineMatrix(advance.width, advance.height);
textContentItem.width +=
advance.width - textContentItem.lastAdvanceWidth;
textContentItem.height +=
advance.height - textContentItem.lastAdvanceHeight;
diff =
advance.width -
textContentItem.lastAdvanceWidth -
(advance.height - textContentItem.lastAdvanceHeight);
addFakeSpaces(diff, textContentItem.str);
break;
}
flushTextContentItem();
textState.setTextMatrix( textState.setTextMatrix(
args[0], args[0],
args[1], args[1],
@ -2449,6 +2630,7 @@ class PartialEvaluator {
args[4], args[4],
args[5] args[5]
); );
updateAdvanceScale();
break; break;
case OPS.setCharSpacing: case OPS.setCharSpacing:
textState.charSpacing = args[0]; textState.charSpacing = args[0];
@ -2466,14 +2648,16 @@ class PartialEvaluator {
self.ensureStateFont(stateManager.state); self.ensureStateFont(stateManager.state);
continue; continue;
} }
var items = args[0];
var offset;
for (var j = 0, jj = items.length; j < jj; j++) {
if (typeof items[j] === "string") {
buildTextContentItem(items[j]);
} else if (isNum(items[j])) {
ensureTextContentItem();
const spaceFactor =
((textState.font.vertical ? 1 : -1) * textState.fontSize) / 1000;
const elements = args[0];
let isFirstChunk = true;
for (let i = 0, ii = elements.length; i < ii - 1; i++) {
const item = elements[i];
if (typeof item === "string") {
showSpacedTextBuffer.push(item);
} else if (typeof item === "number" && item !== 0) {
// PDF Specification 5.3.2 states: // PDF Specification 5.3.2 states:
// The number is expressed in thousandths of a unit of text // The number is expressed in thousandths of a unit of text
// space. // space.
@ -2482,64 +2666,75 @@ class PartialEvaluator {
// In the default coordinate system, a positive adjustment // In the default coordinate system, a positive adjustment
// has the effect of moving the next glyph painted either to // has the effect of moving the next glyph painted either to
// the left or down by the given amount. // the left or down by the given amount.
advance = (items[j] * textState.fontSize) / 1000; const str = showSpacedTextBuffer.join("");
var breakTextRun = false; showSpacedTextBuffer.length = 0;
if (textState.font.vertical) { buildTextContentItem({
offset = advance; chars: str,
textState.translateTextMatrix(0, offset); extraSpacing: item * spaceFactor,
breakTextRun = isFirstChunk,
textContentItem.textRunBreakAllowed && });
advance > textContentItem.fakeMultiSpaceMax; if (str && isFirstChunk) {
if (!breakTextRun) { isFirstChunk = false;
// Value needs to be added to height to paint down.
textContentItem.height += offset;
}
} else {
advance = -advance;
offset = advance * textState.textHScale;
textState.translateTextMatrix(offset, 0);
breakTextRun =
textContentItem.textRunBreakAllowed &&
advance > textContentItem.fakeMultiSpaceMax;
if (!breakTextRun) {
// Value needs to be subtracted from width to paint left.
textContentItem.width += offset;
}
}
if (breakTextRun) {
flushTextContentItem();
} else if (advance > 0) {
addFakeSpaces(advance, textContentItem.str);
} }
} }
} }
const item = elements[elements.length - 1];
if (typeof item === "string") {
showSpacedTextBuffer.push(item);
}
if (showSpacedTextBuffer.length > 0) {
const str = showSpacedTextBuffer.join("");
showSpacedTextBuffer.length = 0;
buildTextContentItem({
chars: str,
extraSpacing: 0,
isFirstChunk,
});
}
break; break;
case OPS.showText: case OPS.showText:
if (!stateManager.state.font) { if (!stateManager.state.font) {
self.ensureStateFont(stateManager.state); self.ensureStateFont(stateManager.state);
continue; continue;
} }
buildTextContentItem(args[0]);
buildTextContentItem({
chars: args[0],
extraSpacing: 0,
isFirstChunk: true,
});
break; break;
case OPS.nextLineShowText: case OPS.nextLineShowText:
if (!stateManager.state.font) { if (!stateManager.state.font) {
self.ensureStateFont(stateManager.state); self.ensureStateFont(stateManager.state);
continue; continue;
} }
textContentItem.hasEOL = true;
flushTextContentItem(); flushTextContentItem();
textState.carriageReturn(); textState.carriageReturn();
buildTextContentItem(args[0]); buildTextContentItem({
chars: args[0],
extraSpacing: 0,
isFirstChunk: true,
});
break; break;
case OPS.nextLineSetSpacingShowText: case OPS.nextLineSetSpacingShowText:
if (!stateManager.state.font) { if (!stateManager.state.font) {
self.ensureStateFont(stateManager.state); self.ensureStateFont(stateManager.state);
continue; continue;
} }
textContentItem.hasEOL = true;
flushTextContentItem(); flushTextContentItem();
textState.wordSpacing = args[0]; textState.wordSpacing = args[0];
textState.charSpacing = args[1]; textState.charSpacing = args[1];
textState.carriageReturn(); textState.carriageReturn();
buildTextContentItem(args[2]); buildTextContentItem({
chars: args[2],
extraSpacing: 0,
isFirstChunk: true,
});
break; break;
case OPS.paintXObject: case OPS.paintXObject:
flushTextContentItem(); flushTextContentItem();
@ -3887,46 +4082,6 @@ class TextState {
m[5] = m[1] * x + m[3] * y + m[5]; m[5] = m[1] * x + m[3] * y + m[5];
} }
// (Removed by this commit.) Computes the advance along the writing direction
// that a new text matrix [a, b, c, d, e, f] represents relative to the current
// `textLineMatrix`.
// Returns null when no font is set, when the a/b/c/d components differ from
// the current text line matrix, or when the translation has a component off
// the writing axis (x for vertical fonts, y otherwise).
// Otherwise returns `{ width, height, value }`, where `value` is `height` for
// vertical fonts and `width` otherwise.
calcTextLineMatrixAdvance(a, b, c, d, e, f) {
var font = this.font;
if (!font) {
return null;
}
var m = this.textLineMatrix;
if (!(a === m[0] && b === m[1] && c === m[2] && d === m[3])) {
return null;
}
// Translation difference between the new matrix and the current line matrix.
var txDiff = e - m[4],
tyDiff = f - m[5];
if ((font.vertical && txDiff !== 0) || (!font.vertical && tyDiff !== 0)) {
return null;
}
// Solve for the offset (tx, ty) that, mapped through the a/b/c/d part of
// the matrix, produces the translation difference above.
// NOTE(review): `denominator` is not checked for zero here; confirm callers
// never pass a matrix whose a*d - b*c is 0.
var tx,
ty,
denominator = a * d - b * c;
if (font.vertical) {
tx = (-tyDiff * c) / denominator;
ty = (tyDiff * a) / denominator;
} else {
tx = (txDiff * d) / denominator;
ty = (-txDiff * b) / denominator;
}
return { width: tx, height: ty, value: font.vertical ? ty : tx };
}
// (Removed by this commit.) Returns the result of combining, via
// `Util.transform`, the given `ctm` with the text matrix and a matrix built
// from the font size, horizontal scale and text rise.
calcRenderMatrix(ctm) {
// 9.4.4 Text Space Details
// Matrix built from fontSize * textHScale, fontSize and textRise.
var tsm = [
this.fontSize * this.textHScale,
0,
0,
this.fontSize,
0,
this.textRise,
];
return Util.transform(ctm, Util.transform(this.textMatrix, tsm));
}
carriageReturn() { carriageReturn() {
this.translateTextLineMatrix(0, -this.leading); this.translateTextLineMatrix(0, -this.leading);
this.textMatrix = this.textLineMatrix.slice(); this.textMatrix = this.textLineMatrix.slice();

View File

@ -57,12 +57,7 @@ const renderTextLayer = (function renderTextLayerClosure() {
const DEFAULT_FONT_SIZE = 30; const DEFAULT_FONT_SIZE = 30;
const DEFAULT_FONT_ASCENT = 0.8; const DEFAULT_FONT_ASCENT = 0.8;
const ascentCache = new Map(); const ascentCache = new Map();
const AllWhitespaceRegexp = /^\s+$/g;
const NonWhitespaceRegexp = /\S/;
function isAllWhitespace(str) {
return !NonWhitespaceRegexp.test(str);
}
function getAscent(fontFamily, ctx) { function getAscent(fontFamily, ctx) {
const cachedAscent = ascentCache.get(fontFamily); const cachedAscent = ascentCache.get(fontFamily);
@ -133,7 +128,8 @@ const renderTextLayer = (function renderTextLayerClosure() {
const textDivProperties = { const textDivProperties = {
angle: 0, angle: 0,
canvasWidth: 0, canvasWidth: 0,
isWhitespace: false, hasText: geom.str !== "",
hasEOL: geom.hasEOL,
originalTransform: null, originalTransform: null,
paddingBottom: 0, paddingBottom: 0,
paddingLeft: 0, paddingLeft: 0,
@ -142,12 +138,8 @@ const renderTextLayer = (function renderTextLayerClosure() {
scale: 1, scale: 1,
}; };
textDiv.textContent = geom.str;
task._textDivs.push(textDiv); task._textDivs.push(textDiv);
if (isAllWhitespace(geom.str)) {
textDivProperties.isWhitespace = true;
task._textDivProperties.set(textDiv, textDivProperties);
return;
}
const tx = Util.transform(task._viewport.transform, geom.transform); const tx = Util.transform(task._viewport.transform, geom.transform);
let angle = Math.atan2(tx[1], tx[0]); let angle = Math.atan2(tx[1], tx[0]);
@ -176,7 +168,6 @@ const renderTextLayer = (function renderTextLayerClosure() {
// Keeps screen readers from pausing on every new text span. // Keeps screen readers from pausing on every new text span.
textDiv.setAttribute("role", "presentation"); textDiv.setAttribute("role", "presentation");
textDiv.textContent = geom.str;
// geom.dir may be 'ttb' for vertical texts. // geom.dir may be 'ttb' for vertical texts.
textDiv.dir = geom.dir; textDiv.dir = geom.dir;
@ -192,7 +183,10 @@ const renderTextLayer = (function renderTextLayerClosure() {
// little effect on text highlighting. This makes scrolling on docs with // little effect on text highlighting. This makes scrolling on docs with
// lots of such divs a lot faster. // lots of such divs a lot faster.
let shouldScaleText = false; let shouldScaleText = false;
if (geom.str.length > 1) { if (
geom.str.length > 1 ||
(task._enhanceTextSelection && AllWhitespaceRegexp.test(geom.str))
) {
shouldScaleText = true; shouldScaleText = true;
} else if (geom.transform[0] !== geom.transform[3]) { } else if (geom.transform[0] !== geom.transform[3]) {
const absScaleX = Math.abs(geom.transform[0]), const absScaleX = Math.abs(geom.transform[0]),
@ -218,7 +212,7 @@ const renderTextLayer = (function renderTextLayerClosure() {
task._layoutText(textDiv); task._layoutText(textDiv);
} }
if (task._enhanceTextSelection) { if (task._enhanceTextSelection && textDivProperties.hasText) {
let angleCos = 1, let angleCos = 1,
angleSin = 0; angleSin = 0;
if (angle !== 0) { if (angle !== 0) {
@ -666,12 +660,9 @@ const renderTextLayer = (function renderTextLayerClosure() {
_layoutText(textDiv) { _layoutText(textDiv) {
const textDivProperties = this._textDivProperties.get(textDiv); const textDivProperties = this._textDivProperties.get(textDiv);
if (textDivProperties.isWhitespace) {
return;
}
let transform = ""; let transform = "";
if (textDivProperties.canvasWidth !== 0) { if (textDivProperties.canvasWidth !== 0 && textDivProperties.hasText) {
const { fontSize, fontFamily } = textDiv.style; const { fontSize, fontFamily } = textDiv.style;
// Only build font string and set to context if different from last. // Only build font string and set to context if different from last.
@ -700,8 +691,15 @@ const renderTextLayer = (function renderTextLayerClosure() {
} }
textDiv.style.transform = transform; textDiv.style.transform = transform;
} }
this._textDivProperties.set(textDiv, textDivProperties);
this._container.appendChild(textDiv); if (textDivProperties.hasText) {
this._container.appendChild(textDiv);
}
if (textDivProperties.hasEOL) {
const br = document.createElement("br");
br.setAttribute("role", "presentation");
this._container.appendChild(br);
}
}, },
_render: function TextLayer_render(timeout) { _render: function TextLayer_render(timeout) {
@ -778,7 +776,7 @@ const renderTextLayer = (function renderTextLayerClosure() {
const div = this._textDivs[i]; const div = this._textDivs[i];
const divProps = this._textDivProperties.get(div); const divProps = this._textDivProperties.get(div);
if (divProps.isWhitespace) { if (!divProps.hasText) {
continue; continue;
} }
if (expandDivs) { if (expandDivs) {

View File

@ -1495,12 +1495,14 @@ describe("api", function () {
}); });
const data = await Promise.all([defaultPromise, parametersPromise]); const data = await Promise.all([defaultPromise, parametersPromise]);
expect(!!data[0].items).toEqual(true); expect(!!data[0].items).toEqual(true);
expect(data[0].items.length).toEqual(7); expect(data[0].items.length).toEqual(12);
expect(!!data[0].styles).toEqual(true); expect(!!data[0].styles).toEqual(true);
// A simple check that ensures the two `textContent` object match. expect(!!data[1].items).toEqual(true);
expect(JSON.stringify(data[0])).toEqual(JSON.stringify(data[1])); expect(data[1].items.length).toEqual(7);
expect(!!data[1].styles).toEqual(true);
}); });
it("gets text content, with correct properties (issue 8276)", async function () { it("gets text content, with correct properties (issue 8276)", async function () {
@ -1520,6 +1522,7 @@ describe("api", function () {
str: "Issue 8276", str: "Issue 8276",
transform: [18, 0, 0, 18, 441.81, 708.4499999999999], transform: [18, 0, 0, 18, 441.81, 708.4499999999999],
width: 77.49, width: 77.49,
hasEOL: false,
}); });
expect(styles.Times).toEqual({ expect(styles.Times).toEqual({
fontFamily: "serif", fontFamily: "serif",