Fix issues in text selection
- PR #13257 fixed a lot of issues, but not all of them; this patch aims to fix almost all of the remaining ones. - The idea in this new patch is to compare the position of each new glyph with the last position where a glyph was drawn; - spaces are not "drawn": a space just moves the cursor and is not added to the chunk; - this way a space followed by a cursor move can be treated as a single space, which helps to merge all spaces into one. - To distinguish real spaces from tracking ones, we previously used a factor of the space width (from the font); - that was a pretty good idea in general, but it fails with some fonts where the space is too big; - Poppler uses a factor of the font size instead, which is an excellent idea (a gap <= 0.1 * fontSize is treated as a tracking space).
This commit is contained in:
parent
f5b79be0b7
commit
61d1063276
@ -2169,6 +2169,8 @@ class PartialEvaluator {
|
|||||||
stateManager = stateManager || new StateManager(new TextState());
|
stateManager = stateManager || new StateManager(new TextState());
|
||||||
|
|
||||||
const WhitespaceRegexp = /\s/g;
|
const WhitespaceRegexp = /\s/g;
|
||||||
|
const DiacriticRegExp = new RegExp("^\\p{Mn}$", "u");
|
||||||
|
const NormalizedUnicodes = getNormalizedUnicodes();
|
||||||
|
|
||||||
const textContent = {
|
const textContent = {
|
||||||
items: [],
|
items: [],
|
||||||
@ -2182,34 +2184,37 @@ class PartialEvaluator {
|
|||||||
width: 0,
|
width: 0,
|
||||||
height: 0,
|
height: 0,
|
||||||
vertical: false,
|
vertical: false,
|
||||||
lastCharSize: 0,
|
|
||||||
prevTransform: null,
|
prevTransform: null,
|
||||||
textAdvanceScale: 0,
|
textAdvanceScale: 0,
|
||||||
spaceWidth: 0,
|
|
||||||
spaceInFlowMin: 0,
|
spaceInFlowMin: 0,
|
||||||
spaceInFlowMax: 0,
|
spaceInFlowMax: 0,
|
||||||
trackingSpaceMin: Infinity,
|
trackingSpaceMin: Infinity,
|
||||||
|
negativeSpaceMax: -Infinity,
|
||||||
transform: null,
|
transform: null,
|
||||||
fontName: null,
|
fontName: null,
|
||||||
hasEOL: false,
|
hasEOL: false,
|
||||||
isLastCharWhiteSpace: false,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Used in addFakeSpaces.
|
// Used in addFakeSpaces.
|
||||||
// wsw stands for whitespace width.
|
|
||||||
|
|
||||||
// A white <= wsw * TRACKING_SPACE_FACTOR is a tracking space
|
// A white <= fontSize * TRACKING_SPACE_FACTOR is a tracking space
|
||||||
// so it doesn't count as a space.
|
// so it doesn't count as a space.
|
||||||
const TRACKING_SPACE_FACTOR = 0.3;
|
const TRACKING_SPACE_FACTOR = 0.1;
|
||||||
|
|
||||||
// A white with a width in [wsw * MIN_FACTOR; wsw * MAX_FACTOR]
|
// A negative white < fontSize * NEGATIVE_SPACE_FACTOR induces
|
||||||
|
// a break (a new chunk of text is created).
|
||||||
|
// It doesn't change anything when the text is copied but
|
||||||
|
// it improves potential mismatch between text layer and canvas.
|
||||||
|
const NEGATIVE_SPACE_FACTOR = -0.2;
|
||||||
|
|
||||||
|
// A white with a width in [fontSize * MIN_FACTOR; fontSize * MAX_FACTOR]
|
||||||
// is a space which will be inserted in the current flow of words.
|
// is a space which will be inserted in the current flow of words.
|
||||||
// If the width is outside of this range then the flow is broken
|
// If the width is outside of this range then the flow is broken
|
||||||
// (which means a new span in the text layer).
|
// (which means a new span in the text layer).
|
||||||
// It's useful to adjust the best as possible the span in the layer
|
// It's useful to adjust the best as possible the span in the layer
|
||||||
// to what is displayed in the canvas.
|
// to what is displayed in the canvas.
|
||||||
const SPACE_IN_FLOW_MIN_FACTOR = 0.3;
|
const SPACE_IN_FLOW_MIN_FACTOR = 0.1;
|
||||||
const SPACE_IN_FLOW_MAX_FACTOR = 1.3;
|
const SPACE_IN_FLOW_MAX_FACTOR = 0.6;
|
||||||
|
|
||||||
const self = this;
|
const self = this;
|
||||||
const xref = this.xref;
|
const xref = this.xref;
|
||||||
@ -2294,18 +2299,15 @@ class PartialEvaluator {
|
|||||||
);
|
);
|
||||||
const scaleCtmX = Math.hypot(textState.ctm[0], textState.ctm[1]);
|
const scaleCtmX = Math.hypot(textState.ctm[0], textState.ctm[1]);
|
||||||
textContentItem.textAdvanceScale = scaleCtmX * scaleLineX;
|
textContentItem.textAdvanceScale = scaleCtmX * scaleLineX;
|
||||||
textContentItem.lastCharSize = textContentItem.lastCharSize || 0;
|
|
||||||
|
|
||||||
const spaceWidth = (font.spaceWidth / 1000) * textState.fontSize;
|
textContentItem.trackingSpaceMin =
|
||||||
if (spaceWidth) {
|
textState.fontSize * TRACKING_SPACE_FACTOR;
|
||||||
textContentItem.spaceWidth = spaceWidth;
|
textContentItem.negativeSpaceMax =
|
||||||
textContentItem.trackingSpaceMin = spaceWidth * TRACKING_SPACE_FACTOR;
|
textState.fontSize * NEGATIVE_SPACE_FACTOR;
|
||||||
textContentItem.spaceInFlowMin = spaceWidth * SPACE_IN_FLOW_MIN_FACTOR;
|
textContentItem.spaceInFlowMin =
|
||||||
textContentItem.spaceInFlowMax = spaceWidth * SPACE_IN_FLOW_MAX_FACTOR;
|
textState.fontSize * SPACE_IN_FLOW_MIN_FACTOR;
|
||||||
} else {
|
textContentItem.spaceInFlowMax =
|
||||||
textContentItem.spaceWidth = 0;
|
textState.fontSize * SPACE_IN_FLOW_MAX_FACTOR;
|
||||||
textContentItem.trackingSpaceMin = Infinity;
|
|
||||||
}
|
|
||||||
|
|
||||||
textContentItem.hasEOL = false;
|
textContentItem.hasEOL = false;
|
||||||
|
|
||||||
@ -2395,7 +2397,7 @@ class PartialEvaluator {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function compareWithLastPosition(fontSize) {
|
function compareWithLastPosition() {
|
||||||
if (
|
if (
|
||||||
!combineTextItems ||
|
!combineTextItems ||
|
||||||
!textState.font ||
|
!textState.font ||
|
||||||
@ -2405,36 +2407,76 @@ class PartialEvaluator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const currentTransform = getCurrentTextTransform();
|
const currentTransform = getCurrentTextTransform();
|
||||||
const posX = currentTransform[4];
|
let posX = currentTransform[4];
|
||||||
const posY = currentTransform[5];
|
let posY = currentTransform[5];
|
||||||
const lastPosX = textContentItem.prevTransform[4];
|
let lastPosX = textContentItem.prevTransform[4];
|
||||||
const lastPosY = textContentItem.prevTransform[5];
|
let lastPosY = textContentItem.prevTransform[5];
|
||||||
|
|
||||||
if (lastPosX === posX && lastPosY === posY) {
|
if (lastPosX === posX && lastPosY === posY) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const advanceX = (posX - lastPosX) / textContentItem.textAdvanceScale;
|
let rotate = 0;
|
||||||
const advanceY = (posY - lastPosY) / textContentItem.textAdvanceScale;
|
// Take into account the rotation is the current transform.
|
||||||
const HALF_LAST_CHAR = -0.5 * textContentItem.lastCharSize;
|
// Only rotations with an angle of 0, 90, 180 or 270 are considered.
|
||||||
|
if (
|
||||||
|
currentTransform[0] &&
|
||||||
|
currentTransform[1] === 0 &&
|
||||||
|
currentTransform[2] === 0
|
||||||
|
) {
|
||||||
|
rotate = currentTransform[0] > 0 ? 0 : 180;
|
||||||
|
} else if (
|
||||||
|
currentTransform[1] &&
|
||||||
|
currentTransform[0] === 0 &&
|
||||||
|
currentTransform[3] === 0
|
||||||
|
) {
|
||||||
|
rotate += currentTransform[1] > 0 ? 90 : 270;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rotate !== 0) {
|
||||||
|
switch (rotate) {
|
||||||
|
case 90:
|
||||||
|
[posX, posY] = [posY, posX];
|
||||||
|
[lastPosX, lastPosY] = [lastPosY, lastPosX];
|
||||||
|
break;
|
||||||
|
case 180:
|
||||||
|
[posX, posY, lastPosX, lastPosY] = [
|
||||||
|
-posX,
|
||||||
|
-posY,
|
||||||
|
-lastPosX,
|
||||||
|
-lastPosY,
|
||||||
|
];
|
||||||
|
break;
|
||||||
|
case 270:
|
||||||
|
[posX, posY] = [-posY, -posX];
|
||||||
|
[lastPosX, lastPosY] = [-lastPosY, -lastPosX];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (textState.font.vertical) {
|
if (textState.font.vertical) {
|
||||||
|
const advanceY = (lastPosY - posY) / textContentItem.textAdvanceScale;
|
||||||
|
const advanceX = posX - lastPosX;
|
||||||
|
if (advanceY < textContentItem.negativeSpaceMax) {
|
||||||
if (
|
if (
|
||||||
Math.abs(advanceX) >
|
Math.abs(advanceX) >
|
||||||
textContentItem.width /
|
0.5 * textContentItem.width /* not the same column */
|
||||||
textContentItem.textAdvanceScale /* not the same column */
|
|
||||||
) {
|
) {
|
||||||
appendEOL();
|
appendEOL();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (HALF_LAST_CHAR > advanceY) {
|
flushTextContentItem();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (advanceY > textContentItem.trackingSpaceMin) {
|
if (Math.abs(advanceX) > textContentItem.height) {
|
||||||
|
appendEOL();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (advanceY <= textContentItem.trackingSpaceMin) {
|
||||||
textContentItem.height += advanceY;
|
textContentItem.height += advanceY;
|
||||||
} else if (!addFakeSpaces(advanceY, 0, textContentItem.prevTransform)) {
|
} else if (!addFakeSpaces(advanceY, textContentItem.prevTransform)) {
|
||||||
if (textContentItem.str.length === 0) {
|
if (textContentItem.str.length === 0) {
|
||||||
textContent.items.push({
|
textContent.items.push({
|
||||||
str: " ",
|
str: " ",
|
||||||
@ -2445,7 +2487,6 @@ class PartialEvaluator {
|
|||||||
fontName: textContentItem.fontName,
|
fontName: textContentItem.fontName,
|
||||||
hasEOL: false,
|
hasEOL: false,
|
||||||
});
|
});
|
||||||
textContentItem.isLastCharWhiteSpace = true;
|
|
||||||
} else {
|
} else {
|
||||||
textContentItem.height += advanceY;
|
textContentItem.height += advanceY;
|
||||||
}
|
}
|
||||||
@ -2454,22 +2495,28 @@ class PartialEvaluator {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const advanceX = (posX - lastPosX) / textContentItem.textAdvanceScale;
|
||||||
|
const advanceY = posY - lastPosY;
|
||||||
|
if (advanceX < textContentItem.negativeSpaceMax) {
|
||||||
if (
|
if (
|
||||||
Math.abs(advanceY) >
|
Math.abs(advanceY) >
|
||||||
textContentItem.height /
|
0.5 * textContentItem.height /* not the same line */
|
||||||
textContentItem.textAdvanceScale /* not the same line */
|
|
||||||
) {
|
) {
|
||||||
appendEOL();
|
appendEOL();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
flushTextContentItem();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (HALF_LAST_CHAR > advanceX) {
|
if (Math.abs(advanceY) > textContentItem.height) {
|
||||||
|
appendEOL();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (advanceX <= textContentItem.trackingSpaceMin) {
|
if (advanceX <= textContentItem.trackingSpaceMin) {
|
||||||
textContentItem.width += advanceX;
|
textContentItem.width += advanceX;
|
||||||
} else if (!addFakeSpaces(advanceX, 0, textContentItem.prevTransform)) {
|
} else if (!addFakeSpaces(advanceX, textContentItem.prevTransform)) {
|
||||||
if (textContentItem.str.length === 0) {
|
if (textContentItem.str.length === 0) {
|
||||||
textContent.items.push({
|
textContent.items.push({
|
||||||
str: " ",
|
str: " ",
|
||||||
@ -2480,14 +2527,13 @@ class PartialEvaluator {
|
|||||||
fontName: textContentItem.fontName,
|
fontName: textContentItem.fontName,
|
||||||
hasEOL: false,
|
hasEOL: false,
|
||||||
});
|
});
|
||||||
textContentItem.isLastCharWhiteSpace = true;
|
|
||||||
} else {
|
} else {
|
||||||
textContentItem.width += advanceX;
|
textContentItem.width += advanceX;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildTextContentItem({ chars, extraSpacing, isFirstChunk }) {
|
function buildTextContentItem({ chars, extraSpacing }) {
|
||||||
const font = textState.font;
|
const font = textState.font;
|
||||||
if (!chars) {
|
if (!chars) {
|
||||||
// Just move according to the space we have.
|
// Just move according to the space we have.
|
||||||
@ -2499,87 +2545,91 @@ class PartialEvaluator {
|
|||||||
0
|
0
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
textState.translateTextMatrix(0, charSpacing);
|
textState.translateTextMatrix(0, -charSpacing);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const NormalizedUnicodes = getNormalizedUnicodes();
|
|
||||||
const glyphs = font.charsToGlyphs(chars);
|
const glyphs = font.charsToGlyphs(chars);
|
||||||
const scale = textState.fontMatrix[0] * textState.fontSize;
|
const scale = textState.fontMatrix[0] * textState.fontSize;
|
||||||
if (isFirstChunk) {
|
|
||||||
compareWithLastPosition(scale);
|
|
||||||
}
|
|
||||||
|
|
||||||
let textChunk = ensureTextContentItem();
|
|
||||||
let size = 0;
|
|
||||||
let lastCharSize = 0;
|
|
||||||
|
|
||||||
for (let i = 0, ii = glyphs.length; i < ii; i++) {
|
for (let i = 0, ii = glyphs.length; i < ii; i++) {
|
||||||
const glyph = glyphs[i];
|
const glyph = glyphs[i];
|
||||||
let charSpacing =
|
let charSpacing =
|
||||||
textState.charSpacing + (i === ii - 1 ? extraSpacing : 0);
|
textState.charSpacing + (i + 1 === ii ? extraSpacing : 0);
|
||||||
|
|
||||||
|
let glyphWidth = glyph.width;
|
||||||
|
if (font.vertical) {
|
||||||
|
glyphWidth = glyph.vmetric ? glyph.vmetric[0] : -glyphWidth;
|
||||||
|
}
|
||||||
|
let scaledDim = glyphWidth * scale;
|
||||||
|
|
||||||
let glyphUnicode = glyph.unicode;
|
let glyphUnicode = glyph.unicode;
|
||||||
if (glyph.isSpace) {
|
if (
|
||||||
charSpacing += textState.wordSpacing;
|
glyphUnicode === " " &&
|
||||||
textChunk.isLastCharWhiteSpace = true;
|
(i === 0 ||
|
||||||
|
i + 1 === ii ||
|
||||||
|
glyphs[i - 1].unicode === " " ||
|
||||||
|
glyphs[i + 1].unicode === " ")
|
||||||
|
) {
|
||||||
|
// Don't push a " " in the textContentItem
|
||||||
|
// (except when it's between two non-spaces chars),
|
||||||
|
// it will be done (if required) in next call to
|
||||||
|
// compareWithLastPosition.
|
||||||
|
// This way we can merge real spaces and spaces due to cursor moves.
|
||||||
|
if (!font.vertical) {
|
||||||
|
charSpacing += scaledDim + textState.wordSpacing;
|
||||||
|
textState.translateTextMatrix(
|
||||||
|
charSpacing * textState.textHScale,
|
||||||
|
0
|
||||||
|
);
|
||||||
} else {
|
} else {
|
||||||
glyphUnicode = NormalizedUnicodes[glyphUnicode] || glyphUnicode;
|
charSpacing += -scaledDim + textState.wordSpacing;
|
||||||
glyphUnicode = reverseIfRtl(glyphUnicode);
|
textState.translateTextMatrix(0, -charSpacing);
|
||||||
textChunk.isLastCharWhiteSpace = false;
|
}
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
textChunk.str.push(glyphUnicode);
|
|
||||||
|
|
||||||
const glyphWidth =
|
compareWithLastPosition();
|
||||||
font.vertical && glyph.vmetric ? glyph.vmetric[0] : glyph.width;
|
|
||||||
|
// Must be called after compareWithLastPosition because
|
||||||
|
// the textContentItem could have been flushed.
|
||||||
|
const textChunk = ensureTextContentItem();
|
||||||
|
if (DiacriticRegExp.test(glyph.unicode)) {
|
||||||
|
scaledDim = 0;
|
||||||
|
}
|
||||||
|
|
||||||
let scaledDim = glyphWidth * scale;
|
|
||||||
if (!font.vertical) {
|
if (!font.vertical) {
|
||||||
scaledDim *= textState.textHScale;
|
scaledDim *= textState.textHScale;
|
||||||
textState.translateTextMatrix(scaledDim, 0);
|
textState.translateTextMatrix(scaledDim, 0);
|
||||||
|
textChunk.width += scaledDim;
|
||||||
} else {
|
} else {
|
||||||
textState.translateTextMatrix(0, scaledDim);
|
textState.translateTextMatrix(0, scaledDim);
|
||||||
scaledDim = Math.abs(scaledDim);
|
scaledDim = Math.abs(scaledDim);
|
||||||
|
textChunk.height += scaledDim;
|
||||||
}
|
}
|
||||||
size += scaledDim;
|
|
||||||
|
if (scaledDim) {
|
||||||
|
// Save the position of the last visible character.
|
||||||
|
textChunk.prevTransform = getCurrentTextTransform();
|
||||||
|
}
|
||||||
|
|
||||||
|
glyphUnicode = NormalizedUnicodes[glyphUnicode] || glyphUnicode;
|
||||||
|
glyphUnicode = reverseIfRtl(glyphUnicode);
|
||||||
|
textChunk.str.push(glyphUnicode);
|
||||||
|
|
||||||
if (charSpacing) {
|
if (charSpacing) {
|
||||||
if (!font.vertical) {
|
if (!font.vertical) {
|
||||||
charSpacing *= textState.textHScale;
|
textState.translateTextMatrix(
|
||||||
}
|
charSpacing * textState.textHScale,
|
||||||
|
0
|
||||||
scaledDim += charSpacing;
|
);
|
||||||
const wasSplit =
|
|
||||||
charSpacing > textContentItem.trackingSpaceMin &&
|
|
||||||
addFakeSpaces(charSpacing, size);
|
|
||||||
if (!font.vertical) {
|
|
||||||
textState.translateTextMatrix(charSpacing, 0);
|
|
||||||
} else {
|
} else {
|
||||||
textState.translateTextMatrix(0, charSpacing);
|
textState.translateTextMatrix(0, -charSpacing);
|
||||||
}
|
|
||||||
|
|
||||||
if (wasSplit) {
|
|
||||||
textChunk = ensureTextContentItem();
|
|
||||||
size = 0;
|
|
||||||
} else {
|
|
||||||
size += charSpacing;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
lastCharSize = scaledDim;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
textChunk.lastCharSize = lastCharSize;
|
|
||||||
if (!font.vertical) {
|
|
||||||
textChunk.width += size;
|
|
||||||
} else {
|
|
||||||
textChunk.height += size;
|
|
||||||
}
|
|
||||||
|
|
||||||
textChunk.prevTransform = getCurrentTextTransform();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function appendEOL() {
|
function appendEOL() {
|
||||||
@ -2597,19 +2647,15 @@ class PartialEvaluator {
|
|||||||
hasEOL: true,
|
hasEOL: true,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
textContentItem.isLastCharWhiteSpace = false;
|
|
||||||
textContentItem.lastCharSize = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function addFakeSpaces(width, size, transf = null) {
|
function addFakeSpaces(width, transf) {
|
||||||
if (
|
if (
|
||||||
textContentItem.spaceInFlowMin <= width &&
|
textContentItem.spaceInFlowMin <= width &&
|
||||||
width <= textContentItem.spaceInFlowMax
|
width <= textContentItem.spaceInFlowMax
|
||||||
) {
|
) {
|
||||||
if (textContentItem.initialized) {
|
if (textContentItem.initialized) {
|
||||||
textContentItem.str.push(" ");
|
textContentItem.str.push(" ");
|
||||||
textContentItem.isLastCharWhiteSpace = true;
|
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -2617,22 +2663,12 @@ class PartialEvaluator {
|
|||||||
const fontName = textContentItem.fontName;
|
const fontName = textContentItem.fontName;
|
||||||
|
|
||||||
let height = 0;
|
let height = 0;
|
||||||
width *= textContentItem.textAdvanceScale;
|
if (textContentItem.vertical) {
|
||||||
if (!textContentItem.vertical) {
|
|
||||||
textContentItem.width += size;
|
|
||||||
} else {
|
|
||||||
textContentItem.height += size;
|
|
||||||
height = width;
|
height = width;
|
||||||
width = 0;
|
width = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
flushTextContentItem();
|
flushTextContentItem();
|
||||||
|
|
||||||
if (textContentItem.isLastCharWhiteSpace) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
textContentItem.isLastCharWhiteSpace = true;
|
|
||||||
textContent.items.push({
|
textContent.items.push({
|
||||||
str: " ",
|
str: " ",
|
||||||
// TODO: check if using the orientation from last chunk is
|
// TODO: check if using the orientation from last chunk is
|
||||||
@ -2640,7 +2676,7 @@ class PartialEvaluator {
|
|||||||
dir: "ltr",
|
dir: "ltr",
|
||||||
width,
|
width,
|
||||||
height,
|
height,
|
||||||
transform: transf ? transf : getCurrentTextTransform(),
|
transform: transf || getCurrentTextTransform(),
|
||||||
fontName,
|
fontName,
|
||||||
hasEOL: false,
|
hasEOL: false,
|
||||||
});
|
});
|
||||||
@ -2731,15 +2767,12 @@ class PartialEvaluator {
|
|||||||
next(handleSetFont(fontNameArg, null));
|
next(handleSetFont(fontNameArg, null));
|
||||||
return;
|
return;
|
||||||
case OPS.setTextRise:
|
case OPS.setTextRise:
|
||||||
flushTextContentItem();
|
|
||||||
textState.textRise = args[0];
|
textState.textRise = args[0];
|
||||||
break;
|
break;
|
||||||
case OPS.setHScale:
|
case OPS.setHScale:
|
||||||
flushTextContentItem();
|
|
||||||
textState.textHScale = args[0] / 100;
|
textState.textHScale = args[0] / 100;
|
||||||
break;
|
break;
|
||||||
case OPS.setLeading:
|
case OPS.setLeading:
|
||||||
flushTextContentItem();
|
|
||||||
textState.leading = args[0];
|
textState.leading = args[0];
|
||||||
break;
|
break;
|
||||||
case OPS.moveText:
|
case OPS.moveText:
|
||||||
@ -2747,13 +2780,11 @@ class PartialEvaluator {
|
|||||||
textState.textMatrix = textState.textLineMatrix.slice();
|
textState.textMatrix = textState.textLineMatrix.slice();
|
||||||
break;
|
break;
|
||||||
case OPS.setLeadingMoveText:
|
case OPS.setLeadingMoveText:
|
||||||
flushTextContentItem();
|
|
||||||
textState.leading = -args[1];
|
textState.leading = -args[1];
|
||||||
textState.translateTextLineMatrix(args[0], args[1]);
|
textState.translateTextLineMatrix(args[0], args[1]);
|
||||||
textState.textMatrix = textState.textLineMatrix.slice();
|
textState.textMatrix = textState.textLineMatrix.slice();
|
||||||
break;
|
break;
|
||||||
case OPS.nextLine:
|
case OPS.nextLine:
|
||||||
appendEOL();
|
|
||||||
textState.carriageReturn();
|
textState.carriageReturn();
|
||||||
break;
|
break;
|
||||||
case OPS.setTextMatrix:
|
case OPS.setTextMatrix:
|
||||||
@ -2782,7 +2813,6 @@ class PartialEvaluator {
|
|||||||
textState.wordSpacing = args[0];
|
textState.wordSpacing = args[0];
|
||||||
break;
|
break;
|
||||||
case OPS.beginText:
|
case OPS.beginText:
|
||||||
flushTextContentItem();
|
|
||||||
textState.textMatrix = IDENTITY_MATRIX.slice();
|
textState.textMatrix = IDENTITY_MATRIX.slice();
|
||||||
textState.textLineMatrix = IDENTITY_MATRIX.slice();
|
textState.textLineMatrix = IDENTITY_MATRIX.slice();
|
||||||
break;
|
break;
|
||||||
@ -2795,7 +2825,6 @@ class PartialEvaluator {
|
|||||||
const spaceFactor =
|
const spaceFactor =
|
||||||
((textState.font.vertical ? 1 : -1) * textState.fontSize) / 1000;
|
((textState.font.vertical ? 1 : -1) * textState.fontSize) / 1000;
|
||||||
const elements = args[0];
|
const elements = args[0];
|
||||||
let isFirstChunk = true;
|
|
||||||
for (let i = 0, ii = elements.length; i < ii - 1; i++) {
|
for (let i = 0, ii = elements.length; i < ii - 1; i++) {
|
||||||
const item = elements[i];
|
const item = elements[i];
|
||||||
if (typeof item === "string") {
|
if (typeof item === "string") {
|
||||||
@ -2814,11 +2843,7 @@ class PartialEvaluator {
|
|||||||
buildTextContentItem({
|
buildTextContentItem({
|
||||||
chars: str,
|
chars: str,
|
||||||
extraSpacing: item * spaceFactor,
|
extraSpacing: item * spaceFactor,
|
||||||
isFirstChunk,
|
|
||||||
});
|
});
|
||||||
if (str && isFirstChunk) {
|
|
||||||
isFirstChunk = false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2833,7 +2858,6 @@ class PartialEvaluator {
|
|||||||
buildTextContentItem({
|
buildTextContentItem({
|
||||||
chars: str,
|
chars: str,
|
||||||
extraSpacing: 0,
|
extraSpacing: 0,
|
||||||
isFirstChunk,
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -2842,11 +2866,9 @@ class PartialEvaluator {
|
|||||||
self.ensureStateFont(stateManager.state);
|
self.ensureStateFont(stateManager.state);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
buildTextContentItem({
|
buildTextContentItem({
|
||||||
chars: args[0],
|
chars: args[0],
|
||||||
extraSpacing: 0,
|
extraSpacing: 0,
|
||||||
isFirstChunk: true,
|
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
case OPS.nextLineShowText:
|
case OPS.nextLineShowText:
|
||||||
@ -2854,13 +2876,10 @@ class PartialEvaluator {
|
|||||||
self.ensureStateFont(stateManager.state);
|
self.ensureStateFont(stateManager.state);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
textContentItem.hasEOL = true;
|
|
||||||
flushTextContentItem();
|
|
||||||
textState.carriageReturn();
|
textState.carriageReturn();
|
||||||
buildTextContentItem({
|
buildTextContentItem({
|
||||||
chars: args[0],
|
chars: args[0],
|
||||||
extraSpacing: 0,
|
extraSpacing: 0,
|
||||||
isFirstChunk: true,
|
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
case OPS.nextLineSetSpacingShowText:
|
case OPS.nextLineSetSpacingShowText:
|
||||||
@ -2868,15 +2887,12 @@ class PartialEvaluator {
|
|||||||
self.ensureStateFont(stateManager.state);
|
self.ensureStateFont(stateManager.state);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
textContentItem.hasEOL = true;
|
|
||||||
flushTextContentItem();
|
|
||||||
textState.wordSpacing = args[0];
|
textState.wordSpacing = args[0];
|
||||||
textState.charSpacing = args[1];
|
textState.charSpacing = args[1];
|
||||||
textState.carriageReturn();
|
textState.carriageReturn();
|
||||||
buildTextContentItem({
|
buildTextContentItem({
|
||||||
chars: args[2],
|
chars: args[2],
|
||||||
extraSpacing: 0,
|
extraSpacing: 0,
|
||||||
isFirstChunk: true,
|
|
||||||
});
|
});
|
||||||
break;
|
break;
|
||||||
case OPS.paintXObject:
|
case OPS.paintXObject:
|
||||||
|
@ -188,7 +188,7 @@ function appendText(task, geom, styles, ctx) {
|
|||||||
(task._enhanceTextSelection && AllWhitespaceRegexp.test(geom.str))
|
(task._enhanceTextSelection && AllWhitespaceRegexp.test(geom.str))
|
||||||
) {
|
) {
|
||||||
shouldScaleText = true;
|
shouldScaleText = true;
|
||||||
} else if (geom.transform[0] !== geom.transform[3]) {
|
} else if (geom.str !== " " && geom.transform[0] !== geom.transform[3]) {
|
||||||
const absScaleX = Math.abs(geom.transform[0]),
|
const absScaleX = Math.abs(geom.transform[0]),
|
||||||
absScaleY = Math.abs(geom.transform[3]);
|
absScaleY = Math.abs(geom.transform[3]);
|
||||||
// When the horizontal/vertical scaling differs significantly, also scale
|
// When the horizontal/vertical scaling differs significantly, also scale
|
||||||
|
5
test/pdfs/.gitignore
vendored
5
test/pdfs/.gitignore
vendored
@ -13,6 +13,7 @@
|
|||||||
!issue1155r.pdf
|
!issue1155r.pdf
|
||||||
!issue2017r.pdf
|
!issue2017r.pdf
|
||||||
!bug1727053.pdf
|
!bug1727053.pdf
|
||||||
|
!issue11913.pdf
|
||||||
!issue2391-1.pdf
|
!issue2391-1.pdf
|
||||||
!issue2391-2.pdf
|
!issue2391-2.pdf
|
||||||
!issue14046.pdf
|
!issue14046.pdf
|
||||||
@ -182,6 +183,7 @@
|
|||||||
!issue11931.pdf
|
!issue11931.pdf
|
||||||
!issue1655r.pdf
|
!issue1655r.pdf
|
||||||
!issue6541.pdf
|
!issue6541.pdf
|
||||||
|
!issue10640.pdf
|
||||||
!issue2948.pdf
|
!issue2948.pdf
|
||||||
!issue6231_1.pdf
|
!issue6231_1.pdf
|
||||||
!issue10402.pdf
|
!issue10402.pdf
|
||||||
@ -285,6 +287,7 @@
|
|||||||
!issue2840.pdf
|
!issue2840.pdf
|
||||||
!issue4061.pdf
|
!issue4061.pdf
|
||||||
!issue4668.pdf
|
!issue4668.pdf
|
||||||
|
!issue13226.pdf
|
||||||
!PDFJS-7562-reduced.pdf
|
!PDFJS-7562-reduced.pdf
|
||||||
!issue11768_reduced.pdf
|
!issue11768_reduced.pdf
|
||||||
!issue5039.pdf
|
!issue5039.pdf
|
||||||
@ -440,6 +443,7 @@
|
|||||||
!annotation-fileattachment.pdf
|
!annotation-fileattachment.pdf
|
||||||
!annotation-text-widget.pdf
|
!annotation-text-widget.pdf
|
||||||
!annotation-choice-widget.pdf
|
!annotation-choice-widget.pdf
|
||||||
|
!issue10900.pdf
|
||||||
!annotation-button-widget.pdf
|
!annotation-button-widget.pdf
|
||||||
!annotation-polyline-polygon.pdf
|
!annotation-polyline-polygon.pdf
|
||||||
!annotation-polyline-polygon-without-appearance.pdf
|
!annotation-polyline-polygon-without-appearance.pdf
|
||||||
@ -462,6 +466,7 @@
|
|||||||
!issue9972-3.pdf
|
!issue9972-3.pdf
|
||||||
!tiling-pattern-box.pdf
|
!tiling-pattern-box.pdf
|
||||||
!tiling-pattern-large-steps.pdf
|
!tiling-pattern-large-steps.pdf
|
||||||
|
!issue13201.pdf
|
||||||
!issue11555.pdf
|
!issue11555.pdf
|
||||||
!issue12337.pdf
|
!issue12337.pdf
|
||||||
!pr12564.pdf
|
!pr12564.pdf
|
||||||
|
BIN
test/pdfs/issue10640.pdf
Normal file
BIN
test/pdfs/issue10640.pdf
Normal file
Binary file not shown.
BIN
test/pdfs/issue10900.pdf
Normal file
BIN
test/pdfs/issue10900.pdf
Normal file
Binary file not shown.
BIN
test/pdfs/issue11913.pdf
Normal file
BIN
test/pdfs/issue11913.pdf
Normal file
Binary file not shown.
BIN
test/pdfs/issue13201.pdf
Normal file
BIN
test/pdfs/issue13201.pdf
Normal file
Binary file not shown.
86
test/pdfs/issue13226.pdf
Normal file
86
test/pdfs/issue13226.pdf
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
%PDF-1.7
|
||||||
|
%âãÏÓ
|
||||||
|
1 0 obj
|
||||||
|
<<
|
||||||
|
/Type /Encoding
|
||||||
|
/BaseEncoding /WinAnsiEncoding
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
2 0 obj
|
||||||
|
<<
|
||||||
|
/Pages 3 0 R
|
||||||
|
/Type /Catalog
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
3 0 obj
|
||||||
|
<<
|
||||||
|
/MediaBox [0 0 400 50]
|
||||||
|
/Kids [4 0 R]
|
||||||
|
/Count 1
|
||||||
|
/Type /Pages
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
4 0 obj
|
||||||
|
<<
|
||||||
|
/Parent 3 0 R
|
||||||
|
/MediaBox [0 0 400 50]
|
||||||
|
/Resources
|
||||||
|
<<
|
||||||
|
/Font
|
||||||
|
<<
|
||||||
|
/F1 5 0 R
|
||||||
|
>>
|
||||||
|
>>
|
||||||
|
/Contents 6 0 R
|
||||||
|
/Type /Page
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
5 0 obj
|
||||||
|
<<
|
||||||
|
/BaseFont /Times-Italic
|
||||||
|
/Subtype /Type1
|
||||||
|
/Encoding 1 0 R
|
||||||
|
/Type /Font
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
6 0 obj
|
||||||
|
<<
|
||||||
|
/Length 278
|
||||||
|
>>
|
||||||
|
stream
|
||||||
|
BT
|
||||||
|
/F1 10 Tf
|
||||||
|
0.005 Tc 1 0 0 1 10 30 Tm
|
||||||
|
[(M)5 (i)5 (t)]TJ
|
||||||
|
/Span<</ActualText<FEFF00AD>>> BDC
|
||||||
|
14 0 Td
|
||||||
|
( )Tj
|
||||||
|
EMC
|
||||||
|
T*
|
||||||
|
(arbei)Tj
|
||||||
|
/Span<</ActualText<FEFF00AD>>> BDC
|
||||||
|
( )Tj
|
||||||
|
EMC
|
||||||
|
21.2 0 Td
|
||||||
|
[(terinnen und Mitarbeiter arbeiten in \374ber 100 L\344ndern engagiert im\
|
||||||
|
Dienste)5 ( )]TJ
|
||||||
|
ET
|
||||||
|
endstream
|
||||||
|
endobj xref
|
||||||
|
0 7
|
||||||
|
0000000000 65535 f
|
||||||
|
0000000015 00000 n
|
||||||
|
0000000085 00000 n
|
||||||
|
0000000136 00000 n
|
||||||
|
0000000218 00000 n
|
||||||
|
0000000347 00000 n
|
||||||
|
0000000438 00000 n
|
||||||
|
trailer
|
||||||
|
|
||||||
|
<<
|
||||||
|
/Root 2 0 R
|
||||||
|
/Size 7
|
||||||
|
>>
|
||||||
|
startxref
|
||||||
|
768
|
||||||
|
%%EOF
|
@ -73,6 +73,10 @@ describe("api", function () {
|
|||||||
}, WAIT_TIMEOUT);
|
}, WAIT_TIMEOUT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function mergeText(items) {
|
||||||
|
return items.map(chunk => chunk.str + (chunk.hasEOL ? "\n" : "")).join("");
|
||||||
|
}
|
||||||
|
|
||||||
describe("getDocument", function () {
|
describe("getDocument", function () {
|
||||||
it("creates pdf doc from URL-string", async function () {
|
it("creates pdf doc from URL-string", async function () {
|
||||||
const urlStr = TEST_PDFS_PATH + basicApiFileName;
|
const urlStr = TEST_PDFS_PATH + basicApiFileName;
|
||||||
@ -1604,11 +1608,17 @@ describe("api", function () {
|
|||||||
const data = await Promise.all([defaultPromise, parametersPromise]);
|
const data = await Promise.all([defaultPromise, parametersPromise]);
|
||||||
|
|
||||||
expect(!!data[0].items).toEqual(true);
|
expect(!!data[0].items).toEqual(true);
|
||||||
expect(data[0].items.length).toEqual(12);
|
expect(data[0].items.length).toEqual(11);
|
||||||
expect(!!data[0].styles).toEqual(true);
|
expect(!!data[0].styles).toEqual(true);
|
||||||
|
|
||||||
|
const page1 = mergeText(data[0].items);
|
||||||
|
expect(page1).toEqual(`Table Of Content
|
||||||
|
Chapter 1 .......................................................... 2
|
||||||
|
Paragraph 1.1 ...................................................... 3
|
||||||
|
page 1 / 3`);
|
||||||
|
|
||||||
expect(!!data[1].items).toEqual(true);
|
expect(!!data[1].items).toEqual(true);
|
||||||
expect(data[1].items.length).toEqual(7);
|
expect(data[1].items.length).toEqual(6);
|
||||||
expect(!!data[1].styles).toEqual(true);
|
expect(!!data[1].styles).toEqual(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -1643,6 +1653,107 @@ describe("api", function () {
|
|||||||
await loadingTask.destroy();
|
await loadingTask.destroy();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("gets text content, with no extra spaces (issue 13226)", async function () {
|
||||||
|
const loadingTask = getDocument(buildGetDocumentParams("issue13226.pdf"));
|
||||||
|
const pdfDoc = await loadingTask.promise;
|
||||||
|
const pdfPage = await pdfDoc.getPage(1);
|
||||||
|
const { items } = await pdfPage.getTextContent();
|
||||||
|
const text = mergeText(items);
|
||||||
|
|
||||||
|
expect(text).toEqual(
|
||||||
|
"Mitarbeiterinnen und Mitarbeiter arbeiten in über 100 Ländern engagiert im Dienste"
|
||||||
|
);
|
||||||
|
|
||||||
|
await loadingTask.destroy();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("gets text content, with merged spaces (issue 13201)", async function () {
|
||||||
|
const loadingTask = getDocument(buildGetDocumentParams("issue13201.pdf"));
|
||||||
|
const pdfDoc = await loadingTask.promise;
|
||||||
|
const pdfPage = await pdfDoc.getPage(1);
|
||||||
|
const { items } = await pdfPage.getTextContent();
|
||||||
|
const text = mergeText(items);
|
||||||
|
|
||||||
|
expect(
|
||||||
|
text.includes(
|
||||||
|
"Abstract. A purely peer-to-peer version of electronic cash would allow online"
|
||||||
|
)
|
||||||
|
).toEqual(true);
|
||||||
|
expect(
|
||||||
|
text.includes(
|
||||||
|
"avoid mediating disputes. The cost of mediation increases transaction costs, limiting the"
|
||||||
|
)
|
||||||
|
).toEqual(true);
|
||||||
|
expect(
|
||||||
|
text.includes(
|
||||||
|
"system is secure as long as honest nodes collectively control more CPU power than any"
|
||||||
|
)
|
||||||
|
).toEqual(true);
|
||||||
|
|
||||||
|
await loadingTask.destroy();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("gets text content, with no spaces between letters of words (issue 11913)", async function () {
|
||||||
|
const loadingTask = getDocument(buildGetDocumentParams("issue11913.pdf"));
|
||||||
|
const pdfDoc = await loadingTask.promise;
|
||||||
|
const pdfPage = await pdfDoc.getPage(1);
|
||||||
|
const { items } = await pdfPage.getTextContent();
|
||||||
|
const text = mergeText(items);
|
||||||
|
|
||||||
|
expect(
|
||||||
|
text.includes(
|
||||||
|
"1. The first of these cases arises from the tragic handicap which has blighted the life of the Plaintiff, and from the response of the"
|
||||||
|
)
|
||||||
|
).toEqual(true);
|
||||||
|
expect(
|
||||||
|
text.includes(
|
||||||
|
"argued in this Court the appeal raises narrower, but important, issues which may be summarised as follows:-"
|
||||||
|
)
|
||||||
|
).toEqual(true);
|
||||||
|
await loadingTask.destroy();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("gets text content, with merged spaces (issue 10900)", async function () {
|
||||||
|
const loadingTask = getDocument(buildGetDocumentParams("issue10900.pdf"));
|
||||||
|
const pdfDoc = await loadingTask.promise;
|
||||||
|
const pdfPage = await pdfDoc.getPage(1);
|
||||||
|
const { items } = await pdfPage.getTextContent();
|
||||||
|
const text = mergeText(items);
|
||||||
|
|
||||||
|
expect(
|
||||||
|
text.includes(`3 3 3 3
|
||||||
|
851.5 854.9 839.3 837.5
|
||||||
|
633.6 727.8 789.9 796.2
|
||||||
|
1,485.1 1,582.7 1,629.2 1,633.7
|
||||||
|
114.2 121.7 125.3 130.7
|
||||||
|
13.0x 13.0x 13.0x 12.5x`)
|
||||||
|
).toEqual(true);
|
||||||
|
|
||||||
|
await loadingTask.destroy();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("gets text content, with spaces (issue 10640)", async function () {
|
||||||
|
const loadingTask = getDocument(buildGetDocumentParams("issue10640.pdf"));
|
||||||
|
const pdfDoc = await loadingTask.promise;
|
||||||
|
const pdfPage = await pdfDoc.getPage(1);
|
||||||
|
const { items } = await pdfPage.getTextContent();
|
||||||
|
const text = mergeText(items);
|
||||||
|
|
||||||
|
expect(
|
||||||
|
text.includes(`Open Sans is a humanist sans serif typeface designed by Steve Matteson.
|
||||||
|
Open Sans was designed with an upright stress, open forms and a neu-
|
||||||
|
tral, yet friendly appearance. It was optimized for print, web, and mobile
|
||||||
|
interfaces, and has excellent legibility characteristics in its letterforms (see
|
||||||
|
figure \x81 on the following page). This font is available from the Google Font
|
||||||
|
Directory [\x81] as TrueType files licensed under the Apache License version \x82.\x80.
|
||||||
|
This package provides support for this font in LATEX. It includes Type \x81
|
||||||
|
versions of the fonts, converted for this package using FontForge from its
|
||||||
|
sources, for full support with Dvips.`)
|
||||||
|
).toEqual(true);
|
||||||
|
|
||||||
|
await loadingTask.destroy();
|
||||||
|
});
|
||||||
|
|
||||||
it("gets empty structure tree", async function () {
|
it("gets empty structure tree", async function () {
|
||||||
const tree = await page.getStructTree();
|
const tree = await page.getStructTree();
|
||||||
|
|
||||||
|
@ -268,7 +268,7 @@ describe("pdf_find_controller", function () {
|
|||||||
pageIndex: 0,
|
pageIndex: 0,
|
||||||
matchIndex: 0,
|
matchIndex: 0,
|
||||||
},
|
},
|
||||||
pageMatches: [[19, 48, 66]],
|
pageMatches: [[19, 46, 62]],
|
||||||
pageMatchesLength: [[8, 8, 8]],
|
pageMatchesLength: [[8, 8, 8]],
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
Loading…
Reference in New Issue
Block a user