Fix issues in text selection
- PR #13257 fixed a lot of issues, but not all of them; this patch aims to fix almost all of the remaining ones.
- The idea in this new patch is to compare the position of each new glyph with the last position where a glyph was drawn;
- spaces are not "drawn": a space just moves the cursor and is not added to the chunk;
- this way, a space followed by a cursor move can be treated as a single space, which helps to merge all spaces into one.
- To distinguish real spaces from tracking ones, we previously used a factor of the space width (from the font);
- it was a pretty good idea in general, but it fails with some fonts where the space is too big;
- Poppler uses a factor of the font size instead, which is an excellent idea (a gap <= 0.1 * fontSize is treated as a tracking space).
This commit is contained in:
parent
f5b79be0b7
commit
61d1063276
@ -2169,6 +2169,8 @@ class PartialEvaluator {
|
||||
stateManager = stateManager || new StateManager(new TextState());
|
||||
|
||||
const WhitespaceRegexp = /\s/g;
|
||||
const DiacriticRegExp = new RegExp("^\\p{Mn}$", "u");
|
||||
const NormalizedUnicodes = getNormalizedUnicodes();
|
||||
|
||||
const textContent = {
|
||||
items: [],
|
||||
@ -2182,34 +2184,37 @@ class PartialEvaluator {
|
||||
width: 0,
|
||||
height: 0,
|
||||
vertical: false,
|
||||
lastCharSize: 0,
|
||||
prevTransform: null,
|
||||
textAdvanceScale: 0,
|
||||
spaceWidth: 0,
|
||||
spaceInFlowMin: 0,
|
||||
spaceInFlowMax: 0,
|
||||
trackingSpaceMin: Infinity,
|
||||
negativeSpaceMax: -Infinity,
|
||||
transform: null,
|
||||
fontName: null,
|
||||
hasEOL: false,
|
||||
isLastCharWhiteSpace: false,
|
||||
};
|
||||
|
||||
// Used in addFakeSpaces.
|
||||
// wsw stands for whitespace width.
|
||||
|
||||
// A white <= wsw * TRACKING_SPACE_FACTOR is a tracking space
|
||||
// A white <= fontSize * TRACKING_SPACE_FACTOR is a tracking space
|
||||
// so it doesn't count as a space.
|
||||
const TRACKING_SPACE_FACTOR = 0.3;
|
||||
const TRACKING_SPACE_FACTOR = 0.1;
|
||||
|
||||
// A white with a width in [wsw * MIN_FACTOR; wsw * MAX_FACTOR]
|
||||
// A negative white < fontSize * NEGATIVE_SPACE_FACTOR induces
|
||||
// a break (a new chunk of text is created).
|
||||
// It doesn't change anything when the text is copied but
|
||||
// it reduces potential mismatches between the text layer and the canvas.
|
||||
const NEGATIVE_SPACE_FACTOR = -0.2;
|
||||
|
||||
// A white with a width in [fontSize * MIN_FACTOR; fontSize * MAX_FACTOR]
|
||||
// is a space which will be inserted in the current flow of words.
|
||||
// If the width is outside of this range then the flow is broken
|
||||
// (which means a new span in the text layer).
|
||||
// It's useful to match the span in the layer as closely as possible
|
||||
// to what is displayed in the canvas.
|
||||
const SPACE_IN_FLOW_MIN_FACTOR = 0.3;
|
||||
const SPACE_IN_FLOW_MAX_FACTOR = 1.3;
|
||||
const SPACE_IN_FLOW_MIN_FACTOR = 0.1;
|
||||
const SPACE_IN_FLOW_MAX_FACTOR = 0.6;
|
||||
|
||||
const self = this;
|
||||
const xref = this.xref;
|
||||
@ -2294,18 +2299,15 @@ class PartialEvaluator {
|
||||
);
|
||||
const scaleCtmX = Math.hypot(textState.ctm[0], textState.ctm[1]);
|
||||
textContentItem.textAdvanceScale = scaleCtmX * scaleLineX;
|
||||
textContentItem.lastCharSize = textContentItem.lastCharSize || 0;
|
||||
|
||||
const spaceWidth = (font.spaceWidth / 1000) * textState.fontSize;
|
||||
if (spaceWidth) {
|
||||
textContentItem.spaceWidth = spaceWidth;
|
||||
textContentItem.trackingSpaceMin = spaceWidth * TRACKING_SPACE_FACTOR;
|
||||
textContentItem.spaceInFlowMin = spaceWidth * SPACE_IN_FLOW_MIN_FACTOR;
|
||||
textContentItem.spaceInFlowMax = spaceWidth * SPACE_IN_FLOW_MAX_FACTOR;
|
||||
} else {
|
||||
textContentItem.spaceWidth = 0;
|
||||
textContentItem.trackingSpaceMin = Infinity;
|
||||
}
|
||||
textContentItem.trackingSpaceMin =
|
||||
textState.fontSize * TRACKING_SPACE_FACTOR;
|
||||
textContentItem.negativeSpaceMax =
|
||||
textState.fontSize * NEGATIVE_SPACE_FACTOR;
|
||||
textContentItem.spaceInFlowMin =
|
||||
textState.fontSize * SPACE_IN_FLOW_MIN_FACTOR;
|
||||
textContentItem.spaceInFlowMax =
|
||||
textState.fontSize * SPACE_IN_FLOW_MAX_FACTOR;
|
||||
|
||||
textContentItem.hasEOL = false;
|
||||
|
||||
@ -2395,7 +2397,7 @@ class PartialEvaluator {
|
||||
});
|
||||
}
|
||||
|
||||
function compareWithLastPosition(fontSize) {
|
||||
function compareWithLastPosition() {
|
||||
if (
|
||||
!combineTextItems ||
|
||||
!textState.font ||
|
||||
@ -2405,36 +2407,76 @@ class PartialEvaluator {
|
||||
}
|
||||
|
||||
const currentTransform = getCurrentTextTransform();
|
||||
const posX = currentTransform[4];
|
||||
const posY = currentTransform[5];
|
||||
const lastPosX = textContentItem.prevTransform[4];
|
||||
const lastPosY = textContentItem.prevTransform[5];
|
||||
let posX = currentTransform[4];
|
||||
let posY = currentTransform[5];
|
||||
let lastPosX = textContentItem.prevTransform[4];
|
||||
let lastPosY = textContentItem.prevTransform[5];
|
||||
|
||||
if (lastPosX === posX && lastPosY === posY) {
|
||||
return;
|
||||
}
|
||||
|
||||
const advanceX = (posX - lastPosX) / textContentItem.textAdvanceScale;
|
||||
const advanceY = (posY - lastPosY) / textContentItem.textAdvanceScale;
|
||||
const HALF_LAST_CHAR = -0.5 * textContentItem.lastCharSize;
|
||||
let rotate = 0;
|
||||
// Take into account the rotation in the current transform.
|
||||
// Only rotations with an angle of 0, 90, 180 or 270 are considered.
|
||||
if (
|
||||
currentTransform[0] &&
|
||||
currentTransform[1] === 0 &&
|
||||
currentTransform[2] === 0
|
||||
) {
|
||||
rotate = currentTransform[0] > 0 ? 0 : 180;
|
||||
} else if (
|
||||
currentTransform[1] &&
|
||||
currentTransform[0] === 0 &&
|
||||
currentTransform[3] === 0
|
||||
) {
|
||||
rotate += currentTransform[1] > 0 ? 90 : 270;
|
||||
}
|
||||
|
||||
if (rotate !== 0) {
|
||||
switch (rotate) {
|
||||
case 90:
|
||||
[posX, posY] = [posY, posX];
|
||||
[lastPosX, lastPosY] = [lastPosY, lastPosX];
|
||||
break;
|
||||
case 180:
|
||||
[posX, posY, lastPosX, lastPosY] = [
|
||||
-posX,
|
||||
-posY,
|
||||
-lastPosX,
|
||||
-lastPosY,
|
||||
];
|
||||
break;
|
||||
case 270:
|
||||
[posX, posY] = [-posY, -posX];
|
||||
[lastPosX, lastPosY] = [-lastPosY, -lastPosX];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (textState.font.vertical) {
|
||||
if (
|
||||
Math.abs(advanceX) >
|
||||
textContentItem.width /
|
||||
textContentItem.textAdvanceScale /* not the same column */
|
||||
) {
|
||||
const advanceY = (lastPosY - posY) / textContentItem.textAdvanceScale;
|
||||
const advanceX = posX - lastPosX;
|
||||
if (advanceY < textContentItem.negativeSpaceMax) {
|
||||
if (
|
||||
Math.abs(advanceX) >
|
||||
0.5 * textContentItem.width /* not the same column */
|
||||
) {
|
||||
appendEOL();
|
||||
return;
|
||||
}
|
||||
|
||||
flushTextContentItem();
|
||||
return;
|
||||
}
|
||||
|
||||
if (Math.abs(advanceX) > textContentItem.height) {
|
||||
appendEOL();
|
||||
return;
|
||||
}
|
||||
|
||||
if (HALF_LAST_CHAR > advanceY) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (advanceY > textContentItem.trackingSpaceMin) {
|
||||
if (advanceY <= textContentItem.trackingSpaceMin) {
|
||||
textContentItem.height += advanceY;
|
||||
} else if (!addFakeSpaces(advanceY, 0, textContentItem.prevTransform)) {
|
||||
} else if (!addFakeSpaces(advanceY, textContentItem.prevTransform)) {
|
||||
if (textContentItem.str.length === 0) {
|
||||
textContent.items.push({
|
||||
str: " ",
|
||||
@ -2445,7 +2487,6 @@ class PartialEvaluator {
|
||||
fontName: textContentItem.fontName,
|
||||
hasEOL: false,
|
||||
});
|
||||
textContentItem.isLastCharWhiteSpace = true;
|
||||
} else {
|
||||
textContentItem.height += advanceY;
|
||||
}
|
||||
@ -2454,22 +2495,28 @@ class PartialEvaluator {
|
||||
return;
|
||||
}
|
||||
|
||||
if (
|
||||
Math.abs(advanceY) >
|
||||
textContentItem.height /
|
||||
textContentItem.textAdvanceScale /* not the same line */
|
||||
) {
|
||||
appendEOL();
|
||||
const advanceX = (posX - lastPosX) / textContentItem.textAdvanceScale;
|
||||
const advanceY = posY - lastPosY;
|
||||
if (advanceX < textContentItem.negativeSpaceMax) {
|
||||
if (
|
||||
Math.abs(advanceY) >
|
||||
0.5 * textContentItem.height /* not the same line */
|
||||
) {
|
||||
appendEOL();
|
||||
return;
|
||||
}
|
||||
flushTextContentItem();
|
||||
return;
|
||||
}
|
||||
|
||||
if (HALF_LAST_CHAR > advanceX) {
|
||||
if (Math.abs(advanceY) > textContentItem.height) {
|
||||
appendEOL();
|
||||
return;
|
||||
}
|
||||
|
||||
if (advanceX <= textContentItem.trackingSpaceMin) {
|
||||
textContentItem.width += advanceX;
|
||||
} else if (!addFakeSpaces(advanceX, 0, textContentItem.prevTransform)) {
|
||||
} else if (!addFakeSpaces(advanceX, textContentItem.prevTransform)) {
|
||||
if (textContentItem.str.length === 0) {
|
||||
textContent.items.push({
|
||||
str: " ",
|
||||
@ -2480,14 +2527,13 @@ class PartialEvaluator {
|
||||
fontName: textContentItem.fontName,
|
||||
hasEOL: false,
|
||||
});
|
||||
textContentItem.isLastCharWhiteSpace = true;
|
||||
} else {
|
||||
textContentItem.width += advanceX;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function buildTextContentItem({ chars, extraSpacing, isFirstChunk }) {
|
||||
function buildTextContentItem({ chars, extraSpacing }) {
|
||||
const font = textState.font;
|
||||
if (!chars) {
|
||||
// Just move according to the space we have.
|
||||
@ -2499,87 +2545,91 @@ class PartialEvaluator {
|
||||
0
|
||||
);
|
||||
} else {
|
||||
textState.translateTextMatrix(0, charSpacing);
|
||||
textState.translateTextMatrix(0, -charSpacing);
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
const NormalizedUnicodes = getNormalizedUnicodes();
|
||||
const glyphs = font.charsToGlyphs(chars);
|
||||
const scale = textState.fontMatrix[0] * textState.fontSize;
|
||||
if (isFirstChunk) {
|
||||
compareWithLastPosition(scale);
|
||||
}
|
||||
|
||||
let textChunk = ensureTextContentItem();
|
||||
let size = 0;
|
||||
let lastCharSize = 0;
|
||||
|
||||
for (let i = 0, ii = glyphs.length; i < ii; i++) {
|
||||
const glyph = glyphs[i];
|
||||
let charSpacing =
|
||||
textState.charSpacing + (i === ii - 1 ? extraSpacing : 0);
|
||||
textState.charSpacing + (i + 1 === ii ? extraSpacing : 0);
|
||||
|
||||
let glyphWidth = glyph.width;
|
||||
if (font.vertical) {
|
||||
glyphWidth = glyph.vmetric ? glyph.vmetric[0] : -glyphWidth;
|
||||
}
|
||||
let scaledDim = glyphWidth * scale;
|
||||
|
||||
let glyphUnicode = glyph.unicode;
|
||||
if (glyph.isSpace) {
|
||||
charSpacing += textState.wordSpacing;
|
||||
textChunk.isLastCharWhiteSpace = true;
|
||||
} else {
|
||||
glyphUnicode = NormalizedUnicodes[glyphUnicode] || glyphUnicode;
|
||||
glyphUnicode = reverseIfRtl(glyphUnicode);
|
||||
textChunk.isLastCharWhiteSpace = false;
|
||||
if (
|
||||
glyphUnicode === " " &&
|
||||
(i === 0 ||
|
||||
i + 1 === ii ||
|
||||
glyphs[i - 1].unicode === " " ||
|
||||
glyphs[i + 1].unicode === " ")
|
||||
) {
|
||||
// Don't push a " " in the textContentItem
|
||||
// (except when it's between two non-space chars),
|
||||
// it will be done (if required) in next call to
|
||||
// compareWithLastPosition.
|
||||
// This way we can merge real spaces and spaces due to cursor moves.
|
||||
if (!font.vertical) {
|
||||
charSpacing += scaledDim + textState.wordSpacing;
|
||||
textState.translateTextMatrix(
|
||||
charSpacing * textState.textHScale,
|
||||
0
|
||||
);
|
||||
} else {
|
||||
charSpacing += -scaledDim + textState.wordSpacing;
|
||||
textState.translateTextMatrix(0, -charSpacing);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
textChunk.str.push(glyphUnicode);
|
||||
|
||||
const glyphWidth =
|
||||
font.vertical && glyph.vmetric ? glyph.vmetric[0] : glyph.width;
|
||||
compareWithLastPosition();
|
||||
|
||||
// Must be called after compareWithLastPosition because
|
||||
// the textContentItem could have been flushed.
|
||||
const textChunk = ensureTextContentItem();
|
||||
if (DiacriticRegExp.test(glyph.unicode)) {
|
||||
scaledDim = 0;
|
||||
}
|
||||
|
||||
let scaledDim = glyphWidth * scale;
|
||||
if (!font.vertical) {
|
||||
scaledDim *= textState.textHScale;
|
||||
textState.translateTextMatrix(scaledDim, 0);
|
||||
textChunk.width += scaledDim;
|
||||
} else {
|
||||
textState.translateTextMatrix(0, scaledDim);
|
||||
scaledDim = Math.abs(scaledDim);
|
||||
textChunk.height += scaledDim;
|
||||
}
|
||||
size += scaledDim;
|
||||
|
||||
if (scaledDim) {
|
||||
// Save the position of the last visible character.
|
||||
textChunk.prevTransform = getCurrentTextTransform();
|
||||
}
|
||||
|
||||
glyphUnicode = NormalizedUnicodes[glyphUnicode] || glyphUnicode;
|
||||
glyphUnicode = reverseIfRtl(glyphUnicode);
|
||||
textChunk.str.push(glyphUnicode);
|
||||
|
||||
if (charSpacing) {
|
||||
if (!font.vertical) {
|
||||
charSpacing *= textState.textHScale;
|
||||
}
|
||||
|
||||
scaledDim += charSpacing;
|
||||
const wasSplit =
|
||||
charSpacing > textContentItem.trackingSpaceMin &&
|
||||
addFakeSpaces(charSpacing, size);
|
||||
if (!font.vertical) {
|
||||
textState.translateTextMatrix(charSpacing, 0);
|
||||
textState.translateTextMatrix(
|
||||
charSpacing * textState.textHScale,
|
||||
0
|
||||
);
|
||||
} else {
|
||||
textState.translateTextMatrix(0, charSpacing);
|
||||
}
|
||||
|
||||
if (wasSplit) {
|
||||
textChunk = ensureTextContentItem();
|
||||
size = 0;
|
||||
} else {
|
||||
size += charSpacing;
|
||||
textState.translateTextMatrix(0, -charSpacing);
|
||||
}
|
||||
}
|
||||
|
||||
lastCharSize = scaledDim;
|
||||
}
|
||||
|
||||
textChunk.lastCharSize = lastCharSize;
|
||||
if (!font.vertical) {
|
||||
textChunk.width += size;
|
||||
} else {
|
||||
textChunk.height += size;
|
||||
}
|
||||
|
||||
textChunk.prevTransform = getCurrentTextTransform();
|
||||
}
|
||||
|
||||
function appendEOL() {
|
||||
@ -2597,19 +2647,15 @@ class PartialEvaluator {
|
||||
hasEOL: true,
|
||||
});
|
||||
}
|
||||
|
||||
textContentItem.isLastCharWhiteSpace = false;
|
||||
textContentItem.lastCharSize = 0;
|
||||
}
|
||||
|
||||
function addFakeSpaces(width, size, transf = null) {
|
||||
function addFakeSpaces(width, transf) {
|
||||
if (
|
||||
textContentItem.spaceInFlowMin <= width &&
|
||||
width <= textContentItem.spaceInFlowMax
|
||||
) {
|
||||
if (textContentItem.initialized) {
|
||||
textContentItem.str.push(" ");
|
||||
textContentItem.isLastCharWhiteSpace = true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -2617,22 +2663,12 @@ class PartialEvaluator {
|
||||
const fontName = textContentItem.fontName;
|
||||
|
||||
let height = 0;
|
||||
width *= textContentItem.textAdvanceScale;
|
||||
if (!textContentItem.vertical) {
|
||||
textContentItem.width += size;
|
||||
} else {
|
||||
textContentItem.height += size;
|
||||
if (textContentItem.vertical) {
|
||||
height = width;
|
||||
width = 0;
|
||||
}
|
||||
|
||||
flushTextContentItem();
|
||||
|
||||
if (textContentItem.isLastCharWhiteSpace) {
|
||||
return true;
|
||||
}
|
||||
|
||||
textContentItem.isLastCharWhiteSpace = true;
|
||||
textContent.items.push({
|
||||
str: " ",
|
||||
// TODO: check if using the orientation from last chunk is
|
||||
@ -2640,7 +2676,7 @@ class PartialEvaluator {
|
||||
dir: "ltr",
|
||||
width,
|
||||
height,
|
||||
transform: transf ? transf : getCurrentTextTransform(),
|
||||
transform: transf || getCurrentTextTransform(),
|
||||
fontName,
|
||||
hasEOL: false,
|
||||
});
|
||||
@ -2731,15 +2767,12 @@ class PartialEvaluator {
|
||||
next(handleSetFont(fontNameArg, null));
|
||||
return;
|
||||
case OPS.setTextRise:
|
||||
flushTextContentItem();
|
||||
textState.textRise = args[0];
|
||||
break;
|
||||
case OPS.setHScale:
|
||||
flushTextContentItem();
|
||||
textState.textHScale = args[0] / 100;
|
||||
break;
|
||||
case OPS.setLeading:
|
||||
flushTextContentItem();
|
||||
textState.leading = args[0];
|
||||
break;
|
||||
case OPS.moveText:
|
||||
@ -2747,13 +2780,11 @@ class PartialEvaluator {
|
||||
textState.textMatrix = textState.textLineMatrix.slice();
|
||||
break;
|
||||
case OPS.setLeadingMoveText:
|
||||
flushTextContentItem();
|
||||
textState.leading = -args[1];
|
||||
textState.translateTextLineMatrix(args[0], args[1]);
|
||||
textState.textMatrix = textState.textLineMatrix.slice();
|
||||
break;
|
||||
case OPS.nextLine:
|
||||
appendEOL();
|
||||
textState.carriageReturn();
|
||||
break;
|
||||
case OPS.setTextMatrix:
|
||||
@ -2782,7 +2813,6 @@ class PartialEvaluator {
|
||||
textState.wordSpacing = args[0];
|
||||
break;
|
||||
case OPS.beginText:
|
||||
flushTextContentItem();
|
||||
textState.textMatrix = IDENTITY_MATRIX.slice();
|
||||
textState.textLineMatrix = IDENTITY_MATRIX.slice();
|
||||
break;
|
||||
@ -2795,7 +2825,6 @@ class PartialEvaluator {
|
||||
const spaceFactor =
|
||||
((textState.font.vertical ? 1 : -1) * textState.fontSize) / 1000;
|
||||
const elements = args[0];
|
||||
let isFirstChunk = true;
|
||||
for (let i = 0, ii = elements.length; i < ii - 1; i++) {
|
||||
const item = elements[i];
|
||||
if (typeof item === "string") {
|
||||
@ -2814,11 +2843,7 @@ class PartialEvaluator {
|
||||
buildTextContentItem({
|
||||
chars: str,
|
||||
extraSpacing: item * spaceFactor,
|
||||
isFirstChunk,
|
||||
});
|
||||
if (str && isFirstChunk) {
|
||||
isFirstChunk = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -2833,7 +2858,6 @@ class PartialEvaluator {
|
||||
buildTextContentItem({
|
||||
chars: str,
|
||||
extraSpacing: 0,
|
||||
isFirstChunk,
|
||||
});
|
||||
}
|
||||
break;
|
||||
@ -2842,11 +2866,9 @@ class PartialEvaluator {
|
||||
self.ensureStateFont(stateManager.state);
|
||||
continue;
|
||||
}
|
||||
|
||||
buildTextContentItem({
|
||||
chars: args[0],
|
||||
extraSpacing: 0,
|
||||
isFirstChunk: true,
|
||||
});
|
||||
break;
|
||||
case OPS.nextLineShowText:
|
||||
@ -2854,13 +2876,10 @@ class PartialEvaluator {
|
||||
self.ensureStateFont(stateManager.state);
|
||||
continue;
|
||||
}
|
||||
textContentItem.hasEOL = true;
|
||||
flushTextContentItem();
|
||||
textState.carriageReturn();
|
||||
buildTextContentItem({
|
||||
chars: args[0],
|
||||
extraSpacing: 0,
|
||||
isFirstChunk: true,
|
||||
});
|
||||
break;
|
||||
case OPS.nextLineSetSpacingShowText:
|
||||
@ -2868,15 +2887,12 @@ class PartialEvaluator {
|
||||
self.ensureStateFont(stateManager.state);
|
||||
continue;
|
||||
}
|
||||
textContentItem.hasEOL = true;
|
||||
flushTextContentItem();
|
||||
textState.wordSpacing = args[0];
|
||||
textState.charSpacing = args[1];
|
||||
textState.carriageReturn();
|
||||
buildTextContentItem({
|
||||
chars: args[2],
|
||||
extraSpacing: 0,
|
||||
isFirstChunk: true,
|
||||
});
|
||||
break;
|
||||
case OPS.paintXObject:
|
||||
|
@ -188,7 +188,7 @@ function appendText(task, geom, styles, ctx) {
|
||||
(task._enhanceTextSelection && AllWhitespaceRegexp.test(geom.str))
|
||||
) {
|
||||
shouldScaleText = true;
|
||||
} else if (geom.transform[0] !== geom.transform[3]) {
|
||||
} else if (geom.str !== " " && geom.transform[0] !== geom.transform[3]) {
|
||||
const absScaleX = Math.abs(geom.transform[0]),
|
||||
absScaleY = Math.abs(geom.transform[3]);
|
||||
// When the horizontal/vertical scaling differs significantly, also scale
|
||||
|
5
test/pdfs/.gitignore
vendored
5
test/pdfs/.gitignore
vendored
@ -13,6 +13,7 @@
|
||||
!issue1155r.pdf
|
||||
!issue2017r.pdf
|
||||
!bug1727053.pdf
|
||||
!issue11913.pdf
|
||||
!issue2391-1.pdf
|
||||
!issue2391-2.pdf
|
||||
!issue14046.pdf
|
||||
@ -182,6 +183,7 @@
|
||||
!issue11931.pdf
|
||||
!issue1655r.pdf
|
||||
!issue6541.pdf
|
||||
!issue10640.pdf
|
||||
!issue2948.pdf
|
||||
!issue6231_1.pdf
|
||||
!issue10402.pdf
|
||||
@ -285,6 +287,7 @@
|
||||
!issue2840.pdf
|
||||
!issue4061.pdf
|
||||
!issue4668.pdf
|
||||
!issue13226.pdf
|
||||
!PDFJS-7562-reduced.pdf
|
||||
!issue11768_reduced.pdf
|
||||
!issue5039.pdf
|
||||
@ -440,6 +443,7 @@
|
||||
!annotation-fileattachment.pdf
|
||||
!annotation-text-widget.pdf
|
||||
!annotation-choice-widget.pdf
|
||||
!issue10900.pdf
|
||||
!annotation-button-widget.pdf
|
||||
!annotation-polyline-polygon.pdf
|
||||
!annotation-polyline-polygon-without-appearance.pdf
|
||||
@ -462,6 +466,7 @@
|
||||
!issue9972-3.pdf
|
||||
!tiling-pattern-box.pdf
|
||||
!tiling-pattern-large-steps.pdf
|
||||
!issue13201.pdf
|
||||
!issue11555.pdf
|
||||
!issue12337.pdf
|
||||
!pr12564.pdf
|
||||
|
BIN
test/pdfs/issue10640.pdf
Normal file
BIN
test/pdfs/issue10640.pdf
Normal file
Binary file not shown.
BIN
test/pdfs/issue10900.pdf
Normal file
BIN
test/pdfs/issue10900.pdf
Normal file
Binary file not shown.
BIN
test/pdfs/issue11913.pdf
Normal file
BIN
test/pdfs/issue11913.pdf
Normal file
Binary file not shown.
BIN
test/pdfs/issue13201.pdf
Normal file
BIN
test/pdfs/issue13201.pdf
Normal file
Binary file not shown.
86
test/pdfs/issue13226.pdf
Normal file
86
test/pdfs/issue13226.pdf
Normal file
@ -0,0 +1,86 @@
|
||||
%PDF-1.7
|
||||
%âãÏÓ
|
||||
1 0 obj
|
||||
<<
|
||||
/Type /Encoding
|
||||
/BaseEncoding /WinAnsiEncoding
|
||||
>>
|
||||
endobj
|
||||
2 0 obj
|
||||
<<
|
||||
/Pages 3 0 R
|
||||
/Type /Catalog
|
||||
>>
|
||||
endobj
|
||||
3 0 obj
|
||||
<<
|
||||
/MediaBox [0 0 400 50]
|
||||
/Kids [4 0 R]
|
||||
/Count 1
|
||||
/Type /Pages
|
||||
>>
|
||||
endobj
|
||||
4 0 obj
|
||||
<<
|
||||
/Parent 3 0 R
|
||||
/MediaBox [0 0 400 50]
|
||||
/Resources
|
||||
<<
|
||||
/Font
|
||||
<<
|
||||
/F1 5 0 R
|
||||
>>
|
||||
>>
|
||||
/Contents 6 0 R
|
||||
/Type /Page
|
||||
>>
|
||||
endobj
|
||||
5 0 obj
|
||||
<<
|
||||
/BaseFont /Times-Italic
|
||||
/Subtype /Type1
|
||||
/Encoding 1 0 R
|
||||
/Type /Font
|
||||
>>
|
||||
endobj
|
||||
6 0 obj
|
||||
<<
|
||||
/Length 278
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
/F1 10 Tf
|
||||
0.005 Tc 1 0 0 1 10 30 Tm
|
||||
[(M)5 (i)5 (t)]TJ
|
||||
/Span<</ActualText<FEFF00AD>>> BDC
|
||||
14 0 Td
|
||||
( )Tj
|
||||
EMC
|
||||
T*
|
||||
(arbei)Tj
|
||||
/Span<</ActualText<FEFF00AD>>> BDC
|
||||
( )Tj
|
||||
EMC
|
||||
21.2 0 Td
|
||||
[(terinnen und Mitarbeiter arbeiten in \374ber 100 L\344ndern engagiert im\
|
||||
Dienste)5 ( )]TJ
|
||||
ET
|
||||
endstream
|
||||
endobj xref
|
||||
0 7
|
||||
0000000000 65535 f
|
||||
0000000015 00000 n
|
||||
0000000085 00000 n
|
||||
0000000136 00000 n
|
||||
0000000218 00000 n
|
||||
0000000347 00000 n
|
||||
0000000438 00000 n
|
||||
trailer
|
||||
|
||||
<<
|
||||
/Root 2 0 R
|
||||
/Size 7
|
||||
>>
|
||||
startxref
|
||||
768
|
||||
%%EOF
|
@ -73,6 +73,10 @@ describe("api", function () {
|
||||
}, WAIT_TIMEOUT);
|
||||
}
|
||||
|
||||
function mergeText(items) {
|
||||
return items.map(chunk => chunk.str + (chunk.hasEOL ? "\n" : "")).join("");
|
||||
}
|
||||
|
||||
describe("getDocument", function () {
|
||||
it("creates pdf doc from URL-string", async function () {
|
||||
const urlStr = TEST_PDFS_PATH + basicApiFileName;
|
||||
@ -1604,11 +1608,17 @@ describe("api", function () {
|
||||
const data = await Promise.all([defaultPromise, parametersPromise]);
|
||||
|
||||
expect(!!data[0].items).toEqual(true);
|
||||
expect(data[0].items.length).toEqual(12);
|
||||
expect(data[0].items.length).toEqual(11);
|
||||
expect(!!data[0].styles).toEqual(true);
|
||||
|
||||
const page1 = mergeText(data[0].items);
|
||||
expect(page1).toEqual(`Table Of Content
|
||||
Chapter 1 .......................................................... 2
|
||||
Paragraph 1.1 ...................................................... 3
|
||||
page 1 / 3`);
|
||||
|
||||
expect(!!data[1].items).toEqual(true);
|
||||
expect(data[1].items.length).toEqual(7);
|
||||
expect(data[1].items.length).toEqual(6);
|
||||
expect(!!data[1].styles).toEqual(true);
|
||||
});
|
||||
|
||||
@ -1643,6 +1653,107 @@ describe("api", function () {
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("gets text content, with no extra spaces (issue 13226)", async function () {
|
||||
const loadingTask = getDocument(buildGetDocumentParams("issue13226.pdf"));
|
||||
const pdfDoc = await loadingTask.promise;
|
||||
const pdfPage = await pdfDoc.getPage(1);
|
||||
const { items } = await pdfPage.getTextContent();
|
||||
const text = mergeText(items);
|
||||
|
||||
expect(text).toEqual(
|
||||
"Mitarbeiterinnen und Mitarbeiter arbeiten in über 100 Ländern engagiert im Dienste"
|
||||
);
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("gets text content, with merged spaces (issue 13201)", async function () {
|
||||
const loadingTask = getDocument(buildGetDocumentParams("issue13201.pdf"));
|
||||
const pdfDoc = await loadingTask.promise;
|
||||
const pdfPage = await pdfDoc.getPage(1);
|
||||
const { items } = await pdfPage.getTextContent();
|
||||
const text = mergeText(items);
|
||||
|
||||
expect(
|
||||
text.includes(
|
||||
"Abstract. A purely peer-to-peer version of electronic cash would allow online"
|
||||
)
|
||||
).toEqual(true);
|
||||
expect(
|
||||
text.includes(
|
||||
"avoid mediating disputes. The cost of mediation increases transaction costs, limiting the"
|
||||
)
|
||||
).toEqual(true);
|
||||
expect(
|
||||
text.includes(
|
||||
"system is secure as long as honest nodes collectively control more CPU power than any"
|
||||
)
|
||||
).toEqual(true);
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("gets text content, with no spaces between letters of words (issue 11913)", async function () {
|
||||
const loadingTask = getDocument(buildGetDocumentParams("issue11913.pdf"));
|
||||
const pdfDoc = await loadingTask.promise;
|
||||
const pdfPage = await pdfDoc.getPage(1);
|
||||
const { items } = await pdfPage.getTextContent();
|
||||
const text = mergeText(items);
|
||||
|
||||
expect(
|
||||
text.includes(
|
||||
"1. The first of these cases arises from the tragic handicap which has blighted the life of the Plaintiff, and from the response of the"
|
||||
)
|
||||
).toEqual(true);
|
||||
expect(
|
||||
text.includes(
|
||||
"argued in this Court the appeal raises narrower, but important, issues which may be summarised as follows:-"
|
||||
)
|
||||
).toEqual(true);
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("gets text content, with merged spaces (issue 10900)", async function () {
|
||||
const loadingTask = getDocument(buildGetDocumentParams("issue10900.pdf"));
|
||||
const pdfDoc = await loadingTask.promise;
|
||||
const pdfPage = await pdfDoc.getPage(1);
|
||||
const { items } = await pdfPage.getTextContent();
|
||||
const text = mergeText(items);
|
||||
|
||||
expect(
|
||||
text.includes(`3 3 3 3
|
||||
851.5 854.9 839.3 837.5
|
||||
633.6 727.8 789.9 796.2
|
||||
1,485.1 1,582.7 1,629.2 1,633.7
|
||||
114.2 121.7 125.3 130.7
|
||||
13.0x 13.0x 13.0x 12.5x`)
|
||||
).toEqual(true);
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("gets text content, with spaces (issue 10640)", async function () {
|
||||
const loadingTask = getDocument(buildGetDocumentParams("issue10640.pdf"));
|
||||
const pdfDoc = await loadingTask.promise;
|
||||
const pdfPage = await pdfDoc.getPage(1);
|
||||
const { items } = await pdfPage.getTextContent();
|
||||
const text = mergeText(items);
|
||||
|
||||
expect(
|
||||
text.includes(`Open Sans is a humanist sans serif typeface designed by Steve Matteson.
|
||||
Open Sans was designed with an upright stress, open forms and a neu-
|
||||
tral, yet friendly appearance. It was optimized for print, web, and mobile
|
||||
interfaces, and has excellent legibility characteristics in its letterforms (see
|
||||
figure \x81 on the following page). This font is available from the Google Font
|
||||
Directory [\x81] as TrueType files licensed under the Apache License version \x82.\x80.
|
||||
This package provides support for this font in LATEX. It includes Type \x81
|
||||
versions of the fonts, converted for this package using FontForge from its
|
||||
sources, for full support with Dvips.`)
|
||||
).toEqual(true);
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("gets empty structure tree", async function () {
|
||||
const tree = await page.getStructTree();
|
||||
|
||||
|
@ -268,7 +268,7 @@ describe("pdf_find_controller", function () {
|
||||
pageIndex: 0,
|
||||
matchIndex: 0,
|
||||
},
|
||||
pageMatches: [[19, 48, 66]],
|
||||
pageMatches: [[19, 46, 62]],
|
||||
pageMatchesLength: [[8, 8, 8]],
|
||||
});
|
||||
});
|
||||
|
Loading…
Reference in New Issue
Block a user