[api-minor] Don't add in the text content the chars which are out-of-page (bug 1755201)
- It aims to fix https://bugzilla.mozilla.org/show_bug.cgi?id=1755201.
- If the glyph position is not within the page view, the glyph is skipped.
This commit is contained in:
parent
78246719f8
commit
18e3a98c2b
@ -471,6 +471,7 @@ class Page {
|
||||
includeMarkedContent,
|
||||
combineTextItems,
|
||||
sink,
|
||||
viewBox: this.view,
|
||||
});
|
||||
});
|
||||
}
|
||||
|
@ -2167,6 +2167,7 @@ class PartialEvaluator {
|
||||
includeMarkedContent = false,
|
||||
sink,
|
||||
seenStyles = new Set(),
|
||||
viewBox,
|
||||
}) {
|
||||
// Ensure that `resources`/`stateManager` is correctly initialized,
|
||||
// even if the provided parameter is e.g. `null`.
|
||||
@ -2393,22 +2394,35 @@ class PartialEvaluator {
|
||||
}
|
||||
|
||||
function compareWithLastPosition() {
|
||||
const currentTransform = getCurrentTextTransform();
|
||||
let posX = currentTransform[4];
|
||||
let posY = currentTransform[5];
|
||||
|
||||
const shiftedX = posX - viewBox[0];
|
||||
const shiftedY = posY - viewBox[1];
|
||||
|
||||
if (
|
||||
shiftedX < 0 ||
|
||||
shiftedX > viewBox[2] ||
|
||||
shiftedY < 0 ||
|
||||
shiftedY > viewBox[3]
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (
|
||||
!combineTextItems ||
|
||||
!textState.font ||
|
||||
!textContentItem.prevTransform
|
||||
) {
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
const currentTransform = getCurrentTextTransform();
|
||||
let posX = currentTransform[4];
|
||||
let posY = currentTransform[5];
|
||||
let lastPosX = textContentItem.prevTransform[4];
|
||||
let lastPosY = textContentItem.prevTransform[5];
|
||||
|
||||
if (lastPosX === posX && lastPosY === posY) {
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
let rotate = -1;
|
||||
@ -2473,16 +2487,16 @@ class PartialEvaluator {
|
||||
0.5 * textContentItem.width /* not the same column */
|
||||
) {
|
||||
appendEOL();
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
flushTextContentItem();
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (Math.abs(advanceX) > textContentItem.width) {
|
||||
appendEOL();
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
if (advanceY <= textOrientation * textContentItem.trackingSpaceMin) {
|
||||
textContentItem.height += advanceY;
|
||||
@ -2508,7 +2522,7 @@ class PartialEvaluator {
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
const advanceX = (posX - lastPosX) / textContentItem.textAdvanceScale;
|
||||
@ -2523,15 +2537,15 @@ class PartialEvaluator {
|
||||
0.5 * textContentItem.height /* not the same line */
|
||||
) {
|
||||
appendEOL();
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
flushTextContentItem();
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (Math.abs(advanceY) > textContentItem.height) {
|
||||
appendEOL();
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (advanceX <= textOrientation * textContentItem.trackingSpaceMin) {
|
||||
@ -2553,6 +2567,8 @@ class PartialEvaluator {
|
||||
textContentItem.width += advanceX;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
function buildTextContentItem({ chars, extraSpacing }) {
|
||||
@ -2617,7 +2633,10 @@ class PartialEvaluator {
|
||||
continue;
|
||||
}
|
||||
|
||||
compareWithLastPosition();
|
||||
if (!compareWithLastPosition()) {
|
||||
// The glyph is not in page so just skip it.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Must be called after compareWithLastPosition because
|
||||
// the textContentItem could have been flushed.
|
||||
@ -3026,6 +3045,7 @@ class PartialEvaluator {
|
||||
includeMarkedContent,
|
||||
sink: sinkWrapper,
|
||||
seenStyles,
|
||||
viewBox,
|
||||
})
|
||||
.then(function () {
|
||||
if (!sinkWrapper.enqueueInvoked) {
|
||||
|
1
test/pdfs/bug1755201.pdf.link
Normal file
1
test/pdfs/bug1755201.pdf.link
Normal file
@ -0,0 +1 @@
|
||||
https://bugzilla.mozilla.org/attachment.cgi?id=9263657
|
@ -1,4 +1,11 @@
|
||||
[
|
||||
{ "id": "bug1755201",
|
||||
"file": "pdfs/bug1755201.pdf",
|
||||
"md5": "cece14097812d8a1f69e86a51e4a3804",
|
||||
"rounds": 1,
|
||||
"link": true,
|
||||
"type": "other"
|
||||
},
|
||||
{ "id": "filled-background-range",
|
||||
"file": "pdfs/filled-background.pdf",
|
||||
"md5": "2e3120255d9c3e79b96d2543b12d2589",
|
||||
|
@ -2219,6 +2219,22 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("gets text content, and check that out-of-page text is not present (bug 1755201)", async function () {
|
||||
if (isNodeJS) {
|
||||
pending("Linked test-cases are not supported in Node.js.");
|
||||
}
|
||||
|
||||
const loadingTask = getDocument(buildGetDocumentParams("bug1755201.pdf"));
|
||||
const pdfDoc = await loadingTask.promise;
|
||||
const pdfPage = await pdfDoc.getPage(6);
|
||||
const { items } = await pdfPage.getTextContent();
|
||||
const text = mergeText(items);
|
||||
|
||||
expect(/win aisle/.test(text)).toEqual(false);
|
||||
|
||||
await loadingTask.destroy();
|
||||
});
|
||||
|
||||
it("gets empty structure tree", async function () {
|
||||
const tree = await page.getStructTree();
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user