[api-minor] Don't add out-of-page chars to the text content (bug 1755201)
- It aims to fix https://bugzilla.mozilla.org/show_bug.cgi?id=1755201.
- If the glyph position is not within the page view, then skip the glyph.
This commit is contained in:
parent
78246719f8
commit
18e3a98c2b
@ -471,6 +471,7 @@ class Page {
|
|||||||
includeMarkedContent,
|
includeMarkedContent,
|
||||||
combineTextItems,
|
combineTextItems,
|
||||||
sink,
|
sink,
|
||||||
|
viewBox: this.view,
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -2167,6 +2167,7 @@ class PartialEvaluator {
|
|||||||
includeMarkedContent = false,
|
includeMarkedContent = false,
|
||||||
sink,
|
sink,
|
||||||
seenStyles = new Set(),
|
seenStyles = new Set(),
|
||||||
|
viewBox,
|
||||||
}) {
|
}) {
|
||||||
// Ensure that `resources`/`stateManager` is correctly initialized,
|
// Ensure that `resources`/`stateManager` is correctly initialized,
|
||||||
// even if the provided parameter is e.g. `null`.
|
// even if the provided parameter is e.g. `null`.
|
||||||
@ -2393,22 +2394,35 @@ class PartialEvaluator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function compareWithLastPosition() {
|
function compareWithLastPosition() {
|
||||||
|
const currentTransform = getCurrentTextTransform();
|
||||||
|
let posX = currentTransform[4];
|
||||||
|
let posY = currentTransform[5];
|
||||||
|
|
||||||
|
const shiftedX = posX - viewBox[0];
|
||||||
|
const shiftedY = posY - viewBox[1];
|
||||||
|
|
||||||
|
if (
|
||||||
|
shiftedX < 0 ||
|
||||||
|
shiftedX > viewBox[2] ||
|
||||||
|
shiftedY < 0 ||
|
||||||
|
shiftedY > viewBox[3]
|
||||||
|
) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (
|
if (
|
||||||
!combineTextItems ||
|
!combineTextItems ||
|
||||||
!textState.font ||
|
!textState.font ||
|
||||||
!textContentItem.prevTransform
|
!textContentItem.prevTransform
|
||||||
) {
|
) {
|
||||||
return;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const currentTransform = getCurrentTextTransform();
|
|
||||||
let posX = currentTransform[4];
|
|
||||||
let posY = currentTransform[5];
|
|
||||||
let lastPosX = textContentItem.prevTransform[4];
|
let lastPosX = textContentItem.prevTransform[4];
|
||||||
let lastPosY = textContentItem.prevTransform[5];
|
let lastPosY = textContentItem.prevTransform[5];
|
||||||
|
|
||||||
if (lastPosX === posX && lastPosY === posY) {
|
if (lastPosX === posX && lastPosY === posY) {
|
||||||
return;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
let rotate = -1;
|
let rotate = -1;
|
||||||
@ -2473,16 +2487,16 @@ class PartialEvaluator {
|
|||||||
0.5 * textContentItem.width /* not the same column */
|
0.5 * textContentItem.width /* not the same column */
|
||||||
) {
|
) {
|
||||||
appendEOL();
|
appendEOL();
|
||||||
return;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
flushTextContentItem();
|
flushTextContentItem();
|
||||||
return;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Math.abs(advanceX) > textContentItem.width) {
|
if (Math.abs(advanceX) > textContentItem.width) {
|
||||||
appendEOL();
|
appendEOL();
|
||||||
return;
|
return true;
|
||||||
}
|
}
|
||||||
if (advanceY <= textOrientation * textContentItem.trackingSpaceMin) {
|
if (advanceY <= textOrientation * textContentItem.trackingSpaceMin) {
|
||||||
textContentItem.height += advanceY;
|
textContentItem.height += advanceY;
|
||||||
@ -2508,7 +2522,7 @@ class PartialEvaluator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const advanceX = (posX - lastPosX) / textContentItem.textAdvanceScale;
|
const advanceX = (posX - lastPosX) / textContentItem.textAdvanceScale;
|
||||||
@ -2523,15 +2537,15 @@ class PartialEvaluator {
|
|||||||
0.5 * textContentItem.height /* not the same line */
|
0.5 * textContentItem.height /* not the same line */
|
||||||
) {
|
) {
|
||||||
appendEOL();
|
appendEOL();
|
||||||
return;
|
return true;
|
||||||
}
|
}
|
||||||
flushTextContentItem();
|
flushTextContentItem();
|
||||||
return;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Math.abs(advanceY) > textContentItem.height) {
|
if (Math.abs(advanceY) > textContentItem.height) {
|
||||||
appendEOL();
|
appendEOL();
|
||||||
return;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (advanceX <= textOrientation * textContentItem.trackingSpaceMin) {
|
if (advanceX <= textOrientation * textContentItem.trackingSpaceMin) {
|
||||||
@ -2553,6 +2567,8 @@ class PartialEvaluator {
|
|||||||
textContentItem.width += advanceX;
|
textContentItem.width += advanceX;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildTextContentItem({ chars, extraSpacing }) {
|
function buildTextContentItem({ chars, extraSpacing }) {
|
||||||
@ -2617,7 +2633,10 @@ class PartialEvaluator {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
compareWithLastPosition();
|
if (!compareWithLastPosition()) {
|
||||||
|
// The glyph is not in page so just skip it.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Must be called after compareWithLastPosition because
|
// Must be called after compareWithLastPosition because
|
||||||
// the textContentItem could have been flushed.
|
// the textContentItem could have been flushed.
|
||||||
@ -3026,6 +3045,7 @@ class PartialEvaluator {
|
|||||||
includeMarkedContent,
|
includeMarkedContent,
|
||||||
sink: sinkWrapper,
|
sink: sinkWrapper,
|
||||||
seenStyles,
|
seenStyles,
|
||||||
|
viewBox,
|
||||||
})
|
})
|
||||||
.then(function () {
|
.then(function () {
|
||||||
if (!sinkWrapper.enqueueInvoked) {
|
if (!sinkWrapper.enqueueInvoked) {
|
||||||
|
1
test/pdfs/bug1755201.pdf.link
Normal file
1
test/pdfs/bug1755201.pdf.link
Normal file
@ -0,0 +1 @@
|
|||||||
|
https://bugzilla.mozilla.org/attachment.cgi?id=9263657
|
@ -1,4 +1,11 @@
|
|||||||
[
|
[
|
||||||
|
{ "id": "bug1755201",
|
||||||
|
"file": "pdfs/bug1755201.pdf",
|
||||||
|
"md5": "cece14097812d8a1f69e86a51e4a3804",
|
||||||
|
"rounds": 1,
|
||||||
|
"link": true,
|
||||||
|
"type": "other"
|
||||||
|
},
|
||||||
{ "id": "filled-background-range",
|
{ "id": "filled-background-range",
|
||||||
"file": "pdfs/filled-background.pdf",
|
"file": "pdfs/filled-background.pdf",
|
||||||
"md5": "2e3120255d9c3e79b96d2543b12d2589",
|
"md5": "2e3120255d9c3e79b96d2543b12d2589",
|
||||||
|
@ -2219,6 +2219,22 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
|||||||
await loadingTask.destroy();
|
await loadingTask.destroy();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("gets text content, and check that out-of-page text is not present (bug 1755201)", async function () {
|
||||||
|
if (isNodeJS) {
|
||||||
|
pending("Linked test-cases are not supported in Node.js.");
|
||||||
|
}
|
||||||
|
|
||||||
|
const loadingTask = getDocument(buildGetDocumentParams("bug1755201.pdf"));
|
||||||
|
const pdfDoc = await loadingTask.promise;
|
||||||
|
const pdfPage = await pdfDoc.getPage(6);
|
||||||
|
const { items } = await pdfPage.getTextContent();
|
||||||
|
const text = mergeText(items);
|
||||||
|
|
||||||
|
expect(/win aisle/.test(text)).toEqual(false);
|
||||||
|
|
||||||
|
await loadingTask.destroy();
|
||||||
|
});
|
||||||
|
|
||||||
it("gets empty structure tree", async function () {
|
it("gets empty structure tree", async function () {
|
||||||
const tree = await page.getStructTree();
|
const tree = await page.getStructTree();
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user