Merge pull request #14564 from calixteman/bug1755201
[api-minor] Don't add chars that are outside the page to the text content (bug 1755201)
This commit is contained in:
		
						commit
						263c89581f
					
				@ -471,6 +471,7 @@ class Page {
 | 
				
			|||||||
        includeMarkedContent,
 | 
					        includeMarkedContent,
 | 
				
			||||||
        combineTextItems,
 | 
					        combineTextItems,
 | 
				
			||||||
        sink,
 | 
					        sink,
 | 
				
			||||||
 | 
					        viewBox: this.view,
 | 
				
			||||||
      });
 | 
					      });
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
				
			|||||||
@ -2167,6 +2167,7 @@ class PartialEvaluator {
 | 
				
			|||||||
    includeMarkedContent = false,
 | 
					    includeMarkedContent = false,
 | 
				
			||||||
    sink,
 | 
					    sink,
 | 
				
			||||||
    seenStyles = new Set(),
 | 
					    seenStyles = new Set(),
 | 
				
			||||||
 | 
					    viewBox,
 | 
				
			||||||
  }) {
 | 
					  }) {
 | 
				
			||||||
    // Ensure that `resources`/`stateManager` is correctly initialized,
 | 
					    // Ensure that `resources`/`stateManager` is correctly initialized,
 | 
				
			||||||
    // even if the provided parameter is e.g. `null`.
 | 
					    // even if the provided parameter is e.g. `null`.
 | 
				
			||||||
@ -2393,22 +2394,35 @@ class PartialEvaluator {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    function compareWithLastPosition() {
 | 
					    function compareWithLastPosition() {
 | 
				
			||||||
 | 
					      const currentTransform = getCurrentTextTransform();
 | 
				
			||||||
 | 
					      let posX = currentTransform[4];
 | 
				
			||||||
 | 
					      let posY = currentTransform[5];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      const shiftedX = posX - viewBox[0];
 | 
				
			||||||
 | 
					      const shiftedY = posY - viewBox[1];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      if (
 | 
				
			||||||
 | 
					        shiftedX < 0 ||
 | 
				
			||||||
 | 
					        shiftedX > viewBox[2] ||
 | 
				
			||||||
 | 
					        shiftedY < 0 ||
 | 
				
			||||||
 | 
					        shiftedY > viewBox[3]
 | 
				
			||||||
 | 
					      ) {
 | 
				
			||||||
 | 
					        return false;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      if (
 | 
					      if (
 | 
				
			||||||
        !combineTextItems ||
 | 
					        !combineTextItems ||
 | 
				
			||||||
        !textState.font ||
 | 
					        !textState.font ||
 | 
				
			||||||
        !textContentItem.prevTransform
 | 
					        !textContentItem.prevTransform
 | 
				
			||||||
      ) {
 | 
					      ) {
 | 
				
			||||||
        return;
 | 
					        return true;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      const currentTransform = getCurrentTextTransform();
 | 
					 | 
				
			||||||
      let posX = currentTransform[4];
 | 
					 | 
				
			||||||
      let posY = currentTransform[5];
 | 
					 | 
				
			||||||
      let lastPosX = textContentItem.prevTransform[4];
 | 
					      let lastPosX = textContentItem.prevTransform[4];
 | 
				
			||||||
      let lastPosY = textContentItem.prevTransform[5];
 | 
					      let lastPosY = textContentItem.prevTransform[5];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      if (lastPosX === posX && lastPosY === posY) {
 | 
					      if (lastPosX === posX && lastPosY === posY) {
 | 
				
			||||||
        return;
 | 
					        return true;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      let rotate = -1;
 | 
					      let rotate = -1;
 | 
				
			||||||
@ -2473,16 +2487,16 @@ class PartialEvaluator {
 | 
				
			|||||||
            0.5 * textContentItem.width /* not the same column */
 | 
					            0.5 * textContentItem.width /* not the same column */
 | 
				
			||||||
          ) {
 | 
					          ) {
 | 
				
			||||||
            appendEOL();
 | 
					            appendEOL();
 | 
				
			||||||
            return;
 | 
					            return true;
 | 
				
			||||||
          }
 | 
					          }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
          flushTextContentItem();
 | 
					          flushTextContentItem();
 | 
				
			||||||
          return;
 | 
					          return true;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (Math.abs(advanceX) > textContentItem.width) {
 | 
					        if (Math.abs(advanceX) > textContentItem.width) {
 | 
				
			||||||
          appendEOL();
 | 
					          appendEOL();
 | 
				
			||||||
          return;
 | 
					          return true;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        if (advanceY <= textOrientation * textContentItem.trackingSpaceMin) {
 | 
					        if (advanceY <= textOrientation * textContentItem.trackingSpaceMin) {
 | 
				
			||||||
          textContentItem.height += advanceY;
 | 
					          textContentItem.height += advanceY;
 | 
				
			||||||
@ -2508,7 +2522,7 @@ class PartialEvaluator {
 | 
				
			|||||||
          }
 | 
					          }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return;
 | 
					        return true;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      const advanceX = (posX - lastPosX) / textContentItem.textAdvanceScale;
 | 
					      const advanceX = (posX - lastPosX) / textContentItem.textAdvanceScale;
 | 
				
			||||||
@ -2523,15 +2537,15 @@ class PartialEvaluator {
 | 
				
			|||||||
          0.5 * textContentItem.height /* not the same line */
 | 
					          0.5 * textContentItem.height /* not the same line */
 | 
				
			||||||
        ) {
 | 
					        ) {
 | 
				
			||||||
          appendEOL();
 | 
					          appendEOL();
 | 
				
			||||||
          return;
 | 
					          return true;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        flushTextContentItem();
 | 
					        flushTextContentItem();
 | 
				
			||||||
        return;
 | 
					        return true;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      if (Math.abs(advanceY) > textContentItem.height) {
 | 
					      if (Math.abs(advanceY) > textContentItem.height) {
 | 
				
			||||||
        appendEOL();
 | 
					        appendEOL();
 | 
				
			||||||
        return;
 | 
					        return true;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      if (advanceX <= textOrientation * textContentItem.trackingSpaceMin) {
 | 
					      if (advanceX <= textOrientation * textContentItem.trackingSpaceMin) {
 | 
				
			||||||
@ -2553,6 +2567,8 @@ class PartialEvaluator {
 | 
				
			|||||||
          textContentItem.width += advanceX;
 | 
					          textContentItem.width += advanceX;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      return true;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    function buildTextContentItem({ chars, extraSpacing }) {
 | 
					    function buildTextContentItem({ chars, extraSpacing }) {
 | 
				
			||||||
@ -2617,7 +2633,10 @@ class PartialEvaluator {
 | 
				
			|||||||
          continue;
 | 
					          continue;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        compareWithLastPosition();
 | 
					        if (!compareWithLastPosition()) {
 | 
				
			||||||
 | 
					          // The glyph is outside the page, so just skip it.
 | 
				
			||||||
 | 
					          continue;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // Must be called after compareWithLastPosition because
 | 
					        // Must be called after compareWithLastPosition because
 | 
				
			||||||
        // the textContentItem could have been flushed.
 | 
					        // the textContentItem could have been flushed.
 | 
				
			||||||
@ -3026,6 +3045,7 @@ class PartialEvaluator {
 | 
				
			|||||||
                    includeMarkedContent,
 | 
					                    includeMarkedContent,
 | 
				
			||||||
                    sink: sinkWrapper,
 | 
					                    sink: sinkWrapper,
 | 
				
			||||||
                    seenStyles,
 | 
					                    seenStyles,
 | 
				
			||||||
 | 
					                    viewBox,
 | 
				
			||||||
                  })
 | 
					                  })
 | 
				
			||||||
                  .then(function () {
 | 
					                  .then(function () {
 | 
				
			||||||
                    if (!sinkWrapper.enqueueInvoked) {
 | 
					                    if (!sinkWrapper.enqueueInvoked) {
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										1
									
								
								test/pdfs/bug1755201.pdf.link
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								test/pdfs/bug1755201.pdf.link
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1 @@
 | 
				
			|||||||
 | 
					https://bugzilla.mozilla.org/attachment.cgi?id=9263657
 | 
				
			||||||
@ -1,4 +1,11 @@
 | 
				
			|||||||
[
 | 
					[
 | 
				
			||||||
 | 
					    {  "id": "bug1755201",
 | 
				
			||||||
 | 
					      "file": "pdfs/bug1755201.pdf",
 | 
				
			||||||
 | 
					      "md5": "cece14097812d8a1f69e86a51e4a3804",
 | 
				
			||||||
 | 
					      "rounds": 1,
 | 
				
			||||||
 | 
					      "link": true,
 | 
				
			||||||
 | 
					      "type": "other"
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
    {  "id": "filled-background-range",
 | 
					    {  "id": "filled-background-range",
 | 
				
			||||||
      "file": "pdfs/filled-background.pdf",
 | 
					      "file": "pdfs/filled-background.pdf",
 | 
				
			||||||
      "md5": "2e3120255d9c3e79b96d2543b12d2589",
 | 
					      "md5": "2e3120255d9c3e79b96d2543b12d2589",
 | 
				
			||||||
 | 
				
			|||||||
@ -2219,6 +2219,22 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
 | 
				
			|||||||
      await loadingTask.destroy();
 | 
					      await loadingTask.destroy();
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    it("gets text content, and check that out-of-page text is not present (bug 1755201)", async function () {
 | 
				
			||||||
 | 
					      if (isNodeJS) {
 | 
				
			||||||
 | 
					        pending("Linked test-cases are not supported in Node.js.");
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      const loadingTask = getDocument(buildGetDocumentParams("bug1755201.pdf"));
 | 
				
			||||||
 | 
					      const pdfDoc = await loadingTask.promise;
 | 
				
			||||||
 | 
					      const pdfPage = await pdfDoc.getPage(6);
 | 
				
			||||||
 | 
					      const { items } = await pdfPage.getTextContent();
 | 
				
			||||||
 | 
					      const text = mergeText(items);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      expect(/win aisle/.test(text)).toEqual(false);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      await loadingTask.destroy();
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    it("gets empty structure tree", async function () {
 | 
					    it("gets empty structure tree", async function () {
 | 
				
			||||||
      const tree = await page.getStructTree();
 | 
					      const tree = await page.getStructTree();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user