Improve text-selection for Type3 fonts with bogus /FontBBox-entries (issue 14999)

This extends PR 13461 by also building a fallback bounding box for Type3 fonts whose /FontBBox-entry is much too small.

*Please note:* While this patch improves things overall, copy-and-pasting still doesn't work perfectly for this document. In particular, the lowercase letter "c" cannot be selected/copied. However, this can also be reproduced in both Adobe Reader and PDFium (in Google Chrome), and it is caused by a lack of proper /ToUnicode-data in the PDF document.
This commit is contained in:
Jonas Jenwald 2022-07-05 14:08:53 +02:00
parent a1ac1a61b7
commit 79cfc548fc
4 changed files with 21 additions and 5 deletions

View File

@ -4404,8 +4404,10 @@ class TranslatedFont {
const fontResources = this.dict.get("Resources") || resources; const fontResources = this.dict.get("Resources") || resources;
const charProcOperatorList = Object.create(null); const charProcOperatorList = Object.create(null);
const isEmptyBBox = const fontBBox = Util.normalizeRect(translatedFont.bbox || [0, 0, 0, 0]),
!translatedFont.bbox || isArrayEqual(translatedFont.bbox, [0, 0, 0, 0]); width = fontBBox[2] - fontBBox[0],
height = fontBBox[3] - fontBBox[1];
const fontBBoxSize = Math.hypot(width, height);
for (const key of charProcs.getKeys()) { for (const key of charProcs.getKeys()) {
loadCharProcsPromise = loadCharProcsPromise.then(() => { loadCharProcsPromise = loadCharProcsPromise.then(() => {
@ -4426,7 +4428,7 @@ class TranslatedFont {
// colour-related parameters) in the graphics state; // colour-related parameters) in the graphics state;
// any use of such operators shall be ignored." // any use of such operators shall be ignored."
if (operatorList.fnArray[0] === OPS.setCharWidthAndBounds) { if (operatorList.fnArray[0] === OPS.setCharWidthAndBounds) {
this._removeType3ColorOperators(operatorList, isEmptyBBox); this._removeType3ColorOperators(operatorList, fontBBoxSize);
} }
charProcOperatorList[key] = operatorList.getIR(); charProcOperatorList[key] = operatorList.getIR();
@ -4454,7 +4456,7 @@ class TranslatedFont {
/** /**
* @private * @private
*/ */
_removeType3ColorOperators(operatorList, isEmptyBBox = false) { _removeType3ColorOperators(operatorList, fontBBoxSize = NaN) {
if ( if (
typeof PDFJSDev === "undefined" || typeof PDFJSDev === "undefined" ||
PDFJSDev.test("!PRODUCTION || TESTING") PDFJSDev.test("!PRODUCTION || TESTING")
@ -4467,12 +4469,19 @@ class TranslatedFont {
const charBBox = Util.normalizeRect(operatorList.argsArray[0].slice(2)), const charBBox = Util.normalizeRect(operatorList.argsArray[0].slice(2)),
width = charBBox[2] - charBBox[0], width = charBBox[2] - charBBox[0],
height = charBBox[3] - charBBox[1]; height = charBBox[3] - charBBox[1];
const charBBoxSize = Math.hypot(width, height);
if (width === 0 || height === 0) { if (width === 0 || height === 0) {
// Skip the d1 operator when its bounds are bogus (fixes issue14953.pdf). // Skip the d1 operator when its bounds are bogus (fixes issue14953.pdf).
operatorList.fnArray.splice(0, 1); operatorList.fnArray.splice(0, 1);
operatorList.argsArray.splice(0, 1); operatorList.argsArray.splice(0, 1);
} else if (isEmptyBBox) { } else if (
fontBBoxSize === 0 ||
Math.round(charBBoxSize / fontBBoxSize) >= 10
) {
// Override the fontBBox when it's undefined/empty, or when it's at least
// (approximately) one order of magnitude smaller than the charBBox
// (fixes issue14999_reduced.pdf).
if (!this._bbox) { if (!this._bbox) {
this._bbox = [Infinity, Infinity, -Infinity, -Infinity]; this._bbox = [Infinity, Infinity, -Infinity, -Infinity];
} }

View File

@ -121,6 +121,7 @@
!issue13916.pdf !issue13916.pdf
!issue14023.pdf !issue14023.pdf
!issue14438.pdf !issue14438.pdf
!issue14999_reduced.pdf
!bad-PageLabels.pdf !bad-PageLabels.pdf
!decodeACSuccessive.pdf !decodeACSuccessive.pdf
!issue13003.pdf !issue13003.pdf

Binary file not shown.

View File

@ -2848,6 +2848,12 @@
"link": false, "link": false,
"type": "text" "type": "text"
}, },
{ "id": "issue14999",
"file": "pdfs/issue14999_reduced.pdf",
"md5": "a4e664e734f6869aa66245e72d448874",
"rounds": 1,
"type": "text"
},
{ "id": "issue6901-eq", { "id": "issue6901-eq",
"file": "pdfs/issue6901.pdf", "file": "pdfs/issue6901.pdf",
"md5": "1a0604b1a7a3aaf2162b425a9a84230b", "md5": "1a0604b1a7a3aaf2162b425a9a84230b",