Merge pull request #15854 from Snuffleupagus/issue-15803
Re-factor searching for incomplete objects in `XRef.indexObjects` (issue 15803)
This commit is contained in:
commit
8aed0c3613
@ -431,16 +431,14 @@ class XRef {
|
|||||||
}
|
}
|
||||||
return skipped;
|
return skipped;
|
||||||
}
|
}
|
||||||
|
const gEndobjRegExp = /\b(endobj|\d+\s+\d+\s+obj|xref|trailer)\b/g;
|
||||||
|
const gStartxrefRegExp = /\b(startxref|\d+\s+\d+\s+obj)\b/g;
|
||||||
const objRegExp = /^(\d+)\s+(\d+)\s+obj\b/;
|
const objRegExp = /^(\d+)\s+(\d+)\s+obj\b/;
|
||||||
const endobjRegExp = /\bendobj[\b\s]$/;
|
|
||||||
const nestedObjRegExp = /\s+(\d+\s+\d+\s+obj[\b\s<])$/;
|
|
||||||
const CHECK_CONTENT_LENGTH = 25;
|
|
||||||
|
|
||||||
const trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]);
|
const trailerBytes = new Uint8Array([116, 114, 97, 105, 108, 101, 114]);
|
||||||
const startxrefBytes = new Uint8Array([
|
const startxrefBytes = new Uint8Array([
|
||||||
115, 116, 97, 114, 116, 120, 114, 101, 102,
|
115, 116, 97, 114, 116, 120, 114, 101, 102,
|
||||||
]);
|
]);
|
||||||
const objBytes = new Uint8Array([111, 98, 106]);
|
|
||||||
const xrefBytes = new Uint8Array([47, 88, 82, 101, 102]);
|
const xrefBytes = new Uint8Array([47, 88, 82, 101, 102]);
|
||||||
|
|
||||||
// Clear out any existing entries, since they may be bogus.
|
// Clear out any existing entries, since they may be bogus.
|
||||||
@ -450,6 +448,7 @@ class XRef {
|
|||||||
const stream = this.stream;
|
const stream = this.stream;
|
||||||
stream.pos = 0;
|
stream.pos = 0;
|
||||||
const buffer = stream.getBytes(),
|
const buffer = stream.getBytes(),
|
||||||
|
bufferStr = bytesToString(buffer),
|
||||||
length = buffer.length;
|
length = buffer.length;
|
||||||
let position = stream.start;
|
let position = stream.start;
|
||||||
const trailers = [],
|
const trailers = [],
|
||||||
@ -484,8 +483,8 @@ class XRef {
|
|||||||
const num = m[1] | 0,
|
const num = m[1] | 0,
|
||||||
gen = m[2] | 0;
|
gen = m[2] | 0;
|
||||||
|
|
||||||
|
const startPos = position + token.length;
|
||||||
let contentLength,
|
let contentLength,
|
||||||
startPos = position + token.length,
|
|
||||||
updateEntries = false;
|
updateEntries = false;
|
||||||
if (!this.entries[num]) {
|
if (!this.entries[num]) {
|
||||||
updateEntries = true;
|
updateEntries = true;
|
||||||
@ -519,31 +518,22 @@ class XRef {
|
|||||||
// Find the next "obj" string, rather than "endobj", to ensure that
|
// Find the next "obj" string, rather than "endobj", to ensure that
|
||||||
// we won't skip over a new 'obj' operator in corrupt files where
|
// we won't skip over a new 'obj' operator in corrupt files where
|
||||||
// 'endobj' operators are missing (fixes issue9105_reduced.pdf).
|
// 'endobj' operators are missing (fixes issue9105_reduced.pdf).
|
||||||
while (startPos < length) {
|
gEndobjRegExp.lastIndex = startPos;
|
||||||
const endPos = startPos + skipUntil(buffer, startPos, objBytes) + 4;
|
const match = gEndobjRegExp.exec(bufferStr);
|
||||||
|
|
||||||
|
if (match) {
|
||||||
|
const endPos = gEndobjRegExp.lastIndex + 1;
|
||||||
contentLength = endPos - position;
|
contentLength = endPos - position;
|
||||||
|
|
||||||
const checkPos = Math.max(endPos - CHECK_CONTENT_LENGTH, startPos);
|
if (match[1] !== "endobj") {
|
||||||
const tokenStr = bytesToString(buffer.subarray(checkPos, endPos));
|
warn(
|
||||||
|
`indexObjects: Found "${match[1]}" inside of another "obj", ` +
|
||||||
// Check if the current object ends with an 'endobj' operator.
|
'caused by missing "endobj" -- trying to recover.'
|
||||||
if (endobjRegExp.test(tokenStr)) {
|
);
|
||||||
break;
|
contentLength -= match[1].length + 1;
|
||||||
} else {
|
|
||||||
// Check if an "obj" occurrence is actually a new object,
|
|
||||||
// i.e. the current object is missing the 'endobj' operator.
|
|
||||||
const objToken = nestedObjRegExp.exec(tokenStr);
|
|
||||||
|
|
||||||
if (objToken && objToken[1]) {
|
|
||||||
warn(
|
|
||||||
'indexObjects: Found new "obj" inside of another "obj", ' +
|
|
||||||
'caused by missing "endobj" -- trying to recover.'
|
|
||||||
);
|
|
||||||
contentLength -= objToken[1].length;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
startPos = endPos;
|
} else {
|
||||||
|
contentLength = length - position;
|
||||||
}
|
}
|
||||||
const content = buffer.subarray(position, position + contentLength);
|
const content = buffer.subarray(position, position + contentLength);
|
||||||
|
|
||||||
@ -562,26 +552,26 @@ class XRef {
|
|||||||
) {
|
) {
|
||||||
trailers.push(position);
|
trailers.push(position);
|
||||||
|
|
||||||
const contentLength = skipUntil(buffer, position, startxrefBytes);
|
const startPos = position + token.length;
|
||||||
|
let contentLength;
|
||||||
// Attempt to handle (some) corrupt documents, where no 'startxref'
|
// Attempt to handle (some) corrupt documents, where no 'startxref'
|
||||||
// operators are present (fixes issue15590.pdf).
|
// operators are present (fixes issue15590.pdf).
|
||||||
if (position + contentLength >= length) {
|
gStartxrefRegExp.lastIndex = startPos;
|
||||||
const endPos = position + skipUntil(buffer, position, objBytes) + 4;
|
const match = gStartxrefRegExp.exec(bufferStr);
|
||||||
|
|
||||||
const checkPos = Math.max(endPos - CHECK_CONTENT_LENGTH, position);
|
if (match) {
|
||||||
const tokenStr = bytesToString(buffer.subarray(checkPos, endPos));
|
const endPos = gStartxrefRegExp.lastIndex + 1;
|
||||||
|
contentLength = endPos - position;
|
||||||
|
|
||||||
// Find the first "obj" occurrence after the 'trailer' operator.
|
if (match[1] !== "startxref") {
|
||||||
const objToken = nestedObjRegExp.exec(tokenStr);
|
|
||||||
|
|
||||||
if (objToken && objToken[1]) {
|
|
||||||
warn(
|
warn(
|
||||||
'indexObjects: Found first "obj" after "trailer", ' +
|
`indexObjects: Found "${match[1]}" after "trailer", ` +
|
||||||
'caused by missing "startxref" -- trying to recover.'
|
'caused by missing "startxref" -- trying to recover.'
|
||||||
);
|
);
|
||||||
position = endPos - objToken[1].length;
|
contentLength -= match[1].length + 1;
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
contentLength = length - position;
|
||||||
}
|
}
|
||||||
position += contentLength;
|
position += contentLength;
|
||||||
} else {
|
} else {
|
||||||
|
1
test/pdfs/issue15803.pdf.link
Normal file
1
test/pdfs/issue15803.pdf.link
Normal file
@ -0,0 +1 @@
|
|||||||
|
https://github.com/mozilla/pdf.js/files/10200431/ocg.pdf
|
@ -1761,6 +1761,15 @@
|
|||||||
"link": false,
|
"link": false,
|
||||||
"type": "eq"
|
"type": "eq"
|
||||||
},
|
},
|
||||||
|
{ "id": "issue15803",
|
||||||
|
"file": "pdfs/issue15803.pdf",
|
||||||
|
"md5": "e501a4418d4ece5be7ce4e8acf029100",
|
||||||
|
"rounds": 1,
|
||||||
|
"link": true,
|
||||||
|
"lastPage": 1,
|
||||||
|
"type": "eq",
|
||||||
|
"annotations": true
|
||||||
|
},
|
||||||
{ "id": "issue9105_other",
|
{ "id": "issue9105_other",
|
||||||
"file": "pdfs/issue9105_other.pdf",
|
"file": "pdfs/issue9105_other.pdf",
|
||||||
"md5": "4c8b9c2cceb9c5d621e1d50b3dc38efc",
|
"md5": "4c8b9c2cceb9c5d621e1d50b3dc38efc",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user