Merge pull request #9827 from Snuffleupagus/misc-corrupt-pdf-fixes
Fix various corrupt PDF files (issue 9252, issue 9418)
This commit is contained in:
commit
98ea39f9d0
@ -855,7 +855,16 @@ var XRef = (function XRefClosure() {
|
|||||||
}
|
}
|
||||||
trailerDict.assignXref(this);
|
trailerDict.assignXref(this);
|
||||||
this.trailer = trailerDict;
|
this.trailer = trailerDict;
|
||||||
var encrypt = trailerDict.get('Encrypt');
|
|
||||||
|
let encrypt;
|
||||||
|
try {
|
||||||
|
encrypt = trailerDict.get('Encrypt');
|
||||||
|
} catch (ex) {
|
||||||
|
if (ex instanceof MissingDataException) {
|
||||||
|
throw ex;
|
||||||
|
}
|
||||||
|
warn(`XRef.parse - Invalid "Encrypt" reference: "${ex}".`);
|
||||||
|
}
|
||||||
if (isDict(encrypt)) {
|
if (isDict(encrypt)) {
|
||||||
var ids = trailerDict.get('ID');
|
var ids = trailerDict.get('ID');
|
||||||
var fileId = (ids && ids.length) ? ids[0] : '';
|
var fileId = (ids && ids.length) ? ids[0] : '';
|
||||||
@ -868,8 +877,22 @@ var XRef = (function XRefClosure() {
|
|||||||
this.pdfManager.password);
|
this.pdfManager.password);
|
||||||
}
|
}
|
||||||
|
|
||||||
// get the root dictionary (catalog) object
|
// Get the root dictionary (catalog) object, and do some basic validation.
|
||||||
if (!(this.root = trailerDict.get('Root'))) {
|
let root;
|
||||||
|
try {
|
||||||
|
root = trailerDict.get('Root');
|
||||||
|
} catch (ex) {
|
||||||
|
if (ex instanceof MissingDataException) {
|
||||||
|
throw ex;
|
||||||
|
}
|
||||||
|
warn(`XRef.parse - Invalid "Root" reference: "${ex}".`);
|
||||||
|
}
|
||||||
|
if (isDict(root) && root.has('Pages')) {
|
||||||
|
this.root = root;
|
||||||
|
} else {
|
||||||
|
if (!recoveryMode) {
|
||||||
|
throw new XRefParseException();
|
||||||
|
}
|
||||||
throw new FormatError('Invalid root reference');
|
throw new FormatError('Invalid root reference');
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -1208,7 +1231,7 @@ var XRef = (function XRefClosure() {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
startPos += contentLength;
|
startPos = endPos;
|
||||||
}
|
}
|
||||||
let content = buffer.subarray(position, position + contentLength);
|
let content = buffer.subarray(position, position + contentLength);
|
||||||
|
|
||||||
@ -1237,7 +1260,7 @@ var XRef = (function XRefClosure() {
|
|||||||
this.readXRef(/* recoveryMode */ true);
|
this.readXRef(/* recoveryMode */ true);
|
||||||
}
|
}
|
||||||
// finding main trailer
|
// finding main trailer
|
||||||
var dict;
|
let trailerDict;
|
||||||
for (i = 0, ii = trailers.length; i < ii; ++i) {
|
for (i = 0, ii = trailers.length; i < ii; ++i) {
|
||||||
stream.pos = trailers[i];
|
stream.pos = trailers[i];
|
||||||
var parser = new Parser(new Lexer(stream), /* allowStreams = */ true,
|
var parser = new Parser(new Lexer(stream), /* allowStreams = */ true,
|
||||||
@ -1247,18 +1270,33 @@ var XRef = (function XRefClosure() {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// read the trailer dictionary
|
// read the trailer dictionary
|
||||||
dict = parser.getObj();
|
let dict = parser.getObj();
|
||||||
if (!isDict(dict)) {
|
if (!isDict(dict)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
// Do some basic validation of the trailer/root dictionary candidate.
|
||||||
|
let rootDict;
|
||||||
|
try {
|
||||||
|
rootDict = dict.get('Root');
|
||||||
|
} catch (ex) {
|
||||||
|
if (ex instanceof MissingDataException) {
|
||||||
|
throw ex;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!isDict(rootDict) || !rootDict.has('Pages')) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
// taking the first one with 'ID'
|
// taking the first one with 'ID'
|
||||||
if (dict.has('ID')) {
|
if (dict.has('ID')) {
|
||||||
return dict;
|
return dict;
|
||||||
}
|
}
|
||||||
|
// The current dictionary is a candidate, but continue searching.
|
||||||
|
trailerDict = dict;
|
||||||
}
|
}
|
||||||
// no tailer with 'ID', taking last one (if exists)
|
// No trailer with 'ID', taking last one (if exists).
|
||||||
if (dict) {
|
if (trailerDict) {
|
||||||
return dict;
|
return trailerDict;
|
||||||
}
|
}
|
||||||
// nothing helps
|
// nothing helps
|
||||||
throw new InvalidPDFException('Invalid PDF structure');
|
throw new InvalidPDFException('Invalid PDF structure');
|
||||||
|
@ -18,8 +18,8 @@ import {
|
|||||||
PredictorStream, RunLengthStream
|
PredictorStream, RunLengthStream
|
||||||
} from './stream';
|
} from './stream';
|
||||||
import {
|
import {
|
||||||
assert, FormatError, info, isNum, isString, MissingDataException, StreamType,
|
assert, FormatError, info, isNum, isSpace, isString, MissingDataException,
|
||||||
warn
|
StreamType, warn
|
||||||
} from '../shared/util';
|
} from '../shared/util';
|
||||||
import {
|
import {
|
||||||
Cmd, Dict, EOF, isCmd, isDict, isEOF, isName, Name, Ref
|
Cmd, Dict, EOF, isCmd, isDict, isEOF, isName, Name, Ref
|
||||||
@ -721,7 +721,7 @@ var Lexer = (function LexerClosure() {
|
|||||||
var ch = this.currentChar;
|
var ch = this.currentChar;
|
||||||
var eNotation = false;
|
var eNotation = false;
|
||||||
var divideBy = 0; // different from 0 if it's a floating point value
|
var divideBy = 0; // different from 0 if it's a floating point value
|
||||||
var sign = 1;
|
var sign = 0;
|
||||||
|
|
||||||
if (ch === 0x2D) { // '-'
|
if (ch === 0x2D) { // '-'
|
||||||
sign = -1;
|
sign = -1;
|
||||||
@ -732,10 +732,7 @@ var Lexer = (function LexerClosure() {
|
|||||||
ch = this.nextChar();
|
ch = this.nextChar();
|
||||||
}
|
}
|
||||||
} else if (ch === 0x2B) { // '+'
|
} else if (ch === 0x2B) { // '+'
|
||||||
ch = this.nextChar();
|
sign = 1;
|
||||||
}
|
|
||||||
if (ch === 0x2E) { // '.'
|
|
||||||
divideBy = 10;
|
|
||||||
ch = this.nextChar();
|
ch = this.nextChar();
|
||||||
}
|
}
|
||||||
if (ch === 0x0A || ch === 0x0D) { // LF, CR
|
if (ch === 0x0A || ch === 0x0D) { // LF, CR
|
||||||
@ -744,11 +741,22 @@ var Lexer = (function LexerClosure() {
|
|||||||
ch = this.nextChar();
|
ch = this.nextChar();
|
||||||
} while (ch === 0x0A || ch === 0x0D);
|
} while (ch === 0x0A || ch === 0x0D);
|
||||||
}
|
}
|
||||||
|
if (ch === 0x2E) { // '.'
|
||||||
|
divideBy = 10;
|
||||||
|
ch = this.nextChar();
|
||||||
|
}
|
||||||
if (ch < 0x30 || ch > 0x39) { // '0' - '9'
|
if (ch < 0x30 || ch > 0x39) { // '0' - '9'
|
||||||
|
if (divideBy === 10 && sign === 0 &&
|
||||||
|
(isSpace(ch) || ch === /* EOF = */ -1)) {
|
||||||
|
// This is consistent with Adobe Reader (fixes issue9252.pdf).
|
||||||
|
warn('Lexer.getNumber - treating a single decimal point as zero.');
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
throw new FormatError(
|
throw new FormatError(
|
||||||
`Invalid number: ${String.fromCharCode(ch)} (charCode ${ch})`);
|
`Invalid number: ${String.fromCharCode(ch)} (charCode ${ch})`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sign = sign || 1;
|
||||||
var baseValue = ch - 0x30; // '0'
|
var baseValue = ch - 0x30; // '0'
|
||||||
var powerValue = 0;
|
var powerValue = 0;
|
||||||
var powerValueSign = 1;
|
var powerValueSign = 1;
|
||||||
|
2
test/pdfs/.gitignore
vendored
2
test/pdfs/.gitignore
vendored
@ -65,8 +65,10 @@
|
|||||||
!issue8823.pdf
|
!issue8823.pdf
|
||||||
!issue9084.pdf
|
!issue9084.pdf
|
||||||
!issue9105_reduced.pdf
|
!issue9105_reduced.pdf
|
||||||
|
!issue9252.pdf
|
||||||
!issue9262_reduced.pdf
|
!issue9262_reduced.pdf
|
||||||
!issue9291.pdf
|
!issue9291.pdf
|
||||||
|
!issue9418.pdf
|
||||||
!issue9458.pdf
|
!issue9458.pdf
|
||||||
!bad-PageLabels.pdf
|
!bad-PageLabels.pdf
|
||||||
!decodeACSuccessive.pdf
|
!decodeACSuccessive.pdf
|
||||||
|
90
test/pdfs/issue9252.pdf
Normal file
90
test/pdfs/issue9252.pdf
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
%PDF-1.4
|
||||||
|
1 0 obj
|
||||||
|
<<
|
||||||
|
/Type /Catalog
|
||||||
|
/Version /1.4
|
||||||
|
/Pages 5 0 R
|
||||||
|
/Outlines 3 0 R
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
2 0 obj
|
||||||
|
<<
|
||||||
|
/Title (Test)
|
||||||
|
/Author (Test)
|
||||||
|
/Creator (sharpPDF)
|
||||||
|
/CreationDate (2017124)
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
3 0 obj
|
||||||
|
<<
|
||||||
|
/Type /Outlines
|
||||||
|
/Count 0
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
4 0 obj
|
||||||
|
<<
|
||||||
|
/Type /Font
|
||||||
|
/Subtype /Type1
|
||||||
|
/Name /F1
|
||||||
|
/BaseFont /Helvetica
|
||||||
|
/Encoding /WinAnsiEncoding
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
5 0 obj
|
||||||
|
<<
|
||||||
|
/Type /Pages
|
||||||
|
/Count 1
|
||||||
|
/Kids [6 0 R ]
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
6 0 obj
|
||||||
|
<<
|
||||||
|
/Type /Page
|
||||||
|
/Parent 5 0 R
|
||||||
|
/Resources <</Font <</F1 4 0 R >>
|
||||||
|
>>
|
||||||
|
/MediaBox [0 0 612 792]
|
||||||
|
/CropBox [0 0 612 792]
|
||||||
|
/Rotate 0
|
||||||
|
/ProcSet [/PDF /Text /ImageC]
|
||||||
|
/Contents [7 0 R ]
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
7 0 obj
|
||||||
|
<<
|
||||||
|
/Filter [/ASCIIHexDecode]
|
||||||
|
/Length 105
|
||||||
|
>>
|
||||||
|
stream
|
||||||
|
710A42540A2F46312031352054660A2E202E3539202E38342072670A3530203735302054640A28546573742920546A0A45540A51>
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
0 19
|
||||||
|
0000000000 65535 f
|
||||||
|
0000000010 00000 n
|
||||||
|
0000000098 00000 n
|
||||||
|
0000000202 00000 n
|
||||||
|
0000000254 00000 n
|
||||||
|
0000000370 00000 n
|
||||||
|
0000000491 00000 n
|
||||||
|
0000000615 00000 n
|
||||||
|
0000000743 00000 n
|
||||||
|
0000000857 00000 n
|
||||||
|
0000000976 00000 n
|
||||||
|
0000001099 00000 n
|
||||||
|
0000001226 00000 n
|
||||||
|
0000001345 00000 n
|
||||||
|
0000001464 00000 n
|
||||||
|
0000001585 00000 n
|
||||||
|
0000001710 00000 n
|
||||||
|
0000001777 00000 n
|
||||||
|
0000002102 00000 n
|
||||||
|
trailer
|
||||||
|
<<
|
||||||
|
/Size 102
|
||||||
|
/Root 1 0 R
|
||||||
|
/Info 2 0 R
|
||||||
|
>>
|
||||||
|
startxref
|
||||||
|
36673
|
||||||
|
%%EOF
|
BIN
test/pdfs/issue9418.pdf
Normal file
BIN
test/pdfs/issue9418.pdf
Normal file
Binary file not shown.
@ -741,6 +741,20 @@
|
|||||||
"lastPage": 1,
|
"lastPage": 1,
|
||||||
"type": "eq"
|
"type": "eq"
|
||||||
},
|
},
|
||||||
|
{ "id": "issue9252",
|
||||||
|
"file": "pdfs/issue9252.pdf",
|
||||||
|
"md5": "c7d039d808d9344a95d2c9cfa7586ca3",
|
||||||
|
"rounds": 1,
|
||||||
|
"link": false,
|
||||||
|
"type": "eq"
|
||||||
|
},
|
||||||
|
{ "id": "issue9418",
|
||||||
|
"file": "pdfs/issue9418.pdf",
|
||||||
|
"md5": "32ecad8098acb1938539d47944ecb54b",
|
||||||
|
"rounds": 1,
|
||||||
|
"link": false,
|
||||||
|
"type": "eq"
|
||||||
|
},
|
||||||
{ "id": "issue9262",
|
{ "id": "issue9262",
|
||||||
"file": "pdfs/issue9262_reduced.pdf",
|
"file": "pdfs/issue9262_reduced.pdf",
|
||||||
"md5": "5347ce2d7b3866625c22e115fd90e0de",
|
"md5": "5347ce2d7b3866625c22e115fd90e0de",
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { Lexer, Linearization } from '../../src/core/parser';
|
import { Lexer, Linearization } from '../../src/core/parser';
|
||||||
|
import { FormatError } from '../../src/shared/util';
|
||||||
import { Name } from '../../src/core/primitives';
|
import { Name } from '../../src/core/primitives';
|
||||||
import { StringStream } from '../../src/core/stream';
|
import { StringStream } from '../../src/core/stream';
|
||||||
|
|
||||||
@ -58,11 +59,32 @@ describe('parser', function() {
|
|||||||
|
|
||||||
it('should ignore line-breaks between operator and digit in number',
|
it('should ignore line-breaks between operator and digit in number',
|
||||||
function() {
|
function() {
|
||||||
var input = new StringStream('-\r\n205.88');
|
let minusInput = new StringStream('-\r\n205.88');
|
||||||
var lexer = new Lexer(input);
|
let minusLexer = new Lexer(minusInput);
|
||||||
var result = lexer.getNumber();
|
|
||||||
|
|
||||||
expect(result).toEqual(-205.88);
|
expect(minusLexer.getNumber()).toEqual(-205.88);
|
||||||
|
|
||||||
|
let plusInput = new StringStream('+\r\n205.88');
|
||||||
|
let plusLexer = new Lexer(plusInput);
|
||||||
|
|
||||||
|
expect(plusLexer.getNumber()).toEqual(205.88);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should treat a single decimal point as zero', function() {
|
||||||
|
let input = new StringStream('.');
|
||||||
|
let lexer = new Lexer(input);
|
||||||
|
|
||||||
|
expect(lexer.getNumber()).toEqual(0);
|
||||||
|
|
||||||
|
let numbers = ['..', '-.', '+.', '-\r\n.', '+\r\n.'];
|
||||||
|
for (let number of numbers) {
|
||||||
|
let input = new StringStream(number);
|
||||||
|
let lexer = new Lexer(input);
|
||||||
|
|
||||||
|
expect(function() {
|
||||||
|
return lexer.getNumber();
|
||||||
|
}).toThrowError(FormatError, /^Invalid number:\s/);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
it('should handle glued numbers and operators', function() {
|
it('should handle glued numbers and operators', function() {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user