Merge pull request #913 from notmasteryet/issue-863

Bad pdf with scanned image
This commit is contained in:
Artur Adib 2011-12-09 07:29:02 -08:00
commit d65c38c995
5 changed files with 24 additions and 8 deletions

View File

@ -392,12 +392,17 @@ var PDFDocModel = (function pdfDoc() {
if (find(stream, 'endobj', 1024)) if (find(stream, 'endobj', 1024))
startXRef = stream.pos + 6; startXRef = stream.pos + 6;
} else { } else {
// Find startxref at the end of the file. // Find startxref by jumping backward from the end of the file.
var start = stream.end - 1024; var step = 1024;
if (start < 0) var found = false, pos = stream.end;
start = 0; while (!found && pos > 0) {
stream.pos = start; pos -= step - 'startxref'.length;
if (find(stream, 'startxref', 1024, true)) { if (pos < 0)
pos = 0;
stream.pos = pos;
found = find(stream, 'startxref', step, true);
}
if (found) {
stream.skip(9); stream.skip(9);
var ch; var ch;
do { do {

View File

@ -17,4 +17,5 @@
!devicen.pdf !devicen.pdf
!cmykjpeg.pdf !cmykjpeg.pdf
!issue840.pdf !issue840.pdf
!scan-bad.pdf
!freeculture.pdf !freeculture.pdf

BIN
test/pdfs/scan-bad.pdf Executable file

Binary file not shown.

View File

@ -12,6 +12,7 @@ DOC_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__),".."))
ANAL = True ANAL = True
DEFAULT_MANIFEST_FILE = 'test_manifest.json' DEFAULT_MANIFEST_FILE = 'test_manifest.json'
EQLOG_FILE = 'eq.log' EQLOG_FILE = 'eq.log'
BROWSERLOG_FILE = 'browser.log'
REFDIR = 'ref' REFDIR = 'ref'
TMPDIR = 'tmp' TMPDIR = 'tmp'
VERBOSE = False VERBOSE = False
@ -229,6 +230,7 @@ class BaseBrowserCommand(object):
def setup(self): def setup(self):
self.tempDir = tempfile.mkdtemp() self.tempDir = tempfile.mkdtemp()
self.profileDir = os.path.join(self.tempDir, "profile") self.profileDir = os.path.join(self.tempDir, "profile")
self.browserLog = open(BROWSERLOG_FILE, "w")
def teardown(self): def teardown(self):
# If the browser is still running, wait up to ten seconds for it to quit # If the browser is still running, wait up to ten seconds for it to quit
@ -245,6 +247,8 @@ class BaseBrowserCommand(object):
if self.tempDir is not None and os.path.exists(self.tempDir): if self.tempDir is not None and os.path.exists(self.tempDir):
shutil.rmtree(self.tempDir) shutil.rmtree(self.tempDir)
self.browserLog.close()
def start(self, url): def start(self, url):
raise Exception("Can't start BaseBrowserCommand") raise Exception("Can't start BaseBrowserCommand")
@ -262,7 +266,7 @@ class FirefoxBrowserCommand(BaseBrowserCommand):
if platform.system() == "Darwin": if platform.system() == "Darwin":
cmds.append("-foreground") cmds.append("-foreground")
cmds.extend(["-no-remote", "-profile", self.profileDir, url]) cmds.extend(["-no-remote", "-profile", self.profileDir, url])
self.process = subprocess.Popen(cmds) self.process = subprocess.Popen(cmds, stdout = self.browserLog, stderr = self.browserLog)
class ChromeBrowserCommand(BaseBrowserCommand): class ChromeBrowserCommand(BaseBrowserCommand):
def _fixupMacPath(self): def _fixupMacPath(self):
@ -272,7 +276,7 @@ class ChromeBrowserCommand(BaseBrowserCommand):
cmds = [self.path] cmds = [self.path]
cmds.extend(["--user-data-dir=%s" % self.profileDir, cmds.extend(["--user-data-dir=%s" % self.profileDir,
"--no-first-run", "--disable-sync", url]) "--no-first-run", "--disable-sync", url])
self.process = subprocess.Popen(cmds) self.process = subprocess.Popen(cmds, stdout = self.browserLog, stderr = self.browserLog)
def makeBrowserCommand(browser): def makeBrowserCommand(browser):
path = browser["path"].lower() path = browser["path"].lower()

View File

@ -227,6 +227,12 @@
"rounds": 1, "rounds": 1,
"type": "load" "type": "load"
}, },
{ "id": "scan-bad",
"file": "pdfs/scan-bad.pdf",
"md5": "4cf988f01ab83f61aca57f406dfd6584",
"rounds": 1,
"type": "load"
},
{ "id": "ibwa-bad", { "id": "ibwa-bad",
"file": "pdfs/ibwa-bad.pdf", "file": "pdfs/ibwa-bad.pdf",
"md5": "6ca059d32b74ac2688ae06f727fee755", "md5": "6ca059d32b74ac2688ae06f727fee755",