Make the `find` helper function, in src/core/document.js, more efficient by using `peekBytes` rather than reading the stream one byte at a time

*Please note:* A similar change was attempted in PR 5005, but it was subsequently backed out in PR 5069.

Unfortunately I don't think anyone ever tried to debug *exactly* why it didn't work, since it ought to have worked, and having re-tested this now I'm not able to reproduce the problem any more. However, given just how inefficient the current code is, with thousands of strictly unnecessary function calls for each `find` invocation, I'd really like to try fixing this again.
This commit is contained in:
Jonas Jenwald 2019-07-06 11:34:56 +02:00
parent 5517c94d66
commit bdc31f8b50

View File

@ -15,8 +15,8 @@
/* eslint no-var: error */
import {
assert, FormatError, info, isArrayBuffer, isBool, isNum, isSpace, isString,
OPS, shadow, stringToBytes, stringToPDFString, Util, warn
assert, bytesToString, FormatError, info, isArrayBuffer, isBool, isNum,
isSpace, isString, OPS, shadow, stringToBytes, stringToPDFString, Util, warn
} from '../shared/util';
import { Catalog, ObjectLoader, XRef } from './obj';
import { Dict, isDict, isName, isStream, Ref } from './primitives';
@ -337,20 +337,11 @@ const FINGERPRINT_FIRST_BYTES = 1024;
const EMPTY_FINGERPRINT = '\x00\x00\x00\x00\x00\x00\x00' +
'\x00\x00\x00\x00\x00\x00\x00\x00\x00';
function find(stream, needle, limit, backwards) {
const pos = stream.pos;
const end = stream.end;
if (pos + limit > end) {
limit = end - pos;
}
function find(stream, needle, limit, backwards = false) {
assert(limit > 0, 'The "limit" must be a positive integer.');
const strBuf = [];
for (let i = 0; i < limit; ++i) {
strBuf.push(String.fromCharCode(stream.getByte()));
}
const str = strBuf.join('');
const str = bytesToString(stream.peekBytes(limit));
stream.pos = pos;
const index = backwards ? str.lastIndexOf(needle) : str.indexOf(needle);
if (index === -1) {
return false;