Add a (global) cache to the getCharUnicodeCategory
function
Given that the regular expression has already become more complex (after the initial patch adding it), it seems to me that it probably cannot hurt to add a global cache to reduce unnecessary re-parsing. Obviously the `Glyph`-instances are being cached *per* font; however, in most documents multiple fonts are being used, and in practice there's very often a fair amount of overlap between the /ToUnicode-data in different fonts[1]. Consider for example loading and rendering the entire `tracemonkey.pdf` document (from the test-suite), which isn't a particularly large document. In that case the `getCharUnicodeCategory` function is being called a total of `601` times; however, there are only `106` *unique* unicode-chars being checked. *Please note:* In practice I suppose that this won't have a *huge* effect on overall performance; however, given the relative simplicity of this patch I figured that it'd not hurt to submit it for review. --- [1] Consider e.g. how there are usually different fonts used for regular, bold, and italic text, respectively.
This commit is contained in:
parent
9367d54009
commit
8836593b9e
@ -13,19 +13,6 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import {
|
||||
clearPrimitiveCaches,
|
||||
Dict,
|
||||
isDict,
|
||||
isName,
|
||||
isRef,
|
||||
isRefsEqual,
|
||||
isStream,
|
||||
Name,
|
||||
Ref,
|
||||
RefSet,
|
||||
RefSetCache,
|
||||
} from "./primitives.js";
|
||||
import {
|
||||
collectActions,
|
||||
MissingDataException,
|
||||
@ -48,8 +35,21 @@ import {
|
||||
stringToUTF8String,
|
||||
warn,
|
||||
} from "../shared/util.js";
|
||||
import {
|
||||
Dict,
|
||||
isDict,
|
||||
isName,
|
||||
isRef,
|
||||
isRefsEqual,
|
||||
isStream,
|
||||
Name,
|
||||
Ref,
|
||||
RefSet,
|
||||
RefSetCache,
|
||||
} from "./primitives.js";
|
||||
import { NameTree, NumberTree } from "./name_number_tree.js";
|
||||
import { BaseStream } from "./base_stream.js";
|
||||
import { clearGlobalCaches } from "./cleanup_helper.js";
|
||||
import { ColorSpace } from "./colorspace.js";
|
||||
import { FileSpec } from "./file_spec.js";
|
||||
import { GlobalImageCache } from "./image_utils.js";
|
||||
@ -1069,7 +1069,7 @@ class Catalog {
|
||||
}
|
||||
|
||||
cleanup(manuallyTriggered = false) {
|
||||
clearPrimitiveCaches();
|
||||
clearGlobalCaches();
|
||||
this.globalImageCache.clear(/* onlyData = */ manuallyTriggered);
|
||||
this.pageKidsCountCache.clear();
|
||||
this.pageIndexCache.clear();
|
||||
|
24
src/core/cleanup_helper.js
Normal file
24
src/core/cleanup_helper.js
Normal file
@ -0,0 +1,24 @@
|
||||
/* Copyright 2022 Mozilla Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { clearPrimitiveCaches } from "./primitives.js";
|
||||
import { clearUnicodeCaches } from "./unicode.js";
|
||||
|
||||
/**
 * Clear every module-level cache in one call, so that callers need only a
 * single cleanup entry point.
 *
 * Invokes `clearPrimitiveCaches` (imported from "./primitives.js") followed by
 * `clearUnicodeCaches` (imported from "./unicode.js").
 */
function clearGlobalCaches() {
  clearPrimitiveCaches();
  clearUnicodeCaches();
}
|
||||
|
||||
export { clearGlobalCaches };
|
@ -35,16 +35,6 @@ import {
|
||||
Util,
|
||||
warn,
|
||||
} from "../shared/util.js";
|
||||
import {
|
||||
clearPrimitiveCaches,
|
||||
Dict,
|
||||
isDict,
|
||||
isName,
|
||||
isRef,
|
||||
isStream,
|
||||
Name,
|
||||
Ref,
|
||||
} from "./primitives.js";
|
||||
import {
|
||||
collectActions,
|
||||
getInheritableProperty,
|
||||
@ -54,12 +44,22 @@ import {
|
||||
XRefEntryException,
|
||||
XRefParseException,
|
||||
} from "./core_utils.js";
|
||||
import {
|
||||
Dict,
|
||||
isDict,
|
||||
isName,
|
||||
isRef,
|
||||
isStream,
|
||||
Name,
|
||||
Ref,
|
||||
} from "./primitives.js";
|
||||
import { getXfaFontDict, getXfaFontName } from "./xfa_fonts.js";
|
||||
import { NullStream, Stream } from "./stream.js";
|
||||
import { AnnotationFactory } from "./annotation.js";
|
||||
import { BaseStream } from "./base_stream.js";
|
||||
import { calculateMD5 } from "./crypto.js";
|
||||
import { Catalog } from "./catalog.js";
|
||||
import { clearGlobalCaches } from "./cleanup_helper.js";
|
||||
import { Linearization } from "./parser.js";
|
||||
import { ObjectLoader } from "./object_loader.js";
|
||||
import { OperatorList } from "./operator_list.js";
|
||||
@ -1449,7 +1449,7 @@ class PDFDocument {
|
||||
async cleanup(manuallyTriggered = false) {
|
||||
return this.catalog
|
||||
? this.catalog.cleanup(manuallyTriggered)
|
||||
: clearPrimitiveCaches();
|
||||
: clearGlobalCaches();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1641,16 +1641,29 @@ function reverseIfRtl(chars) {
|
||||
}
|
||||
|
||||
// Capture groups: (1) a whitespace character (`\s`), (2) a character with the
// Unicode general category Mn, (3) a character with the general category Cf.
// Note that, because of alternation precedence, `^` applies only to the first
// alternative and `$` only to the last one.
const SpecialCharRegExp = new RegExp("^(\\s)|(\\p{Mn})|(\\p{Cf})$", "u");
// Module-level cache: maps each checked string to its category object, so that
// the regular expression runs at most once per distinct input until the cache
// is cleared.
const CategoryCache = new Map();

/**
 * Classify a string according to `SpecialCharRegExp`, caching the result.
 *
 * The returned object is stored in, and later served from, `CategoryCache`;
 * repeated calls with the same input therefore return the *same* object, which
 * callers should treat as read-only.
 *
 * @param {string} char - The string to classify.
 * @returns {{isWhitespace: boolean, isZeroWidthDiacritic: boolean,
 *   isInvisibleFormatMark: boolean}} Flags telling which capture group of
 *   `SpecialCharRegExp` (whitespace, Mn, Cf respectively) matched; all flags
 *   are `false` when the expression does not match.
 */
function getCharUnicodeCategory(char) {
  const cachedCategory = CategoryCache.get(char);
  if (cachedCategory) {
    return cachedCategory;
  }
  // `match` returns `null` when nothing matches, hence the `groups &&` guards.
  const groups = char.match(SpecialCharRegExp);
  const category = {
    isWhitespace: !!(groups && groups[1]),
    isZeroWidthDiacritic: !!(groups && groups[2]),
    isInvisibleFormatMark: !!(groups && groups[3]),
  };
  CategoryCache.set(char, category);
  return category;
}
|
||||
|
||||
/**
 * Empty the module-level `CategoryCache`, discarding every category object
 * previously stored by `getCharUnicodeCategory`.
 */
function clearUnicodeCaches() {
  CategoryCache.clear();
}
|
||||
|
||||
export {
|
||||
clearUnicodeCaches,
|
||||
getCharUnicodeCategory,
|
||||
getNormalizedUnicodes,
|
||||
getUnicodeForGlyph,
|
||||
|
@ -32,8 +32,9 @@ import {
|
||||
VerbosityLevel,
|
||||
warn,
|
||||
} from "../shared/util.js";
|
||||
import { clearPrimitiveCaches, Dict, Ref } from "./primitives.js";
|
||||
import { Dict, Ref } from "./primitives.js";
|
||||
import { LocalPdfManager, NetworkPdfManager } from "./pdf_manager.js";
|
||||
import { clearGlobalCaches } from "./cleanup_helper.js";
|
||||
import { incrementalUpdate } from "./writer.js";
|
||||
import { isNodeJS } from "../shared/is_node.js";
|
||||
import { MessageHandler } from "../shared/message_handler.js";
|
||||
@ -795,7 +796,7 @@ class WorkerMessageHandler {
|
||||
|
||||
pdfManager = null;
|
||||
} else {
|
||||
clearPrimitiveCaches();
|
||||
clearGlobalCaches();
|
||||
}
|
||||
if (cancelXHRs) {
|
||||
cancelXHRs(new AbortException("Worker was terminated."));
|
||||
|
Loading…
Reference in New Issue
Block a user