diff --git a/examples/node/pdf2png/pdf2png.js b/examples/node/pdf2png/pdf2png.js index aa61ef57e..4165533a6 100644 --- a/examples/node/pdf2png/pdf2png.js +++ b/examples/node/pdf2png/pdf2png.js @@ -54,6 +54,10 @@ const pdfjsLib = require("pdfjs-dist/legacy/build/pdf.js"); const CMAP_URL = "../../../node_modules/pdfjs-dist/cmaps/"; const CMAP_PACKED = true; +// Where the standard fonts are located. +const STANDARD_FONT_DATA_URL = + "../../../node_modules/pdfjs-dist/standard_fonts/"; + // Loading file from file system into typed array. const pdfPath = process.argv[2] || "../../../web/compressed.tracemonkey-pldi-09.pdf"; @@ -64,6 +68,7 @@ const loadingTask = pdfjsLib.getDocument({ data, cMapUrl: CMAP_URL, cMapPacked: CMAP_PACKED, + standardFontDataUrl: STANDARD_FONT_DATA_URL, }); loadingTask.promise .then(function (pdfDocument) { diff --git a/external/standard_fonts/FoxitDingbats.pfb b/external/standard_fonts/FoxitDingbats.pfb new file mode 100644 index 000000000..30d52963e Binary files /dev/null and b/external/standard_fonts/FoxitDingbats.pfb differ diff --git a/external/standard_fonts/FoxitFixed.pfb b/external/standard_fonts/FoxitFixed.pfb new file mode 100644 index 000000000..f12dcbce5 Binary files /dev/null and b/external/standard_fonts/FoxitFixed.pfb differ diff --git a/external/standard_fonts/FoxitFixedBold.pfb b/external/standard_fonts/FoxitFixedBold.pfb new file mode 100644 index 000000000..cf8e24aee Binary files /dev/null and b/external/standard_fonts/FoxitFixedBold.pfb differ diff --git a/external/standard_fonts/FoxitFixedBoldItalic.pfb b/external/standard_fonts/FoxitFixedBoldItalic.pfb new file mode 100644 index 000000000..d2880017c Binary files /dev/null and b/external/standard_fonts/FoxitFixedBoldItalic.pfb differ diff --git a/external/standard_fonts/FoxitFixedItalic.pfb b/external/standard_fonts/FoxitFixedItalic.pfb new file mode 100644 index 000000000..d71697d4b Binary files /dev/null and b/external/standard_fonts/FoxitFixedItalic.pfb differ diff --git 
a/external/standard_fonts/FoxitSans.pfb b/external/standard_fonts/FoxitSans.pfb new file mode 100644 index 000000000..37f244bd9 Binary files /dev/null and b/external/standard_fonts/FoxitSans.pfb differ diff --git a/external/standard_fonts/FoxitSansBold.pfb b/external/standard_fonts/FoxitSansBold.pfb new file mode 100644 index 000000000..affcf316d Binary files /dev/null and b/external/standard_fonts/FoxitSansBold.pfb differ diff --git a/external/standard_fonts/FoxitSansBoldItalic.pfb b/external/standard_fonts/FoxitSansBoldItalic.pfb new file mode 100644 index 000000000..e1f60b754 Binary files /dev/null and b/external/standard_fonts/FoxitSansBoldItalic.pfb differ diff --git a/external/standard_fonts/FoxitSansItalic.pfb b/external/standard_fonts/FoxitSansItalic.pfb new file mode 100644 index 000000000..c04b0a5ae Binary files /dev/null and b/external/standard_fonts/FoxitSansItalic.pfb differ diff --git a/external/standard_fonts/FoxitSerif.pfb b/external/standard_fonts/FoxitSerif.pfb new file mode 100644 index 000000000..3fa682efb Binary files /dev/null and b/external/standard_fonts/FoxitSerif.pfb differ diff --git a/external/standard_fonts/FoxitSerifBold.pfb b/external/standard_fonts/FoxitSerifBold.pfb new file mode 100644 index 000000000..ff7c6ddec Binary files /dev/null and b/external/standard_fonts/FoxitSerifBold.pfb differ diff --git a/external/standard_fonts/FoxitSerifBoldItalic.pfb b/external/standard_fonts/FoxitSerifBoldItalic.pfb new file mode 100644 index 000000000..460231fb8 Binary files /dev/null and b/external/standard_fonts/FoxitSerifBoldItalic.pfb differ diff --git a/external/standard_fonts/FoxitSerifItalic.pfb b/external/standard_fonts/FoxitSerifItalic.pfb new file mode 100644 index 000000000..d03a7c781 Binary files /dev/null and b/external/standard_fonts/FoxitSerifItalic.pfb differ diff --git a/external/standard_fonts/FoxitSymbol.pfb b/external/standard_fonts/FoxitSymbol.pfb new file mode 100644 index 000000000..c8f9bca78 Binary files /dev/null and 
b/external/standard_fonts/FoxitSymbol.pfb differ diff --git a/external/standard_fonts/LICENSE b/external/standard_fonts/LICENSE new file mode 100644 index 000000000..8b4ed6ddd --- /dev/null +++ b/external/standard_fonts/LICENSE @@ -0,0 +1,27 @@ +// Copyright 2014 PDFium Authors. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/external/standard_fonts/README.md b/external/standard_fonts/README.md new file mode 100644 index 000000000..332ce47a6 --- /dev/null +++ b/external/standard_fonts/README.md @@ -0,0 +1,11 @@ +The files in this directory were extracted from Pdfium + +Original copyright notice: + +``` +Copyright 2014 PDFium Authors. All rights reserved. + Use of this source code is governed by a BSD-style license that can be + found in the LICENSE file. + +Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com +``` \ No newline at end of file diff --git a/gulpfile.js b/gulpfile.js index 7b8b46727..d49e5fcfc 100644 --- a/gulpfile.js +++ b/gulpfile.js @@ -812,6 +812,14 @@ function buildGeneric(defines, dir) { base: "external/bcmaps", }) .pipe(gulp.dest(dir + "web/cmaps")), + gulp + .src( + ["external/standard_fonts/*.pfb", "external/standard_fonts/LICENSE"], + { + base: "external/standard_fonts", + } + ) + .pipe(gulp.dest(dir + "web/standard_fonts")), preprocessHTML("web/viewer.html", defines).pipe(gulp.dest(dir + "web")), preprocessCSS("web/viewer.css", "generic", defines, true) .pipe(postcss([calc(), autoprefixer(AUTOPREFIXER_CONFIG)])) @@ -980,6 +988,14 @@ function buildMinified(defines, dir) { base: "external/bcmaps", }) .pipe(gulp.dest(dir + "web/cmaps")), + gulp + .src( + ["external/standard_fonts/*.pfb", "external/standard_fonts/LICENSE"], + { + base: "external/standard_fonts", + } + ) + .pipe(gulp.dest(dir + "web/standard_fonts")), preprocessHTML("web/viewer.html", defines).pipe(gulp.dest(dir + "web")), preprocessCSS("web/viewer.css", "minified", defines, true) @@ -1214,7 +1230,17 @@ gulp.task( base: "external/bcmaps", }) .pipe(gulp.dest(MOZCENTRAL_CONTENT_DIR + "web/cmaps")), - + gulp + .src( + [ + "external/standard_fonts/*.pfb", + "external/standard_fonts/LICENSE", + ], + { + base: "external/standard_fonts", + } + ) + .pipe(gulp.dest(MOZCENTRAL_CONTENT_DIR + "web/standard_fonts")), preprocessHTML("web/viewer.html", defines).pipe( 
gulp.dest(MOZCENTRAL_CONTENT_DIR + "web") ), @@ -1305,6 +1331,17 @@ gulp.task( base: "external/bcmaps", }) .pipe(gulp.dest(CHROME_BUILD_CONTENT_DIR + "web/cmaps")), + gulp + .src( + [ + "external/standard_fonts/*.pfb", + "external/standard_fonts/LICENSE", + ], + { + base: "external/standard_fonts", + } + ) + .pipe(gulp.dest(CHROME_BUILD_CONTENT_DIR + "web/standard_fonts")), preprocessHTML("web/viewer.html", defines).pipe( gulp.dest(CHROME_BUILD_CONTENT_DIR + "web") @@ -2051,6 +2088,11 @@ gulp.task( gulp .src(GENERIC_DIR + "web/cmaps/**/*", { base: GENERIC_DIR + "web" }) .pipe(gulp.dest(DIST_DIR)), + gulp + .src(GENERIC_DIR + "web/standard_fonts/**/*", { + base: GENERIC_DIR + "web", + }) + .pipe(gulp.dest(DIST_DIR)), gulp .src([ GENERIC_DIR + "build/{pdf,pdf.worker,pdf.sandbox}.js", diff --git a/src/core/cff_font.js b/src/core/cff_font.js index e7caa8afa..33805586a 100644 --- a/src/core/cff_font.js +++ b/src/core/cff_font.js @@ -74,7 +74,10 @@ class CFFFont { return charCodeToGlyphId; } - const encoding = cff.encoding ? cff.encoding.encoding : null; + let encoding = cff.encoding ? 
cff.encoding.encoding : null; + if (properties.isInternalFont) { + encoding = properties.defaultEncoding; + } charCodeToGlyphId = type1FontGlyphMapping(properties, encoding, charsets); return charCodeToGlyphId; } diff --git a/src/core/evaluator.js b/src/core/evaluator.js index e7705418f..1b1dc1b73 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -64,7 +64,9 @@ import { } from "./unicode.js"; import { getSerifFonts, + getStandardFontName, getStdFontMap, + getStdFontNameToFileMap, getSymbolsFonts, } from "./standard_fonts.js"; import { getTilingPatternIR, Pattern } from "./pattern.js"; @@ -77,6 +79,7 @@ import { LocalImageCache, LocalTilingPatternCache, } from "./image_utils.js"; +import { NullStream, Stream } from "./stream.js"; import { bidi } from "./bidi.js"; import { ColorSpace } from "./colorspace.js"; import { DecodeStream } from "./decode_stream.js"; @@ -84,7 +87,6 @@ import { getGlyphsUnicode } from "./glyphlist.js"; import { getLookupTableFactory } from "./core_utils.js"; import { getMetrics } from "./metrics.js"; import { MurmurHash3_64 } from "./murmurhash3.js"; -import { NullStream } from "./stream.js"; import { OperatorList } from "./operator_list.js"; import { PDFImage } from "./image.js"; @@ -94,6 +96,8 @@ const DefaultPartialEvaluatorOptions = Object.freeze({ ignoreErrors: false, isEvalSupported: true, fontExtraProperties: false, + standardFontDataUrl: null, + useSystemFonts: true, }); const PatternType = { @@ -381,6 +385,43 @@ class PartialEvaluator { return data; } + async fetchStandardFontData(name) { + // The symbol fonts are not consistent across platforms, always load the + // font data for them. 
+ if ( + this.options.useSystemFonts && + name !== "Symbol" && + name !== "ZapfDingbats" + ) { + return null; + } + const standardFontNameToFileName = getStdFontNameToFileMap(); + const filename = standardFontNameToFileName[name]; + if (this.options.standardFontDataUrl !== null) { + const url = `${this.options.standardFontDataUrl}${filename}.pfb`; + const response = await fetch(url); + if (!response.ok) { + warn( + `fetchStandardFontData failed to fetch file "${url}" with "${response.statusText}".` + ); + return null; + } + return new Stream(await response.arrayBuffer()); + } + // Get the data on the main thread instead. + try { + const data = await this.handler.sendWithPromise("FetchStandardFontData", { + filename, + }); + return new Stream(data); + } catch (e) { + warn( + `fetchStandardFontData failed to fetch file "${filename}" with "${e}".` + ); + } + return null; + } + async buildFormXObject( resources, xobj, @@ -3725,6 +3766,7 @@ class PartialEvaluator { properties = { type, name: baseFontName, + loadedName: baseDict.loadedName, widths: metrics.widths, defaultWidth: metrics.defaultWidth, flags, @@ -3734,6 +3776,13 @@ class PartialEvaluator { isType3Font, }; const widths = dict.get("Widths"); + const standardFontName = getStandardFontName(baseFontName); + let file = null; + if (standardFontName) { + properties.isStandardFont = true; + file = await this.fetchStandardFontData(standardFontName); + properties.isInternalFont = !!file; + } return this.extractDataStructures(dict, dict, properties).then( newProperties => { if (widths) { @@ -3749,7 +3798,7 @@ class PartialEvaluator { newProperties ); } - return new Font(baseFontName, null, newProperties); + return new Font(baseFontName, file, newProperties); } ); } @@ -3802,6 +3851,8 @@ class PartialEvaluator { warn(`translateFont - fetching "${fontName.name}" font file: "${ex}".`); fontFile = new NullStream(); } + let isStandardFont = false; + let isInternalFont = false; if (fontFile) { if (fontFile.dict) { const 
subtypeEntry = fontFile.dict.get("Subtype"); @@ -3812,6 +3863,13 @@ class PartialEvaluator { length2 = fontFile.dict.get("Length2"); length3 = fontFile.dict.get("Length3"); } + } else if (type === "Type1") { + const standardFontName = getStandardFontName(fontName.name); + if (standardFontName) { + isStandardFont = true; + fontFile = await this.fetchStandardFontData(standardFontName); + isInternalFont = !!fontFile; + } } properties = { @@ -3822,6 +3880,8 @@ class PartialEvaluator { length1, length2, length3, + isStandardFont, + isInternalFont, loadedName: baseDict.loadedName, composite, fixedPitch: false, diff --git a/src/core/fonts.js b/src/core/fonts.js index 874f903a8..ebf84af21 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -30,6 +30,7 @@ import { FontFlags, getFontType, MacStandardGlyphOrdering, + normalizeFontName, recoverGlyphName, SEAC_ANALYSIS_ENABLED, } from "./fonts_utils.js"; @@ -130,6 +131,9 @@ function adjustWidths(properties) { } function adjustToUnicode(properties, builtInEncoding) { + if (properties.isInternalFont) { + return; + } if (properties.hasIncludedToUnicodeMap) { return; // The font dictionary has a `ToUnicode` entry. 
} @@ -932,7 +936,7 @@ class Font { } this.data = data; - this.fontType = getFontType(type, subtype); + this.fontType = getFontType(type, subtype, properties.isStandardFont); // Transfer some properties again that could change during font conversion this.fontMatrix = properties.fontMatrix; @@ -971,7 +975,7 @@ class Font { const name = this.name; const type = this.type; const subtype = this.subtype; - let fontName = name.replace(/[,_]/g, "-").replace(/\s/g, ""); + let fontName = normalizeFontName(name); const stdFontMap = getStdFontMap(), nonStdFontMap = getNonStdFontMap(); const isStandardFont = !!stdFontMap[fontName]; @@ -1090,7 +1094,7 @@ class Font { this.toFontChar = map; } this.loadedName = fontName.split("-")[0]; - this.fontType = getFontType(type, subtype); + this.fontType = getFontType(type, subtype, properties.isStandardFont); } checkAndRepair(name, font, properties) { diff --git a/src/core/fonts_utils.js b/src/core/fonts_utils.js index c4b3f3808..b67dfe01d 100644 --- a/src/core/fonts_utils.js +++ b/src/core/fonts_utils.js @@ -78,9 +78,12 @@ const MacStandardGlyphOrdering = [ "threequarters", "franc", "Gbreve", "gbreve", "Idotaccent", "Scedilla", "scedilla", "Cacute", "cacute", "Ccaron", "ccaron", "dcroat"]; -function getFontType(type, subtype) { +function getFontType(type, subtype, isStandardFont = false) { switch (type) { case "Type1": + if (isStandardFont) { + return FontType.TYPE1STANDARD; + } return subtype === "Type1C" ? 
FontType.TYPE1C : FontType.TYPE1; case "CIDFontType0": return subtype === "CIDFontType0C" @@ -135,7 +138,17 @@ function type1FontGlyphMapping(properties, builtInEncoding, glyphNames) { let glyphId, charCode, baseEncoding; const isSymbolicFont = !!(properties.flags & FontFlags.Symbolic); - if (properties.baseEncodingName) { + if (properties.isInternalFont) { + baseEncoding = builtInEncoding; + for (charCode = 0; charCode < baseEncoding.length; charCode++) { + glyphId = glyphNames.indexOf(baseEncoding[charCode]); + if (glyphId >= 0) { + charCodeToGlyphId[charCode] = glyphId; + } else { + charCodeToGlyphId[charCode] = 0; // notdef + } + } + } else if (properties.baseEncodingName) { // If a valid base encoding name was used, the mapping is initialized with // that. baseEncoding = getEncoding(properties.baseEncodingName); @@ -193,10 +206,15 @@ function type1FontGlyphMapping(properties, builtInEncoding, glyphNames) { return charCodeToGlyphId; } +function normalizeFontName(name) { + return name.replace(/[,_]/g, "-").replace(/\s/g, ""); +} + export { FontFlags, getFontType, MacStandardGlyphOrdering, + normalizeFontName, recoverGlyphName, SEAC_ANALYSIS_ENABLED, type1FontGlyphMapping, diff --git a/src/core/standard_fonts.js b/src/core/standard_fonts.js index 65892dd14..fb5f843d0 100644 --- a/src/core/standard_fonts.js +++ b/src/core/standard_fonts.js @@ -14,12 +14,30 @@ */ import { getLookupTableFactory } from "./core_utils.js"; +import { normalizeFontName } from "./fonts_utils.js"; /** * Hold a map of decoded fonts and of the standard fourteen Type1 * fonts and their acronyms. 
*/ const getStdFontMap = getLookupTableFactory(function (t) { + // The standard 14 fonts: + t["Times-Roman"] = "Times-Roman"; + t.Helvetica = "Helvetica"; + t.Courier = "Courier"; + t.Symbol = "Symbol"; + t["Times-Bold"] = "Times-Bold"; + t["Helvetica-Bold"] = "Helvetica-Bold"; + t["Courier-Bold"] = "Courier-Bold"; + t.ZapfDingbats = "ZapfDingbats"; + t["Times-Italic"] = "Times-Italic"; + t["Helvetica-Oblique"] = "Helvetica-Oblique"; + t["Courier-Oblique"] = "Courier-Oblique"; + t["Times-BoldItalic"] = "Times-BoldItalic"; + t["Helvetica-BoldOblique"] = "Helvetica-BoldOblique"; + t["Courier-BoldOblique"] = "Courier-BoldOblique"; + + // Extra mappings t.ArialNarrow = "Helvetica"; t["ArialNarrow-Bold"] = "Helvetica-Bold"; t["ArialNarrow-BoldItalic"] = "Helvetica-BoldOblique"; @@ -40,7 +58,6 @@ const getStdFontMap = getLookupTableFactory(function (t) { t["Arial-BoldMT"] = "Helvetica-Bold"; t["Arial-ItalicMT"] = "Helvetica-Oblique"; t.ArialMT = "Helvetica"; - t["Courier-Bold"] = "Courier-Bold"; t["Courier-BoldItalic"] = "Courier-BoldOblique"; t["Courier-Italic"] = "Courier-Oblique"; t.CourierNew = "Courier"; @@ -51,12 +68,8 @@ const getStdFontMap = getLookupTableFactory(function (t) { t["CourierNewPS-BoldMT"] = "Courier-Bold"; t["CourierNewPS-ItalicMT"] = "Courier-Oblique"; t.CourierNewPSMT = "Courier"; - t.Helvetica = "Helvetica"; - t["Helvetica-Bold"] = "Helvetica-Bold"; t["Helvetica-BoldItalic"] = "Helvetica-BoldOblique"; - t["Helvetica-BoldOblique"] = "Helvetica-BoldOblique"; t["Helvetica-Italic"] = "Helvetica-Oblique"; - t["Helvetica-Oblique"] = "Helvetica-Oblique"; t["Symbol-Bold"] = "Symbol"; t["Symbol-BoldItalic"] = "Symbol"; t["Symbol-Italic"] = "Symbol"; @@ -77,6 +90,23 @@ const getStdFontMap = getLookupTableFactory(function (t) { t["TimesNewRomanPSMT-Italic"] = "Times-Italic"; }); +const getStdFontNameToFileMap = getLookupTableFactory(function (t) { + t.Courier = "FoxitFixed"; + t["Courier-Bold"] = "FoxitFixedBold"; + t["Courier-BoldOblique"] = 
"FoxitFixedBoldItalic"; + t["Courier-Oblique"] = "FoxitFixedItalic"; + t.Helvetica = "FoxitSans"; + t["Helvetica-Bold"] = "FoxitSansBold"; + t["Helvetica-BoldOblique"] = "FoxitSansBoldItalic"; + t["Helvetica-Oblique"] = "FoxitSansItalic"; + t["Times-Roman"] = "FoxitSerif"; + t["Times-Bold"] = "FoxitSerifBold"; + t["Times-BoldItalic"] = "FoxitSerifBoldItalic"; + t["Times-Italic"] = "FoxitSerifItalic"; + t.Symbol = "FoxitSymbol"; + t.ZapfDingbats = "FoxitDingbats"; +}); + /** * Holds the map of the non-standard fonts that might be included as * a standard fonts without glyph data. @@ -763,11 +793,19 @@ const getSupplementalGlyphMapForCalibri = getLookupTableFactory(function (t) { t[1086] = 45; }); +function getStandardFontName(name) { + const fontName = normalizeFontName(name); + const stdFontMap = getStdFontMap(); + return stdFontMap[fontName]; +} + export { getGlyphMapForStandardFonts, getNonStdFontMap, getSerifFonts, + getStandardFontName, getStdFontMap, + getStdFontNameToFileMap, getSupplementalGlyphMapForArialBlack, getSupplementalGlyphMapForCalibri, getSymbolsFonts, diff --git a/src/core/worker.js b/src/core/worker.js index 4c121485c..a69b76743 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -411,6 +411,8 @@ class WorkerMessageHandler { ignoreErrors: data.ignoreErrors, isEvalSupported: data.isEvalSupported, fontExtraProperties: data.fontExtraProperties, + useSystemFonts: data.useSystemFonts, + standardFontDataUrl: data.standardFontDataUrl, }; getPdfManager(data, evaluatorOptions, data.enableXfa) diff --git a/src/display/api.js b/src/display/api.js index 23a27f70b..ee2830699 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -40,6 +40,7 @@ import { deprecated, DOMCanvasFactory, DOMCMapReaderFactory, + DOMStandardFontDataFactory, isDataScheme, loadScript, PageViewport, @@ -47,7 +48,11 @@ import { StatTimer, } from "./display_utils.js"; import { FontFaceObject, FontLoader } from "./font_loader.js"; -import { NodeCanvasFactory, 
NodeCMapReaderFactory } from "./node_utils.js"; +import { + NodeCanvasFactory, + NodeCMapReaderFactory, + NodeStandardFontDataFactory, +} from "./node_utils.js"; import { AnnotationStorage } from "./annotation_storage.js"; import { apiCompatibilityParams } from "./api_compatibility.js"; import { CanvasGraphics } from "./canvas.js"; @@ -69,6 +74,10 @@ const DefaultCMapReaderFactory = (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) && isNodeJS ? NodeCMapReaderFactory : DOMCMapReaderFactory; +const DefaultStandardFontDataFactory = + (typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) && isNodeJS + ? NodeStandardFontDataFactory + : DOMStandardFontDataFactory; /** * @typedef {function} IPDFStreamFactory @@ -143,6 +152,19 @@ function setPDFNetworkStreamFactory(pdfNetworkStreamFactory) { * reading built-in CMap files. Providing a custom factory is useful for * environments without Fetch API or `XMLHttpRequest` support, such as * Node.js. The default value is {DOMCMapReaderFactory}. + * @property {boolean} [useSystemFonts] - When true, fonts that aren't embedded + * in the PDF will fall back to a system font. Defaults to true for web + * environments and false for node. + * @property {string} [standardFontDataUrl] - The URL where the standard font + * files are located. Include the trailing slash. + * @property {boolean} [useWorkerFetch] - Enable using fetch in the worker for + * resources. This is currently only used for fetching the font data from the + * worker thread. When `true`, StandardFontDataFactory will be ignored. The + * default value is `true` in web environment and `false` for Node. + * @property {Object} [StandardFontDataFactory] - The factory that will be used + * when reading the standard font files. Providing a custom factory is useful + * for environments without Fetch API or `XMLHttpRequest` support, such as + * Node.js. The default value is {DOMStandardFontDataFactory}. 
* @property {boolean} [stopAtErrors] - Reject certain promises, e.g. * `getOperatorList`, `getTextContent`, and `RenderTask`, when the associated * PDF data cannot be successfully parsed, instead of attempting to recover @@ -287,6 +309,8 @@ function getDocument(src) { params.rangeChunkSize = params.rangeChunkSize || DEFAULT_RANGE_CHUNK_SIZE; params.CMapReaderFactory = params.CMapReaderFactory || DefaultCMapReaderFactory; + params.StandardFontDataFactory = + params.StandardFontDataFactory || DefaultStandardFontDataFactory; params.ignoreErrors = params.stopAtErrors !== true; params.fontExtraProperties = params.fontExtraProperties === true; params.pdfBug = params.pdfBug === true; @@ -304,6 +328,13 @@ function getDocument(src) { if (!Number.isInteger(params.maxImageSize)) { params.maxImageSize = -1; } + if (typeof params.useSystemFonts !== "boolean") { + params.useSystemFonts = !isNodeJS; + } + if (typeof params.useWorkerFetch !== "boolean") { + params.useWorkerFetch = + params.StandardFontDataFactory === DOMStandardFontDataFactory; + } if (typeof params.isEvalSupported !== "boolean") { params.isEvalSupported = true; } @@ -455,6 +486,10 @@ function _fetchDocument(worker, source, pdfDataRangeTransport, docId) { isEvalSupported: source.isEvalSupported, fontExtraProperties: source.fontExtraProperties, enableXfa: source.enableXfa, + useSystemFonts: source.useSystemFonts, + standardFontDataUrl: source.useWorkerFetch + ? 
source.standardFontDataUrl + : null, }) .then(function (workerId) { if (worker.destroyed) { @@ -2243,6 +2278,9 @@ class WorkerTransport { baseUrl: params.cMapUrl, isCompressed: params.cMapPacked, }); + this.StandardFontDataFactory = new params.StandardFontDataFactory({ + baseUrl: params.standardFontDataUrl, + }); this.destroyed = false; this.destroyCapability = null; @@ -2641,6 +2679,13 @@ class WorkerTransport { this._onUnsupportedFeature.bind(this) ); + messageHandler.on("FetchStandardFontData", data => { + if (this.destroyed) { + return Promise.reject(new Error("Worker was destroyed")); + } + return this.StandardFontDataFactory.fetch(data); + }); + messageHandler.on("FetchBuiltInCMap", (data, sink) => { if (this.destroyed) { sink.error(new Error("Worker was destroyed")); @@ -3183,6 +3228,7 @@ export { build, DefaultCanvasFactory, DefaultCMapReaderFactory, + DefaultStandardFontDataFactory, getDocument, LoopbackPort, PDFDataRangeTransport, diff --git a/src/display/display_utils.js b/src/display/display_utils.js index d09e778f5..06b6f6e67 100644 --- a/src/display/display_utils.js +++ b/src/display/display_utils.js @@ -84,6 +84,56 @@ class DOMCanvasFactory extends BaseCanvasFactory { } } +function fetchData(url, asTypedArray) { + if ( + (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) || + (isFetchSupported() && isValidFetchUrl(url, document.baseURI)) + ) { + return fetch(url).then(async response => { + if (!response.ok) { + throw new Error(response.statusText); + } + let data; + if (asTypedArray) { + data = new Uint8Array(await response.arrayBuffer()); + } else { + data = stringToBytes(await response.text()); + } + return data; + }); + } + + // The Fetch API is not supported. 
+ return new Promise((resolve, reject) => { + const request = new XMLHttpRequest(); + request.open("GET", url, /* async = */ true); + + if (asTypedArray) { + request.responseType = "arraybuffer"; + } + request.onreadystatechange = () => { + if (request.readyState !== XMLHttpRequest.DONE) { + return; + } + if (request.status === 200 || request.status === 0) { + let data; + if (asTypedArray && request.response) { + data = new Uint8Array(request.response); + } else if (!asTypedArray && request.responseText) { + data = stringToBytes(request.responseText); + } + if (data) { + resolve(data); + return; + } + } + reject(new Error(request.statusText)); + }; + + request.send(null); + }); +} + class BaseCMapReaderFactory { constructor({ baseUrl = null, isCompressed = false }) { if (this.constructor === BaseCMapReaderFactory) { @@ -125,56 +175,44 @@ class BaseCMapReaderFactory { class DOMCMapReaderFactory extends BaseCMapReaderFactory { _fetchData(url, compressionType) { - if ( - (typeof PDFJSDev !== "undefined" && PDFJSDev.test("MOZCENTRAL")) || - (isFetchSupported() && isValidFetchUrl(url, document.baseURI)) - ) { - return fetch(url).then(async response => { - if (!response.ok) { - throw new Error(response.statusText); - } - let cMapData; - if (this.isCompressed) { - cMapData = new Uint8Array(await response.arrayBuffer()); - } else { - cMapData = stringToBytes(await response.text()); - } - return { cMapData, compressionType }; - }); - } - - // The Fetch API is not supported. 
- return new Promise((resolve, reject) => { - const request = new XMLHttpRequest(); - request.open("GET", url, true); - - if (this.isCompressed) { - request.responseType = "arraybuffer"; - } - request.onreadystatechange = () => { - if (request.readyState !== XMLHttpRequest.DONE) { - return; - } - if (request.status === 200 || request.status === 0) { - let cMapData; - if (this.isCompressed && request.response) { - cMapData = new Uint8Array(request.response); - } else if (!this.isCompressed && request.responseText) { - cMapData = stringToBytes(request.responseText); - } - if (cMapData) { - resolve({ cMapData, compressionType }); - return; - } - } - reject(new Error(request.statusText)); - }; - - request.send(null); + return fetchData(url, /* asTypedArray = */ this.isCompressed).then(data => { + return { cMapData: data, compressionType }; }); } } +class BaseStandardFontDataFactory { + constructor({ baseUrl = null }) { + if (this.constructor === BaseStandardFontDataFactory) { + unreachable("Cannot initialize BaseStandardFontDataFactory."); + } + this.baseUrl = baseUrl; + } + + async fetch({ filename }) { + if (!this.baseUrl) { + throw new Error( + 'The standard font "baseUrl" parameter must be specified, ensure that ' + + 'the "standardFontDataUrl" API parameter is provided.' 
+ ); + } + if (!filename) { + throw new Error("Font filename must be specified."); + } + const url = this.baseUrl + filename + ".pfb"; + + return this._fetchData(url).catch(reason => { + throw new Error(`Unable to load font data at: ${url}`); + }); + } +} + +class DOMStandardFontDataFactory extends BaseStandardFontDataFactory { + _fetchData(url) { + return fetchData(url, /* asTypedArray = */ true); + } +} + class DOMSVGFactory { create(width, height) { assert(width > 0 && height > 0, "Invalid SVG dimensions"); @@ -704,10 +742,12 @@ export { addLinkAttributes, BaseCanvasFactory, BaseCMapReaderFactory, + BaseStandardFontDataFactory, DEFAULT_LINK_REL, deprecated, DOMCanvasFactory, DOMCMapReaderFactory, + DOMStandardFontDataFactory, DOMSVGFactory, getFilenameFromUrl, getPdfFilenameFromUrl, diff --git a/src/display/node_utils.js b/src/display/node_utils.js index ad27906d0..f40155b83 100644 --- a/src/display/node_utils.js +++ b/src/display/node_utils.js @@ -14,10 +14,27 @@ */ /* globals __non_webpack_require__ */ -import { BaseCanvasFactory, BaseCMapReaderFactory } from "./display_utils.js"; +import { + BaseCanvasFactory, + BaseCMapReaderFactory, + BaseStandardFontDataFactory, +} from "./display_utils.js"; import { isNodeJS } from "../shared/is_node.js"; import { unreachable } from "../shared/util.js"; +function fetchData(url) { + return new Promise((resolve, reject) => { + const fs = __non_webpack_require__("fs"); + fs.readFile(url, (error, data) => { + if (error || !data) { + reject(new Error(error)); + return; + } + resolve(new Uint8Array(data)); + }); + }); +} + let NodeCanvasFactory = class { constructor() { unreachable("Not implemented: NodeCanvasFactory"); @@ -30,6 +47,12 @@ let NodeCMapReaderFactory = class { } }; +let NodeStandardFontDataFactory = class { + constructor() { + unreachable("Not implemented: NodeStandardFontDataFactory"); + } +}; + if ((typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) && isNodeJS) { NodeCanvasFactory = class extends 
BaseCanvasFactory { create(width, height) { @@ -47,18 +70,21 @@ if ((typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) && isNodeJS) { NodeCMapReaderFactory = class extends BaseCMapReaderFactory { _fetchData(url, compressionType) { - return new Promise((resolve, reject) => { - const fs = __non_webpack_require__("fs"); - fs.readFile(url, (error, data) => { - if (error || !data) { - reject(new Error(error)); - return; - } - resolve({ cMapData: new Uint8Array(data), compressionType }); - }); + return fetchData(url).then(data => { + return { cMapData: data, compressionType }; }); } }; + + NodeStandardFontDataFactory = class extends BaseStandardFontDataFactory { + _fetchData(url) { + return fetchData(url); + } + }; } -export { NodeCanvasFactory, NodeCMapReaderFactory }; +export { + NodeCanvasFactory, + NodeCMapReaderFactory, + NodeStandardFontDataFactory, +}; diff --git a/src/shared/util.js b/src/shared/util.js index 8ca3601e3..bf3c45c4a 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -190,6 +190,7 @@ const StreamType = { const FontType = { UNKNOWN: "UNKNOWN", TYPE1: "TYPE1", + TYPE1STANDARD: "TYPE1STANDARD", TYPE1C: "TYPE1C", CIDFONTTYPE0: "CIDFONTTYPE0", CIDFONTTYPE0C: "CIDFONTTYPE0C", diff --git a/test/driver.js b/test/driver.js index 052d7999c..80ad61157 100644 --- a/test/driver.js +++ b/test/driver.js @@ -21,6 +21,7 @@ const WAITING_TIME = 100; // ms const PDF_TO_CSS_UNITS = 96.0 / 72.0; const CMAP_URL = "../external/bcmaps/"; const CMAP_PACKED = true; +const STANDARD_FONT_DATA_URL = "/build/generic/web/standard_fonts/"; const IMAGE_RESOURCES_PATH = "/web/images/"; const WORKER_SRC = "../build/generic/build/pdf.worker.js"; const RENDER_TASK_ON_CONTINUE_DELAY = 5; // ms @@ -415,9 +416,12 @@ var Driver = (function DriverClosure() { password: task.password, cMapUrl: CMAP_URL, cMapPacked: CMAP_PACKED, + standardFontDataUrl: STANDARD_FONT_DATA_URL, disableRange: task.disableRange, disableAutoFetch: !task.enableAutoFetch, pdfBug: true, + 
useSystemFonts: task.useSystemFonts, + useWorkerFetch: task.useWorkerFetch, }); loadingTask.promise.then( doc => { diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 30b829844..40bf67330 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -216,6 +216,7 @@ !bug1473809.pdf !issue12120_reduced.pdf !pdfjsbad1586.pdf +!standard_fonts.pdf !freeculture.pdf !issue6006.pdf !pdfkit_compressed.pdf diff --git a/test/pdfs/standard_fonts.pdf b/test/pdfs/standard_fonts.pdf new file mode 100644 index 000000000..348b551a1 Binary files /dev/null and b/test/pdfs/standard_fonts.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index f089bfd05..df9a9c57b 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -1974,6 +1974,28 @@ "link": true, "type": "eq" }, + { "id": "standard_fonts_system_fonts", + "file": "pdfs/standard_fonts.pdf", + "md5": "bb3a9ab3322328be983e8b4e8089843a", + "rounds": 1, + "type": "eq", + "useSystemFonts": true + }, + { "id": "standard_fonts_no_system_fonts", + "file": "pdfs/standard_fonts.pdf", + "md5": "bb3a9ab3322328be983e8b4e8089843a", + "rounds": 1, + "type": "eq", + "useSystemFonts": false + }, + { "id": "standard_fonts_main_thread_fetch", + "file": "pdfs/standard_fonts.pdf", + "md5": "bb3a9ab3322328be983e8b4e8089843a", + "rounds": 1, + "type": "eq", + "useSystemFonts": false, + "useWorkerFetch": false + }, { "id": "issue4573", "file": "pdfs/issue4573.pdf", "md5": "34b0c4fdee19e57033275b766c5f57a3", diff --git a/test/unit/annotation_spec.js b/test/unit/annotation_spec.js index ec88f8b65..f4e7149aa 100644 --- a/test/unit/annotation_spec.js +++ b/test/unit/annotation_spec.js @@ -29,10 +29,18 @@ import { stringToBytes, stringToUTF8String, } from "../../src/shared/util.js"; -import { CMAP_PARAMS, createIdFactory, XRefMock } from "./test_utils.js"; +import { + CMAP_PARAMS, + createIdFactory, + STANDARD_FONT_DATA_URL, + XRefMock, +} from "./test_utils.js"; +import { + DefaultCMapReaderFactory, + 
DefaultStandardFontDataFactory, +} from "../../src/display/api.js"; import { Dict, Name, Ref, RefSetCache } from "../../src/core/primitives.js"; import { Lexer, Parser } from "../../src/core/parser.js"; -import { DefaultCMapReaderFactory } from "../../src/display/api.js"; import { PartialEvaluator } from "../../src/core/evaluator.js"; import { StringStream } from "../../src/core/stream.js"; import { WorkerTask } from "../../src/core/worker.js"; @@ -68,6 +76,10 @@ describe("annotation", function () { } } + const fontDataReader = new DefaultStandardFontDataFactory({ + baseUrl: STANDARD_FONT_DATA_URL, + }); + function HandlerMock() { this.inputs = []; } @@ -75,6 +87,12 @@ describe("annotation", function () { send(name, data) { this.inputs.push({ name, data }); }, + sendWithPromise(name, data) { + if (name !== "FetchStandardFontData") { + return Promise.reject(new Error(`Unsupported mock ${name}.`)); + } + return fontDataReader.fetch(data); + }, }; let pdfManagerMock, idFactoryMock, partialEvaluator; @@ -2282,7 +2300,6 @@ describe("annotation", function () { ]); const task = new WorkerTask("test print"); const checkboxEvaluator = partialEvaluator.clone({ ignoreErrors: true }); - const annotation = await AnnotationFactory.create( xref, buttonWidgetRef, @@ -2306,7 +2323,7 @@ describe("annotation", function () { OPS.showText, OPS.endAnnotation, ]); - expect(operatorList.argsArray[3][0][0].fontChar).toEqual("✔"); + expect(operatorList.argsArray[3][0][0].unicode).toEqual("4"); }); it("should render checkboxes for printing", async function () { diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index 07b8b77bb..4c7b09a16 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -1518,18 +1518,20 @@ describe("api", function () { const pdfPage = await pdfDoc.getPage(1); const { items, styles } = await pdfPage.getTextContent(); expect(items.length).toEqual(1); - expect(Object.keys(styles)).toEqual(["Times"]); + // Font name will be a random object id.
+ const fontName = items[0].fontName; + expect(Object.keys(styles)).toEqual([fontName]); expect(items[0]).toEqual({ dir: "ltr", - fontName: "Times", + fontName, height: 18, str: "Issue 8276", transform: [18, 0, 0, 18, 441.81, 708.4499999999999], width: 77.49, hasEOL: false, }); - expect(styles.Times).toEqual({ + expect(styles[fontName]).toEqual({ fontFamily: "serif", ascent: NaN, descent: NaN, @@ -1678,7 +1680,7 @@ describe("api", function () { const expectedStreamTypes = {}; expectedStreamTypes[StreamType.FLATE] = true; const expectedFontTypes = {}; - expectedFontTypes[FontType.TYPE1] = true; + expectedFontTypes[FontType.TYPE1STANDARD] = true; expectedFontTypes[FontType.CIDFONTTYPE2] = true; expect(stats).toEqual({ diff --git a/test/unit/test_utils.js b/test/unit/test_utils.js index 6ee980520..85e4c0d28 100644 --- a/test/unit/test_utils.js +++ b/test/unit/test_utils.js @@ -26,6 +26,10 @@ const CMAP_PARAMS = { cMapPacked: true, }; +const STANDARD_FONT_DATA_URL = isNodeJS + ? "./external/standard_fonts/" + : "../../external/standard_fonts/"; + class DOMFileReaderFactory { static async fetch(params) { const response = await fetch(params.path); @@ -61,6 +65,7 @@ function buildGetDocumentParams(filename, options) { params.url = isNodeJS ? TEST_PDFS_PATH + filename : new URL(TEST_PDFS_PATH + filename, window.location).href; + params.standardFontDataUrl = STANDARD_FONT_DATA_URL; for (const option in options) { params[option] = options[option]; @@ -146,6 +151,7 @@ export { createIdFactory, DefaultFileReaderFactory, isEmptyObj, + STANDARD_FONT_DATA_URL, TEST_PDFS_PATH, XRefMock, }; diff --git a/web/app_options.js b/web/app_options.js index b52b4edcf..7b5273a0a 100644 --- a/web/app_options.js +++ b/web/app_options.js @@ -217,6 +217,14 @@ const defaultOptions = { value: false, kind: OptionKind.API, }, + standardFontDataUrl: { + /** @type {string} */ + value: + typeof PDFJSDev === "undefined" || !PDFJSDev.test("PRODUCTION") + ? 
"../external/standard_fonts/" + : "../web/standard_fonts/", + kind: OptionKind.API, + }, verbosity: { /** @type {number} */ value: 1,