From 77b258440b9a6b65f1477fbf151cb4edb28514a4 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sun, 2 May 2021 16:11:01 +0200 Subject: [PATCH] Move some constants and helper functions from `src/core/fonts.js` into their own file - `FontFlags`, is used in both `src/core/fonts.js` and `src/core/evaluator.js`. - `getFontType`, same as the above. - `MacStandardGlyphOrdering`, is a fairly large data-structure and `src/core/fonts.js` is already a *very* large file. - `recoverGlyphName`, a dependency of `type1FontGlyphMapping`; please see below. - `SEAC_ANALYSIS_ENABLED`, is used by `Type1Font`, `CFFFont`, and the unit-tests; please see below. - `type1FontGlyphMapping`, is used by both `Type1Font` and `CFFFont` which a later patch will move to their own files. --- src/core/evaluator.js | 3 +- src/core/fonts.js | 185 ++---------------------------- src/core/fonts_utils.js | 204 +++++++++++++++++++++++++++++++++ test/unit/cff_parser_spec.js | 2 +- test/unit/type1_parser_spec.js | 2 +- 5 files changed, 217 insertions(+), 179 deletions(-) create mode 100644 src/core/fonts_utils.js diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 18d7923ec..16f902728 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -47,7 +47,8 @@ import { Ref, RefSet, } from "./primitives.js"; -import { ErrorFont, Font, FontFlags, getFontType } from "./fonts.js"; +import { ErrorFont, Font } from "./fonts.js"; +import { FontFlags, getFontType } from "./fonts_utils.js"; import { getEncoding, MacRomanEncoding, diff --git a/src/core/fonts.js b/src/core/fonts.js index b0992820c..28d2ea97c 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -38,6 +38,14 @@ import { CFFStrings, CFFTopDict, } from "./cff_parser.js"; +import { + FontFlags, + getFontType, + MacStandardGlyphOrdering, + recoverGlyphName, + SEAC_ANALYSIS_ENABLED, + type1FontGlyphMapping, +} from "./fonts_utils.js"; import { getDingbatsGlyphsUnicode, getGlyphsUnicode } from "./glyphlist.js"; import { getEncoding,
@@ -80,15 +88,6 @@ const PRIVATE_USE_AREAS = [ // except for Type 3 fonts var PDF_GLYPH_SPACE_UNITS = 1000; -// Accented characters have issues on Windows and Linux. When this flag is -// enabled glyphs that use seac and seac style endchar operators are truncated -// and we instead just store the glyph id's of the base glyph and its accent to -// be drawn individually. -// Linux (freetype) requires that when a seac style endchar is used -// that the charset must be a predefined one, however we build a -// custom one. Windows just refuses to draw glyphs with seac operators. -var SEAC_ANALYSIS_ENABLED = true; - const EXPORT_DATA_PROPERTIES = [ "ascent", "bbox", @@ -130,57 +129,6 @@ const EXPORT_DATA_EXTRA_PROPERTIES = [ "widths", ]; -var FontFlags = { - FixedPitch: 1, - Serif: 2, - Symbolic: 4, - Script: 8, - Nonsymbolic: 32, - Italic: 64, - AllCap: 65536, - SmallCap: 131072, - ForceBold: 262144, -}; - -// prettier-ignore -var MacStandardGlyphOrdering = [ - ".notdef", ".null", "nonmarkingreturn", "space", "exclam", "quotedbl", - "numbersign", "dollar", "percent", "ampersand", "quotesingle", "parenleft", - "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash", - "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", - "nine", "colon", "semicolon", "less", "equal", "greater", "question", "at", - "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", - "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "bracketleft", - "backslash", "bracketright", "asciicircum", "underscore", "grave", "a", "b", - "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", - "r", "s", "t", "u", "v", "w", "x", "y", "z", "braceleft", "bar", "braceright", - "asciitilde", "Adieresis", "Aring", "Ccedilla", "Eacute", "Ntilde", - "Odieresis", "Udieresis", "aacute", "agrave", "acircumflex", "adieresis", - "atilde", "aring", "ccedilla", "eacute", "egrave", "ecircumflex", "edieresis", - "iacute", "igrave", "icircumflex", 
"idieresis", "ntilde", "oacute", "ograve", - "ocircumflex", "odieresis", "otilde", "uacute", "ugrave", "ucircumflex", - "udieresis", "dagger", "degree", "cent", "sterling", "section", "bullet", - "paragraph", "germandbls", "registered", "copyright", "trademark", "acute", - "dieresis", "notequal", "AE", "Oslash", "infinity", "plusminus", "lessequal", - "greaterequal", "yen", "mu", "partialdiff", "summation", "product", "pi", - "integral", "ordfeminine", "ordmasculine", "Omega", "ae", "oslash", - "questiondown", "exclamdown", "logicalnot", "radical", "florin", - "approxequal", "Delta", "guillemotleft", "guillemotright", "ellipsis", - "nonbreakingspace", "Agrave", "Atilde", "Otilde", "OE", "oe", "endash", - "emdash", "quotedblleft", "quotedblright", "quoteleft", "quoteright", - "divide", "lozenge", "ydieresis", "Ydieresis", "fraction", "currency", - "guilsinglleft", "guilsinglright", "fi", "fl", "daggerdbl", "periodcentered", - "quotesinglbase", "quotedblbase", "perthousand", "Acircumflex", - "Ecircumflex", "Aacute", "Edieresis", "Egrave", "Iacute", "Icircumflex", - "Idieresis", "Igrave", "Oacute", "Ocircumflex", "apple", "Ograve", "Uacute", - "Ucircumflex", "Ugrave", "dotlessi", "circumflex", "tilde", "macron", - "breve", "dotaccent", "ring", "cedilla", "hungarumlaut", "ogonek", "caron", - "Lslash", "lslash", "Scaron", "scaron", "Zcaron", "zcaron", "brokenbar", - "Eth", "eth", "Yacute", "yacute", "Thorn", "thorn", "minus", "multiply", - "onesuperior", "twosuperior", "threesuperior", "onehalf", "onequarter", - "threequarters", "franc", "Gbreve", "gbreve", "Idotaccent", "Scedilla", - "scedilla", "Cacute", "cacute", "Ccaron", "ccaron", "dcroat"]; - function adjustWidths(properties) { if (!properties.fontMatrix) { return; @@ -225,48 +173,6 @@ function adjustToUnicode(properties, builtInEncoding) { properties.toUnicode.amend(toUnicode); } -function getFontType(type, subtype) { - switch (type) { - case "Type1": - return subtype === "Type1C" ? 
FontType.TYPE1C : FontType.TYPE1; - case "CIDFontType0": - return subtype === "CIDFontType0C" - ? FontType.CIDFONTTYPE0C - : FontType.CIDFONTTYPE0; - case "OpenType": - return FontType.OPENTYPE; - case "TrueType": - return FontType.TRUETYPE; - case "CIDFontType2": - return FontType.CIDFONTTYPE2; - case "MMType1": - return FontType.MMTYPE1; - case "Type0": - return FontType.TYPE0; - default: - return FontType.UNKNOWN; - } -} - -// Some bad PDF generators, e.g. Scribus PDF, include glyph names -// in a 'uniXXXX' format -- attempting to recover proper ones. -function recoverGlyphName(name, glyphsUnicodeMap) { - if (glyphsUnicodeMap[name] !== undefined) { - return name; - } - // The glyph name is non-standard, trying to recover. - var unicode = getUnicodeForGlyph(name, glyphsUnicodeMap); - if (unicode !== -1) { - for (var key in glyphsUnicodeMap) { - if (glyphsUnicodeMap[key] === unicode) { - return key; - } - } - } - info("Unable to recover a standard glyph name for: " + name); - return name; -} - var Glyph = (function GlyphClosure() { // eslint-disable-next-line no-shadow function Glyph( @@ -3282,79 +3188,6 @@ var ErrorFont = (function ErrorFontClosure() { return ErrorFont; })(); -/** - * Shared logic for building a char code to glyph id mapping for Type1 and - * simple CFF fonts. See section 9.6.6.2 of the spec. - * @param {Object} properties Font properties object. - * @param {Object} builtInEncoding The encoding contained within the actual font - * data. - * @param {Array} glyphNames Array of glyph names where the index is the - * glyph ID. - * @returns {Object} A char code to glyph ID map. - */ -function type1FontGlyphMapping(properties, builtInEncoding, glyphNames) { - var charCodeToGlyphId = Object.create(null); - var glyphId, charCode, baseEncoding; - var isSymbolicFont = !!(properties.flags & FontFlags.Symbolic); - - if (properties.baseEncodingName) { - // If a valid base encoding name was used, the mapping is initialized with - // that. 
- baseEncoding = getEncoding(properties.baseEncodingName); - for (charCode = 0; charCode < baseEncoding.length; charCode++) { - glyphId = glyphNames.indexOf(baseEncoding[charCode]); - if (glyphId >= 0) { - charCodeToGlyphId[charCode] = glyphId; - } else { - charCodeToGlyphId[charCode] = 0; // notdef - } - } - } else if (isSymbolicFont) { - // For a symbolic font the encoding should be the fonts built-in encoding. - for (charCode in builtInEncoding) { - charCodeToGlyphId[charCode] = builtInEncoding[charCode]; - } - } else { - // For non-symbolic fonts that don't have a base encoding the standard - // encoding should be used. - baseEncoding = StandardEncoding; - for (charCode = 0; charCode < baseEncoding.length; charCode++) { - glyphId = glyphNames.indexOf(baseEncoding[charCode]); - if (glyphId >= 0) { - charCodeToGlyphId[charCode] = glyphId; - } else { - charCodeToGlyphId[charCode] = 0; // notdef - } - } - } - - // Lastly, merge in the differences. - var differences = properties.differences, - glyphsUnicodeMap; - if (differences) { - for (charCode in differences) { - var glyphName = differences[charCode]; - glyphId = glyphNames.indexOf(glyphName); - - if (glyphId === -1) { - if (!glyphsUnicodeMap) { - glyphsUnicodeMap = getGlyphsUnicode(); - } - var standardGlyphName = recoverGlyphName(glyphName, glyphsUnicodeMap); - if (standardGlyphName !== glyphName) { - glyphId = glyphNames.indexOf(standardGlyphName); - } - } - if (glyphId >= 0) { - charCodeToGlyphId[charCode] = glyphId; - } else { - charCodeToGlyphId[charCode] = 0; // notdef - } - } - } - return charCodeToGlyphId; -} - // Type1Font is also a CIDFontType0. 
var Type1Font = (function Type1FontClosure() { function findBlock(streamBytes, signature, startIndex) { @@ -3854,4 +3687,4 @@ var CFFFont = (function CFFFontClosure() { return CFFFont; })(); -export { ErrorFont, Font, FontFlags, getFontType, SEAC_ANALYSIS_ENABLED }; +export { ErrorFont, Font }; diff --git a/src/core/fonts_utils.js b/src/core/fonts_utils.js new file mode 100644 index 000000000..6d051106d --- /dev/null +++ b/src/core/fonts_utils.js @@ -0,0 +1,204 @@ +/* Copyright 2012 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* eslint-disable no-var */ + +import { FontType, info } from "../shared/util.js"; +import { getEncoding, StandardEncoding } from "./encodings.js"; +import { getGlyphsUnicode } from "./glyphlist.js"; +import { getUnicodeForGlyph } from "./unicode.js"; + +// Accented characters have issues on Windows and Linux. When this flag is +// enabled glyphs that use seac and seac style endchar operators are truncated +// and we instead just store the glyph id's of the base glyph and its accent to +// be drawn individually. +// Linux (freetype) requires that when a seac style endchar is used +// that the charset must be a predefined one, however we build a +// custom one. Windows just refuses to draw glyphs with seac operators. 
+var SEAC_ANALYSIS_ENABLED = true; + +var FontFlags = { + FixedPitch: 1, + Serif: 2, + Symbolic: 4, + Script: 8, + Nonsymbolic: 32, + Italic: 64, + AllCap: 65536, + SmallCap: 131072, + ForceBold: 262144, +}; + +// prettier-ignore +var MacStandardGlyphOrdering = [ + ".notdef", ".null", "nonmarkingreturn", "space", "exclam", "quotedbl", + "numbersign", "dollar", "percent", "ampersand", "quotesingle", "parenleft", + "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash", + "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", + "nine", "colon", "semicolon", "less", "equal", "greater", "question", "at", + "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", + "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "bracketleft", + "backslash", "bracketright", "asciicircum", "underscore", "grave", "a", "b", + "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", + "r", "s", "t", "u", "v", "w", "x", "y", "z", "braceleft", "bar", "braceright", + "asciitilde", "Adieresis", "Aring", "Ccedilla", "Eacute", "Ntilde", + "Odieresis", "Udieresis", "aacute", "agrave", "acircumflex", "adieresis", + "atilde", "aring", "ccedilla", "eacute", "egrave", "ecircumflex", "edieresis", + "iacute", "igrave", "icircumflex", "idieresis", "ntilde", "oacute", "ograve", + "ocircumflex", "odieresis", "otilde", "uacute", "ugrave", "ucircumflex", + "udieresis", "dagger", "degree", "cent", "sterling", "section", "bullet", + "paragraph", "germandbls", "registered", "copyright", "trademark", "acute", + "dieresis", "notequal", "AE", "Oslash", "infinity", "plusminus", "lessequal", + "greaterequal", "yen", "mu", "partialdiff", "summation", "product", "pi", + "integral", "ordfeminine", "ordmasculine", "Omega", "ae", "oslash", + "questiondown", "exclamdown", "logicalnot", "radical", "florin", + "approxequal", "Delta", "guillemotleft", "guillemotright", "ellipsis", + "nonbreakingspace", "Agrave", "Atilde", "Otilde", "OE", "oe", 
"endash", + "emdash", "quotedblleft", "quotedblright", "quoteleft", "quoteright", + "divide", "lozenge", "ydieresis", "Ydieresis", "fraction", "currency", + "guilsinglleft", "guilsinglright", "fi", "fl", "daggerdbl", "periodcentered", + "quotesinglbase", "quotedblbase", "perthousand", "Acircumflex", + "Ecircumflex", "Aacute", "Edieresis", "Egrave", "Iacute", "Icircumflex", + "Idieresis", "Igrave", "Oacute", "Ocircumflex", "apple", "Ograve", "Uacute", + "Ucircumflex", "Ugrave", "dotlessi", "circumflex", "tilde", "macron", + "breve", "dotaccent", "ring", "cedilla", "hungarumlaut", "ogonek", "caron", + "Lslash", "lslash", "Scaron", "scaron", "Zcaron", "zcaron", "brokenbar", + "Eth", "eth", "Yacute", "yacute", "Thorn", "thorn", "minus", "multiply", + "onesuperior", "twosuperior", "threesuperior", "onehalf", "onequarter", + "threequarters", "franc", "Gbreve", "gbreve", "Idotaccent", "Scedilla", + "scedilla", "Cacute", "cacute", "Ccaron", "ccaron", "dcroat"]; + +function getFontType(type, subtype) { + switch (type) { + case "Type1": + return subtype === "Type1C" ? FontType.TYPE1C : FontType.TYPE1; + case "CIDFontType0": + return subtype === "CIDFontType0C" + ? FontType.CIDFONTTYPE0C + : FontType.CIDFONTTYPE0; + case "OpenType": + return FontType.OPENTYPE; + case "TrueType": + return FontType.TRUETYPE; + case "CIDFontType2": + return FontType.CIDFONTTYPE2; + case "MMType1": + return FontType.MMTYPE1; + case "Type0": + return FontType.TYPE0; + default: + return FontType.UNKNOWN; + } +} + +// Some bad PDF generators, e.g. Scribus PDF, include glyph names +// in a 'uniXXXX' format -- attempting to recover proper ones. +function recoverGlyphName(name, glyphsUnicodeMap) { + if (glyphsUnicodeMap[name] !== undefined) { + return name; + } + // The glyph name is non-standard, trying to recover. 
+ var unicode = getUnicodeForGlyph(name, glyphsUnicodeMap); + if (unicode !== -1) { + for (var key in glyphsUnicodeMap) { + if (glyphsUnicodeMap[key] === unicode) { + return key; + } + } + } + info("Unable to recover a standard glyph name for: " + name); + return name; +} + +/** + * Shared logic for building a char code to glyph id mapping for Type1 and + * simple CFF fonts. See section 9.6.6.2 of the spec. + * @param {Object} properties Font properties object. + * @param {Object} builtInEncoding The encoding contained within the actual font + * data. + * @param {Array} glyphNames Array of glyph names where the index is the + * glyph ID. + * @returns {Object} A char code to glyph ID map. + */ +function type1FontGlyphMapping(properties, builtInEncoding, glyphNames) { + var charCodeToGlyphId = Object.create(null); + var glyphId, charCode, baseEncoding; + var isSymbolicFont = !!(properties.flags & FontFlags.Symbolic); + + if (properties.baseEncodingName) { + // If a valid base encoding name was used, the mapping is initialized with + // that. + baseEncoding = getEncoding(properties.baseEncodingName); + for (charCode = 0; charCode < baseEncoding.length; charCode++) { + glyphId = glyphNames.indexOf(baseEncoding[charCode]); + if (glyphId >= 0) { + charCodeToGlyphId[charCode] = glyphId; + } else { + charCodeToGlyphId[charCode] = 0; // notdef + } + } + } else if (isSymbolicFont) { + // For a symbolic font the encoding should be the fonts built-in encoding. + for (charCode in builtInEncoding) { + charCodeToGlyphId[charCode] = builtInEncoding[charCode]; + } + } else { + // For non-symbolic fonts that don't have a base encoding the standard + // encoding should be used. 
+ baseEncoding = StandardEncoding; + for (charCode = 0; charCode < baseEncoding.length; charCode++) { + glyphId = glyphNames.indexOf(baseEncoding[charCode]); + if (glyphId >= 0) { + charCodeToGlyphId[charCode] = glyphId; + } else { + charCodeToGlyphId[charCode] = 0; // notdef + } + } + } + + // Lastly, merge in the differences. + var differences = properties.differences, + glyphsUnicodeMap; + if (differences) { + for (charCode in differences) { + var glyphName = differences[charCode]; + glyphId = glyphNames.indexOf(glyphName); + + if (glyphId === -1) { + if (!glyphsUnicodeMap) { + glyphsUnicodeMap = getGlyphsUnicode(); + } + var standardGlyphName = recoverGlyphName(glyphName, glyphsUnicodeMap); + if (standardGlyphName !== glyphName) { + glyphId = glyphNames.indexOf(standardGlyphName); + } + } + if (glyphId >= 0) { + charCodeToGlyphId[charCode] = glyphId; + } else { + charCodeToGlyphId[charCode] = 0; // notdef + } + } + } + return charCodeToGlyphId; +} + +export { + FontFlags, + getFontType, + MacStandardGlyphOrdering, + recoverGlyphName, + SEAC_ANALYSIS_ENABLED, + type1FontGlyphMapping, +}; diff --git a/test/unit/cff_parser_spec.js b/test/unit/cff_parser_spec.js index 6eaa0a0b8..43a979269 100644 --- a/test/unit/cff_parser_spec.js +++ b/test/unit/cff_parser_spec.js @@ -20,7 +20,7 @@ import { CFFParser, CFFStrings, } from "../../src/core/cff_parser.js"; -import { SEAC_ANALYSIS_ENABLED } from "../../src/core/fonts.js"; +import { SEAC_ANALYSIS_ENABLED } from "../../src/core/fonts_utils.js"; import { Stream } from "../../src/core/stream.js"; describe("CFFParser", function () { diff --git a/test/unit/type1_parser_spec.js b/test/unit/type1_parser_spec.js index db02bc0e8..00929a33c 100644 --- a/test/unit/type1_parser_spec.js +++ b/test/unit/type1_parser_spec.js @@ -13,7 +13,7 @@ * limitations under the License. 
*/ -import { SEAC_ANALYSIS_ENABLED } from "../../src/core/fonts.js"; +import { SEAC_ANALYSIS_ENABLED } from "../../src/core/fonts_utils.js"; import { StringStream } from "../../src/core/stream.js"; import { Type1Parser } from "../../src/core/type1_parser.js";