From c42887221a268a6e84f41ab8c046d0f7ae93effe Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Thu, 2 Sep 2021 11:41:20 +0200 Subject: [PATCH] Simplify some regular expressions There's a fair number of regular expressions throughout the code-base which are slightly more verbose than strictly necessary, in particular: - We have a lot of regular expressions that use `[0-9]` explicitly, and those can be simplified to use `\d` instead. - We have one instance of a regular expression containing an `A-Za-z0-9_` sequence, which can be simplified to use `\w` instead. --- src/core/core_utils.js | 7 ++----- src/core/document.js | 4 ++-- src/core/xfa/formcalc_lexer.js | 4 ++-- src/core/xfa/template.js | 4 ++-- src/core/xfa/utils.js | 2 +- src/scripting_api/aform.js | 10 +++++----- src/scripting_api/util.js | 30 +++++++++++++++--------------- test/unit/annotation_spec.js | 12 ++++++------ 8 files changed, 35 insertions(+), 38 deletions(-) diff --git a/src/core/core_utils.js b/src/core/core_utils.js index 7282eb0e5..d91455296 100644 --- a/src/core/core_utils.js +++ b/src/core/core_utils.js @@ -208,7 +208,7 @@ function isWhiteSpace(ch) { * each part of the path. */ function parseXFAPath(path) { - const positionPattern = /(.+)\[([0-9]+)\]$/; + const positionPattern = /(.+)\[(\d+)\]$/; return path.split(".").map(component => { const m = component.match(positionPattern); if (m) { @@ -428,10 +428,7 @@ function validateCSSFont(cssFontInfo) { } else { // See https://developer.mozilla.org/en-US/docs/Web/CSS/custom-ident. 
for (const ident of fontFamily.split(/[ \t]+/)) { - if ( - /^([0-9]|(-([0-9]|-)))/.test(ident) || - !/^[a-zA-Z0-9\-_\\]+$/.test(ident) - ) { + if (/^(\d|(-(\d|-)))/.test(ident) || !/^[\w-\\]+$/.test(ident)) { warn( `XFA - FontFamily contains some invalid : ${fontFamily}.` ); diff --git a/src/core/document.js b/src/core/document.js index 9d35432a5..9262a0cba 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -576,7 +576,7 @@ const FINGERPRINT_FIRST_BYTES = 1024; const EMPTY_FINGERPRINT = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"; -const PDF_HEADER_VERSION_REGEXP = /^[1-9]\.[0-9]$/; +const PDF_HEADER_VERSION_REGEXP = /^[1-9]\.\d$/; function find(stream, signature, limit = 1024, backwards = false) { if ( @@ -988,7 +988,7 @@ class PDFDocument { } let fontFamily = descriptor.get("FontFamily"); // For example, "Wingdings 3" is not a valid font name in the css specs. - fontFamily = fontFamily.replace(/[ ]+([0-9])/g, "$1"); + fontFamily = fontFamily.replace(/[ ]+(\d)/g, "$1"); const fontWeight = descriptor.get("FontWeight"); // Angle is expressed in degrees counterclockwise in PDF diff --git a/src/core/xfa/formcalc_lexer.js b/src/core/xfa/formcalc_lexer.js index edafeee8a..559fda0a5 100644 --- a/src/core/xfa/formcalc_lexer.js +++ b/src/core/xfa/formcalc_lexer.js @@ -117,8 +117,8 @@ const TOKEN = { }; const hexPattern = /^[uU]([0-9a-fA-F]{4,8})/; -const numberPattern = /^[0-9]*(?:\.[0-9]*)?(?:[Ee][+-]?[0-9]+)?/; -const dotNumberPattern = /^[0-9]*(?:[Ee][+-]?[0-9]+)?/; +const numberPattern = /^\d*(?:\.\d*)?(?:[Ee][+-]?\d+)?/; +const dotNumberPattern = /^\d*(?:[Ee][+-]?\d+)?/; const eolPattern = /[\r\n]+/; const identifierPattern = new RegExp("^[\\p{L}_$!][\\p{L}\\p{N}_$]*", "u"); diff --git a/src/core/xfa/template.js b/src/core/xfa/template.js index 0c63c98bc..d1f64a016 100644 --- a/src/core/xfa/template.js +++ b/src/core/xfa/template.js @@ -657,7 +657,7 @@ class Barcode extends XFAObject { "shift-jis", "ucs-2", "utf-16", - ].includes(k) 
|| k.match(/iso-8859-[0-9]{2}/), + ].includes(k) || k.match(/iso-8859-\d{2}/), }); this.checksum = getStringOption(attributes.checksum, [ "none", @@ -5274,7 +5274,7 @@ class Submit extends XFAObject { "shift-jis", "ucs-2", "utf-16", - ].includes(k) || k.match(/iso-8859-[0-9]{2}/), + ].includes(k) || k.match(/iso-8859-\d{2}/), }); this.use = attributes.use || ""; this.usehref = attributes.usehref || ""; diff --git a/src/core/xfa/utils.js b/src/core/xfa/utils.js index 4a4ebc742..5d5cf4cc7 100644 --- a/src/core/xfa/utils.js +++ b/src/core/xfa/utils.js @@ -22,7 +22,7 @@ const dimConverters = { in: x => x * 72, px: x => x, }; -const measurementPattern = /([+-]?[0-9]+\.?[0-9]*)(.*)/; +const measurementPattern = /([+-]?\d+\.?\d*)(.*)/; function stripQuotes(str) { if (str.startsWith("'") || str.startsWith('"')) { diff --git a/src/scripting_api/aform.js b/src/scripting_api/aform.js index 6dae73109..c91d72786 100644 --- a/src/scripting_api/aform.js +++ b/src/scripting_api/aform.js @@ -90,7 +90,7 @@ class AForm { str = `0${str}`; } - const numbers = str.match(/([0-9]+)/g); + const numbers = str.match(/(\d+)/g); if (numbers.length === 0) { return null; } @@ -202,13 +202,13 @@ class AForm { if (sepStyle > 1) { // comma sep pattern = event.willCommit - ? /^[+-]?([0-9]+(,[0-9]*)?|,[0-9]+)$/ - : /^[+-]?[0-9]*,?[0-9]*$/; + ? /^[+-]?(\d+(,\d*)?|,\d+)$/ + : /^[+-]?\d*,?\d*$/; } else { // dot sep pattern = event.willCommit - ? /^[+-]?([0-9]+(\.[0-9]*)?|\.[0-9]+)$/ - : /^[+-]?[0-9]*\.?[0-9]*$/; + ? 
/^[+-]?(\d+(\.\d*)?|\.\d+)$/ + : /^[+-]?\d*\.?\d*$/; } if (!pattern.test(value)) { diff --git a/src/scripting_api/util.js b/src/scripting_api/util.js index f903d7cba..613d5fe93 100644 --- a/src/scripting_api/util.js +++ b/src/scripting_api/util.js @@ -59,7 +59,7 @@ class Util extends PDFObject { throw new TypeError("First argument of printf must be a string"); } - const pattern = /%(,[0-4])?([+ 0#]+)?([0-9]+)?(\.[0-9]+)?(.)/g; + const pattern = /%(,[0-4])?([+ 0#]+)?(\d+)?(\.\d+)?(.)/g; const PLUS = 1; const SPACE = 2; const ZERO = 4; @@ -406,13 +406,13 @@ class Util extends PDFObject { }, }, mm: { - pattern: `([0-9]{2})`, + pattern: `(\\d{2})`, action: (value, data) => { data.month = parseInt(value) - 1; }, }, m: { - pattern: `([0-9]{1,2})`, + pattern: `(\\d{1,2})`, action: (value, data) => { data.month = parseInt(value) - 1; }, @@ -430,73 +430,73 @@ class Util extends PDFObject { }, }, dd: { - pattern: "([0-9]{2})", + pattern: "(\\d{2})", action: (value, data) => { data.day = parseInt(value); }, }, d: { - pattern: "([0-9]{1,2})", + pattern: "(\\d{1,2})", action: (value, data) => { data.day = parseInt(value); }, }, yyyy: { - pattern: "([0-9]{4})", + pattern: "(\\d{4})", action: (value, data) => { data.year = parseInt(value); }, }, yy: { - pattern: "([0-9]{2})", + pattern: "(\\d{2})", action: (value, data) => { data.year = 2000 + parseInt(value); }, }, HH: { - pattern: "([0-9]{2})", + pattern: "(\\d{2})", action: (value, data) => { data.hours = parseInt(value); }, }, H: { - pattern: "([0-9]{1,2})", + pattern: "(\\d{1,2})", action: (value, data) => { data.hours = parseInt(value); }, }, hh: { - pattern: "([0-9]{2})", + pattern: "(\\d{2})", action: (value, data) => { data.hours = parseInt(value); }, }, h: { - pattern: "([0-9]{1,2})", + pattern: "(\\d{1,2})", action: (value, data) => { data.hours = parseInt(value); }, }, MM: { - pattern: "([0-9]{2})", + pattern: "(\\d{2})", action: (value, data) => { data.minutes = parseInt(value); }, }, M: { - pattern: "([0-9]{1,2})", 
+ pattern: "(\\d{1,2})", action: (value, data) => { data.minutes = parseInt(value); }, }, ss: { - pattern: "([0-9]{2})", + pattern: "(\\d{2})", action: (value, data) => { data.seconds = parseInt(value); }, }, s: { - pattern: "([0-9]{1,2})", + pattern: "(\\d{1,2})", action: (value, data) => { data.seconds = parseInt(value); }, diff --git a/test/unit/annotation_spec.js b/test/unit/annotation_spec.js index 8fb5fcffd..6434b19c6 100644 --- a/test/unit/annotation_spec.js +++ b/test/unit/annotation_spec.js @@ -2036,7 +2036,7 @@ describe("annotation", function () { expect(oldData.ref).toEqual(Ref.get(123, 0)); expect(newData.ref).toEqual(Ref.get(2, 0)); - oldData.data = oldData.data.replace(/\(D:[0-9]+\)/, "(date)"); + oldData.data = oldData.data.replace(/\(D:\d+\)/, "(date)"); expect(oldData.data).toEqual( "123 0 obj\n" + "<< /Type /Annot /Subtype /Widget /FT /Tx /DA (/Helv 5 Tf) /DR " + @@ -2167,7 +2167,7 @@ describe("annotation", function () { expect(oldData.ref).toEqual(Ref.get(123, 0)); expect(newData.ref).toEqual(Ref.get(2, 0)); - oldData.data = oldData.data.replace(/\(D:[0-9]+\)/, "(date)"); + oldData.data = oldData.data.replace(/\(D:\d+\)/, "(date)"); expect(oldData.data).toEqual( "123 0 obj\n" + "<< /Type /Annot /Subtype /Widget /FT /Tx /DA (/Goth 5 Tf) /DR " + @@ -2576,7 +2576,7 @@ describe("annotation", function () { task, annotationStorage ); - oldData.data = oldData.data.replace(/\(D:[0-9]+\)/, "(date)"); + oldData.data = oldData.data.replace(/\(D:\d+\)/, "(date)"); expect(oldData.ref).toEqual(Ref.get(123, 0)); expect(oldData.data).toEqual( "123 0 obj\n" + @@ -2876,7 +2876,7 @@ describe("annotation", function () { ); expect(data.length).toEqual(2); const [radioData, parentData] = data; - radioData.data = radioData.data.replace(/\(D:[0-9]+\)/, "(date)"); + radioData.data = radioData.data.replace(/\(D:\d+\)/, "(date)"); expect(radioData.ref).toEqual(Ref.get(123, 0)); expect(radioData.data).toEqual( "123 0 obj\n" + @@ -2939,7 +2939,7 @@ describe("annotation", 
function () { ); expect(data.length).toEqual(2); const [radioData, parentData] = data; - radioData.data = radioData.data.replace(/\(D:[0-9]+\)/, "(date)"); + radioData.data = radioData.data.replace(/\(D:\d+\)/, "(date)"); expect(radioData.ref).toEqual(Ref.get(123, 0)); expect(radioData.data).toEqual( "123 0 obj\n" + @@ -3389,7 +3389,7 @@ describe("annotation", function () { expect(oldData.ref).toEqual(Ref.get(123, 0)); expect(newData.ref).toEqual(Ref.get(1, 0)); - oldData.data = oldData.data.replace(/\(D:[0-9]+\)/, "(date)"); + oldData.data = oldData.data.replace(/\(D:\d+\)/, "(date)"); expect(oldData.data).toEqual( "123 0 obj\n" + "<< /Type /Annot /Subtype /Widget /FT /Ch /DA (/Helv 5 Tf) /DR " +