From 0cd28ebfa35d334d25523ca0994c3cf1b984944f Mon Sep 17 00:00:00 2001 From: Yury Delendik Date: Mon, 16 Jun 2014 09:52:04 -0500 Subject: [PATCH] Telemetry for used stream and font types --- .../firefox/content/PdfJsTelemetry-addon.jsm | 7 +++- extensions/firefox/content/PdfJsTelemetry.jsm | 4 +++ .../firefox/content/PdfStreamConverter.jsm | 35 +++++++++++++----- src/core/evaluator.js | 35 +++++++++++++----- src/core/fonts.js | 36 +++++++++++++++++-- src/core/obj.js | 8 +++-- src/core/parser.js | 13 ++++++- src/core/worker.js | 6 ++++ src/display/api.js | 19 ++++++++++ src/shared/util.js | 27 ++++++++++++++ web/page_view.js | 8 ++++- 11 files changed, 174 insertions(+), 24 deletions(-) diff --git a/extensions/firefox/content/PdfJsTelemetry-addon.jsm b/extensions/firefox/content/PdfJsTelemetry-addon.jsm index c29d5ab83..e22903d97 100644 --- a/extensions/firefox/content/PdfJsTelemetry-addon.jsm +++ b/extensions/firefox/content/PdfJsTelemetry-addon.jsm @@ -31,9 +31,10 @@ Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_FALLBACK_SHOWN", 1, 2, 3, Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_DOCUMENT_VERSION", 1, 10, 11, Telemetry.HISTOGRAM_LINEAR); Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_DOCUMENT_GENERATOR", 1, 25, 26, Telemetry.HISTOGRAM_LINEAR); Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_DOCUMENT_SIZE_KB", 2, 64 * 1024, 20, Telemetry.HISTOGRAM_EXPONENTIAL); +Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_FONT_TYPES", 1, 19, 20, Telemetry.HISTOGRAM_LINEAR); Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_FORM", 1, 2, 3, Telemetry.HISTOGRAM_BOOLEAN); Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_PRINT", 1, 2, 3, Telemetry.HISTOGRAM_BOOLEAN); -Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_STREAM_TYPES", 1, 9, 10, Telemetry.HISTOGRAM_LINEAR); +Telemetry.registerAddonHistogram(ADDON_ID, "PDF_VIEWER_STREAM_TYPES", 1, 19, 20, Telemetry.HISTOGRAM_LINEAR); Telemetry.registerAddonHistogram(ADDON_ID, 
"PDF_VIEWER_TIME_TO_VIEW_MS", 1, 10000, 50, Telemetry.HISTOGRAM_EXPONENTIAL); @@ -58,6 +59,10 @@ this.PdfJsTelemetry = { let histogram = Telemetry.getAddonHistogram(ADDON_ID, "PDF_VIEWER_DOCUMENT_GENERATOR"); histogram.add(generatorId); }, + onFontType: function (fontTypeId) { + let histogram = Telemetry.getAddonHistogram(ADDON_ID, "PDF_VIEWER_FONT_TYPES"); + histogram.add(fontTypeId); + }, onForm: function (isAcroform) { let histogram = Telemetry.getAddonHistogram(ADDON_ID, "PDF_VIEWER_FORM"); histogram.add(isAcroform); diff --git a/extensions/firefox/content/PdfJsTelemetry.jsm b/extensions/firefox/content/PdfJsTelemetry.jsm index 5d1691d9d..dd5c0c063 100644 --- a/extensions/firefox/content/PdfJsTelemetry.jsm +++ b/extensions/firefox/content/PdfJsTelemetry.jsm @@ -44,6 +44,10 @@ this.PdfJsTelemetry = { let histogram = Services.telemetry.getHistogramById("PDF_VIEWER_DOCUMENT_GENERATOR"); histogram.add(generatorId); }, + onFontType: function (fontTypeId) { + let histogram = Services.telemetry.getHistogramById("PDF_VIEWER_FONT_TYPES"); + histogram.add(fontTypeId); + }, onForm: function (isAcroform) { let histogram = Services.telemetry.getHistogramById("PDF_VIEWER_FORM"); histogram.add(isAcroform); diff --git a/extensions/firefox/content/PdfStreamConverter.jsm b/extensions/firefox/content/PdfStreamConverter.jsm index 7e7aed78d..03b319e07 100644 --- a/extensions/firefox/content/PdfStreamConverter.jsm +++ b/extensions/firefox/content/PdfStreamConverter.jsm @@ -247,6 +247,7 @@ function ChromeActions(domWindow, contentDispositionFilename) { documentInfo: false, firstPageInfo: false, streamTypesUsed: [], + fontTypesUsed: [], startAt: Date.now() }; } @@ -388,16 +389,34 @@ ChromeActions.prototype = { this.telemetryState.firstPageInfo = true; } break; - case 'streamInfo': - if (!Array.isArray(probeInfo.streamTypes)) { + case 'documentStats': + // documentStats can be called several times for one documents. 
+ // if stream/font types are reported, trying not to submit the same + // enumeration value multiple times. + var documentStats = probeInfo.stats; + if (!documentStats || typeof documentStats !== 'object') { break; } - for (var i = 0; i < probeInfo.streamTypes.length; i++) { - var streamTypeId = probeInfo.streamTypes[i] | 0; - if (streamTypeId >= 0 && streamTypeId < 10 && - !this.telemetryState.streamTypesUsed[streamTypeId]) { - PdfJsTelemetry.onStreamType(streamTypeId); - this.telemetryState.streamTypesUsed[streamTypeId] = true; + var streamTypes = documentStats.streamTypes; + if (Array.isArray(streamTypes)) { + var STREAM_TYPE_ID_LIMIT = 20; + for (var i = 0; i < STREAM_TYPE_ID_LIMIT; i++) { + if (streamTypes[i] && + !this.telemetryState.streamTypesUsed[i]) { + PdfJsTelemetry.onStreamType(i); + this.telemetryState.streamTypesUsed[i] = true; + } + } + } + var fontTypes = documentStats.fontTypes; + if (Array.isArray(fontTypes)) { + var FONT_TYPE_ID_LIMIT = 20; + for (var i = 0; i < FONT_TYPE_ID_LIMIT; i++) { + if (fontTypes[i] && + !this.telemetryState.fontTypesUsed[i]) { + PdfJsTelemetry.onFontType(i); + this.telemetryState.fontTypesUsed[i] = true; + } } } break; diff --git a/src/core/evaluator.js b/src/core/evaluator.js index e353280cd..0fb8df28f 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -22,7 +22,8 @@ stdFontMap, symbolsFonts, getTilingPatternIR, warn, Util, Promise, RefSetCache, isRef, TextRenderingMode, CMapFactory, OPS, UNSUPPORTED_FEATURES, UnsupportedManager, NormalizedUnicodes, - IDENTITY_MATRIX, reverseIfRtl, createPromiseCapability */ + IDENTITY_MATRIX, reverseIfRtl, createPromiseCapability, + getFontType */ 'use strict'; @@ -546,11 +547,28 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } translatedPromise.then(function (translatedFont) { + if (translatedFont.fontType !== undefined) { + var xrefFontStats = xref.stats.fontTypes; + xrefFontStats[translatedFont.fontType] = true; + } + fontCapability.resolve(new 
TranslatedFont(font.loadedName, translatedFont, font)); }, function (reason) { // TODO fontCapability.reject? UnsupportedManager.notify(UNSUPPORTED_FEATURES.font); + + try { + // error, but it's still nice to have font type reported + var descriptor = preEvaluatedFont.descriptor; + var fontFile3 = descriptor && descriptor.get('FontFile3'); + var subtype = fontFile3 && fontFile3.get('Subtype'); + var fontType = getFontType(preEvaluatedFont.type, + subtype && subtype.name); + var xrefFontStats = xref.stats.fontTypes; + xrefFontStats[fontType] = true; + } catch (ex) { } + fontCapability.resolve(new TranslatedFont(font.loadedName, new ErrorFont(reason instanceof Error ? reason.message : reason), font)); @@ -1542,6 +1560,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { dict: dict, baseDict: baseDict, composite: composite, + type: type.name, hash: hash ? hash.hexdigest() : '' }; }, @@ -1552,16 +1571,16 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var dict = preEvaluatedFont.dict; var composite = preEvaluatedFont.composite; var descriptor = preEvaluatedFont.descriptor; - var type = dict.get('Subtype'); + var type = preEvaluatedFont.type; var maxCharIndex = (composite ? 0xFFFF : 0xFF); var properties; if (!descriptor) { - if (type.name === 'Type3') { + if (type === 'Type3') { // FontDescriptor is only required for Type3 fonts when the document // is a tagged pdf. Create a barbebones one to get by. descriptor = new Dict(null); - descriptor.set('FontName', Name.get(type.name)); + descriptor.set('FontName', Name.get(type)); } else { // Before PDF 1.5 if the font was one of the base 14 fonts, having a // FontDescriptor was not required. 
@@ -1584,7 +1603,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { FontFlags.Nonsymbolic); properties = { - type: type.name, + type: type, name: baseFontName, widths: metrics.widths, defaultWidth: metrics.defaultWidth, @@ -1617,7 +1636,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { baseFont = Name.get(baseFont); } - if (type.name !== 'Type3') { + if (type !== 'Type3') { var fontNameStr = fontName && fontName.name; var baseFontStr = baseFont && baseFont.name; if (fontNameStr !== baseFontStr) { @@ -1649,7 +1668,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } properties = { - type: type.name, + type: type, name: fontName.name, subtype: subtype, file: fontFile, @@ -1684,7 +1703,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { this.extractDataStructures(dict, baseDict, xref, properties); this.extractWidths(dict, xref, descriptor, properties); - if (type.name === 'Type3') { + if (type === 'Type3') { properties.isType3Font = true; } diff --git a/src/core/fonts.js b/src/core/fonts.js index 1e18e23ad..9f38c85fb 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -19,7 +19,7 @@ isNum, ISOAdobeCharset, Stream, stringToArray, stringToBytes, string32, TextDecoder, warn, Lexer, Util, FONT_IDENTITY_MATRIX, FontRendererFactory, shadow, isString, IdentityCMap, Name, - CMapFactory, PDFJS, readUint32 */ + CMapFactory, PDFJS, readUint32, FontType */ 'use strict'; @@ -2117,6 +2117,28 @@ function adjustWidths(properties) { properties.defaultWidth *= scale; } +function getFontType(type, subtype) { + switch (type) { + case 'Type1': + return subtype === 'Type1C' ? FontType.TYPE1C : FontType.TYPE1; + case 'CIDFontType0': + return subtype === 'CIDFontType0C' ? 
FontType.CIDFONTTYPE0C : + FontType.CIDFONTTYPE0; + case 'OpenType': + return FontType.OPENTYPE; + case 'TrueType': + return FontType.TRUETYPE; + case 'CIDFontType2': + return FontType.CIDFONTTYPE2; + case 'MMType1': + return FontType.MMTYPE1; + case 'Type0': + return FontType.TYPE0; + default: + return FontType.UNKNOWN; + } +} + var Glyph = (function GlyphClosure() { function Glyph(fontChar, unicode, accent, width, vmetric, operatorListId) { this.fontChar = fontChar; @@ -2167,6 +2189,7 @@ var Font = (function FontClosure() { this.isMonospace = !!(properties.flags & FontFlags.FixedPitch); var type = properties.type; + var subtype = properties.subtype; this.type = type; this.fallbackName = (this.isMonospace ? 'monospace' : @@ -2193,6 +2216,7 @@ var Font = (function FontClosure() { this.toFontChar[charCode] = (this.differences[charCode] || properties.defaultEncoding[charCode]); } + this.fontType = FontType.TYPE3; return; } @@ -2260,11 +2284,11 @@ var Font = (function FontClosure() { } this.loadedName = fontName.split('-')[0]; this.loading = false; + this.fontType = getFontType(type, subtype); return; } // Some fonts might use wrong font types for Type1C or CIDFontType0C - var subtype = properties.subtype; if (subtype == 'Type1C' && (type != 'Type1' && type != 'MMType1')) { // Some TrueType fonts by mistake claim Type1C if (isTrueTypeFile(file)) { @@ -2288,7 +2312,7 @@ var Font = (function FontClosure() { case 'CIDFontType0': this.mimetype = 'font/opentype'; - var cff = (subtype == 'Type1C' || subtype == 'CIDFontType0C') ? + var cff = (subtype === 'Type1C' || subtype === 'CIDFontType0C') ? new CFFFont(file, properties) : new Type1Font(name, file, properties); adjustWidths(properties); @@ -2305,6 +2329,9 @@ var Font = (function FontClosure() { // Repair the TrueType file. 
It is can be damaged in the point of // view of the sanitizer data = this.checkAndRepair(name, file, properties); + if (this.isOpenType) { + type = 'OpenType'; + } break; default: @@ -2313,6 +2340,7 @@ var Font = (function FontClosure() { } this.data = data; + this.fontType = getFontType(type, subtype); // Transfer some properties again that could change during font conversion this.fontMatrix = properties.fontMatrix; @@ -3752,10 +3780,12 @@ var Font = (function FontClosure() { delete tables.fpgm; delete tables.prep; delete tables['cvt ']; + this.isOpenType = true; } else { if (!tables.glyf || !tables.loca) { error('Required "glyf" or "loca" tables are not found'); } + this.isOpenType = false; } if (!tables.maxp) { diff --git a/src/core/obj.js b/src/core/obj.js index 858e006f4..af13e5a26 100644 --- a/src/core/obj.js +++ b/src/core/obj.js @@ -692,6 +692,10 @@ var XRef = (function XRefClosure() { // prepare the XRef cache this.cache = []; this.password = password; + this.stats = { + streamTypes: [], + fontTypes: [] + }; } XRef.prototype = { @@ -1040,7 +1044,7 @@ var XRef = (function XRefClosure() { var dict; for (i = 0, ii = trailers.length; i < ii; ++i) { stream.pos = trailers[i]; - var parser = new Parser(new Lexer(stream), true, null); + var parser = new Parser(new Lexer(stream), true, this); var obj = parser.getObj(); if (!isCmd(obj, 'trailer')) { continue; @@ -1072,7 +1076,7 @@ var XRef = (function XRefClosure() { stream.pos = startXRef + stream.start; - var parser = new Parser(new Lexer(stream), true, null); + var parser = new Parser(new Lexer(stream), true, this); var obj = parser.getObj(); var dict; diff --git a/src/core/parser.js b/src/core/parser.js index e1c196166..22d8c2767 100644 --- a/src/core/parser.js +++ b/src/core/parser.js @@ -17,7 +17,8 @@ /* globals Ascii85Stream, AsciiHexStream, CCITTFaxStream, Cmd, Dict, error, FlateStream, isArray, isCmd, isDict, isInt, isName, isNum, isRef, isString, Jbig2Stream, JpegStream, JpxStream, LZWStream, Name, - 
NullStream, PredictorStream, Ref, RunLengthStream, warn, info */ + NullStream, PredictorStream, Ref, RunLengthStream, warn, info, + StreamType */ 'use strict'; @@ -343,7 +344,9 @@ var Parser = (function ParserClosure() { if (stream.dict.get('Length') === 0) { return new NullStream(stream); } + var xrefStreamStats = this.xref.stats.streamTypes; if (name == 'FlateDecode' || name == 'Fl') { + xrefStreamStats[StreamType.FLATE] = true; if (params) { return new PredictorStream(new FlateStream(stream, maybeLength), maybeLength, params); @@ -351,6 +354,7 @@ var Parser = (function ParserClosure() { return new FlateStream(stream, maybeLength); } if (name == 'LZWDecode' || name == 'LZW') { + xrefStreamStats[StreamType.LZW] = true; var earlyChange = 1; if (params) { if (params.has('EarlyChange')) { @@ -363,24 +367,31 @@ var Parser = (function ParserClosure() { return new LZWStream(stream, maybeLength, earlyChange); } if (name == 'DCTDecode' || name == 'DCT') { + xrefStreamStats[StreamType.DCT] = true; return new JpegStream(stream, maybeLength, stream.dict, this.xref); } if (name == 'JPXDecode' || name == 'JPX') { + xrefStreamStats[StreamType.JPX] = true; return new JpxStream(stream, maybeLength, stream.dict); } if (name == 'ASCII85Decode' || name == 'A85') { + xrefStreamStats[StreamType.A85] = true; return new Ascii85Stream(stream, maybeLength); } if (name == 'ASCIIHexDecode' || name == 'AHx') { + xrefStreamStats[StreamType.AHX] = true; return new AsciiHexStream(stream, maybeLength); } if (name == 'CCITTFaxDecode' || name == 'CCF') { + xrefStreamStats[StreamType.CCF] = true; return new CCITTFaxStream(stream, maybeLength, params); } if (name == 'RunLengthDecode' || name == 'RL') { + xrefStreamStats[StreamType.RL] = true; return new RunLengthStream(stream, maybeLength); } if (name == 'JBIG2Decode') { + xrefStreamStats[StreamType.JBIG] = true; return new Jbig2Stream(stream, maybeLength, stream.dict); } warn('filter "' + name + '" not supported yet'); diff --git 
a/src/core/worker.js b/src/core/worker.js index 6d9fba27d..d8cdf8705 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -319,6 +319,12 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = { }); }); + handler.on('GetStats', + function wphSetupGetStats(data) { + return pdfManager.pdfDocument.xref.stats; + } + ); + handler.on('UpdatePassword', function wphSetupUpdatePassword(data) { pdfManager.updatePassword(data); }); diff --git a/src/display/api.js b/src/display/api.js index b5e9039f2..e1ece26c3 100644 --- a/src/display/api.js +++ b/src/display/api.js @@ -164,6 +164,14 @@ PDFJS.maxCanvasPixels = (PDFJS.maxCanvasPixels === undefined ? * loaded before the switch to range requests. */ +/** + * @typedef {Object} PDFDocumentStats + * @property {Array} streamTypes - Used stream types in the document (an item + * is set to true if specific stream ID was used in the document). + * @property {Array} fontTypes - Used font types in the document (an item is set + * to true if specific font ID was used in the document). + */ + /** * This is the main entry point for loading a PDF and interacting with it. * NOTE: If a URL is used to fetch the PDF data a standard XMLHttpRequest(XHR) @@ -331,6 +339,13 @@ var PDFDocumentProxy = (function PDFDocumentProxyClosure() { getDownloadInfo: function PDFDocumentProxy_getDownloadInfo() { return this.transport.downloadInfoCapability.promise; }, + /** + * @returns {Promise} A promise that is resolved with current stats about + * document structures (see {@link PDFDocumentStats}). + */ + getStats: function PDFDocumentProxy_getStats() { + return this.transport.getStats(); + }, /** * Cleans up resources allocated by the document, e.g. created @font-face.
*/ @@ -1056,6 +1071,10 @@ var WorkerTransport = (function WorkerTransportClosure() { }); }, + getStats: function WorkerTransport_getStats() { + return this.messageHandler.sendWithPromise('GetStats', null); + }, + startCleanup: function WorkerTransport_startCleanup() { this.messageHandler.sendWithPromise('Cleanup', null). then(function endCleanup() { diff --git a/src/shared/util.js b/src/shared/util.js index ae4d85d40..3f1facde1 100644 --- a/src/shared/util.js +++ b/src/shared/util.js @@ -44,6 +44,33 @@ var ImageKind = { RGBA_32BPP: 3 }; +var StreamType = { + UNKNOWN: 0, + FLATE: 1, + LZW: 2, + DCT: 3, + JPX: 4, + JBIG: 5, + A85: 6, + AHX: 7, + CCF: 8, + RL: 9 +}; + +var FontType = { + UNKNOWN: 0, + TYPE1: 1, + TYPE1C: 2, + CIDFONTTYPE0: 3, + CIDFONTTYPE0C: 4, + TRUETYPE: 5, + CIDFONTTYPE2: 6, + TYPE3: 7, + OPENTYPE: 8, + TYPE0: 9, + MMTYPE1: 10 +}; + // The global PDFJS object exposes the API // In production, it will be declared outside a global wrapper // In development, it will be declared here diff --git a/web/page_view.js b/web/page_view.js index 856b1580f..636e4777e 100644 --- a/web/page_view.js +++ b/web/page_view.js @@ -630,7 +630,13 @@ var PageView = function pageView(container, id, scale, // FirefoxCom.request('reportTelemetry', JSON.stringify({ // type: 'pageInfo' // })); -// // TODO add stream types report here +// // It is a good time to report stream and font types +// PDFView.pdfDocument.getStats().then(function (stats) { +// FirefoxCom.request('reportTelemetry', JSON.stringify({ +// type: 'documentStats', +// stats: stats +// })); +// }); //#endif callback(); }