From 46416bb131bc35c54e8d2a381f30c7b1e3eff639 Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Tue, 20 Feb 2024 09:49:20 +0100 Subject: [PATCH] [Editor] Add the possibility to query some ML stuff to guess an alt text for an image It's only for an experimental purpose. --- extensions/chromium/preferences_schema.json | 4 +++ src/display/editor/alt_text.js | 4 +++ src/display/editor/editor.js | 4 +++ src/display/editor/stamp.js | 36 +++++++++++++++++++++ src/display/editor/tools.js | 14 +++++++- web/app.js | 11 ++++++- web/app_options.js | 5 +++ web/chromecom.js | 8 ++++- web/firefoxcom.js | 8 ++++- web/genericcom.js | 8 ++++- web/pdf_viewer.js | 6 +++- 11 files changed, 102 insertions(+), 6 deletions(-) diff --git a/extensions/chromium/preferences_schema.json b/extensions/chromium/preferences_schema.json index c8500a0e1..7c00e57e6 100644 --- a/extensions/chromium/preferences_schema.json +++ b/extensions/chromium/preferences_schema.json @@ -45,6 +45,10 @@ "type": "boolean", "default": false }, + "enableML": { + "type": "boolean", + "default": false + }, "cursorToolOnLoad": { "title": "Cursor tool on load", "description": "The cursor tool that is enabled upon load.\n 0 = Text selection tool.\n 1 = Hand tool.", diff --git a/src/display/editor/alt_text.js b/src/display/editor/alt_text.js index dbe634b7e..c720b93d0 100644 --- a/src/display/editor/alt_text.js +++ b/src/display/editor/alt_text.js @@ -76,6 +76,10 @@ class AltText { this.#altTextWasFromKeyBoard = false; } + isEmpty() { + return !this.#altText && !this.#altTextDecorative; + } + get data() { return { altText: this.#altText, diff --git a/src/display/editor/editor.js b/src/display/editor/editor.js index d5547aee8..cd26b23a6 100644 --- a/src/display/editor/editor.js +++ b/src/display/editor/editor.js @@ -970,6 +970,10 @@ class AnnotationEditor { this.#altText.data = data; } + hasAltText() { + return !this.#altText?.isEmpty(); + } + /** * Render this editor in a div. * @returns {HTMLDivElement | null} diff --git a/src/display/editor/stamp.js b/src/display/editor/stamp.js index 30fa96cd0..132610f85 100644 --- a/src/display/editor/stamp.js +++ b/src/display/editor/stamp.js @@ -431,6 +431,42 @@ class StampEditor extends AnnotationEditor { const bitmap = this.#isSvg ? this.#bitmap : this.#scaleBitmap(width, height); + + if (this._uiManager.hasMLManager && !this.hasAltText()) { + const offscreen = new OffscreenCanvas(width, height); + const ctx = offscreen.getContext("2d"); + ctx.drawImage( + bitmap, + 0, + 0, + bitmap.width, + bitmap.height, + 0, + 0, + width, + height + ); + offscreen.convertToBlob().then(blob => { + const fileReader = new FileReader(); + fileReader.onload = () => { + const url = fileReader.result; + this._uiManager + .mlGuess({ + service: "image-to-text", + request: { + imageData: url, + }, + }) + .then(response => { + const altText = response?.output || ""; + if (this.parent && altText && !this.hasAltText()) { + this.altTextData = { altText, decorative: false }; + } + }); + }; + fileReader.readAsDataURL(blob); + }); + } const ctx = canvas.getContext("2d"); ctx.filter = this._uiManager.hcmFilter; ctx.drawImage( diff --git a/src/display/editor/tools.js b/src/display/editor/tools.js index 0b787e1d7..e6ee222c5 100644 --- a/src/display/editor/tools.js +++ b/src/display/editor/tools.js @@ -563,6 +563,8 @@ class AnnotationEditorUIManager { #mainHighlightColorPicker = null; + #mlManager = null; + #mode = AnnotationEditorType.NONE; #selectedEditors = new Set(); @@ -749,7 +751,8 @@ class AnnotationEditorUIManager { eventBus, pdfDocument, pageColors, - highlightColors + highlightColors, + mlManager ) { this.#container = container; this.#viewer = viewer; @@ -763,6 +766,7 @@ class AnnotationEditorUIManager { this.#filterFactory = pdfDocument.filterFactory; this.#pageColors = pageColors; this.#highlightColors = highlightColors || null; + this.#mlManager = mlManager || null; this.viewParameters = { realScale: PixelsPerInch.PDF_TO_CSS_UNITS, rotation: 0, @@ -797,6 +801,14 @@ class AnnotationEditorUIManager { } } + async mlGuess(data) { + return this.#mlManager?.guess(data) || null; + } + + get hasMLManager() { + return !!this.#mlManager; + } + get hcmFilter() { return shadow( this, diff --git a/web/app.js b/web/app.js index 1d91bd3d5..49cf9a9f5 100644 --- a/web/app.js +++ b/web/app.js @@ -53,7 +53,7 @@ import { } from "pdfjs-lib"; import { AppOptions, OptionKind } from "./app_options.js"; import { AutomationEventBus, EventBus } from "./event_utils.js"; -import { ExternalServices, initCom } from "web-external_services"; +import { ExternalServices, initCom, MLManager } from "web-external_services"; import { LinkTarget, PDFLinkService } from "./pdf_link_service.js"; import { AltTextManager } from "web-alt_text_manager"; import { AnnotationEditorParams } from "web-annotation_editor_params"; @@ -420,6 +420,7 @@ const PDFViewerApplication = { maxCanvasPixels: AppOptions.get("maxCanvasPixels"), enablePermissions: AppOptions.get("enablePermissions"), pageColors, + mlManager: this.mlManager, }); this.pdfViewer = pdfViewer; @@ -682,6 +683,14 @@ const PDFViewerApplication = { return shadow(this, "externalServices", new ExternalServices()); }, + get mlManager() { + return shadow( + this, + "mlManager", + AppOptions.get("enableML") === true ? new MLManager() : null + ); + }, + get initialized() { return this._initializedCapability.settled; }, diff --git a/web/app_options.js b/web/app_options.js index 998a6ed2c..80fdc9c24 100644 --- a/web/app_options.js +++ b/web/app_options.js @@ -143,6 +143,11 @@ const defaultOptions = { value: typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING"), kind: OptionKind.VIEWER + OptionKind.PREFERENCE, }, + enableML: { + /** @type {boolean} */ + value: false, + kind: OptionKind.VIEWER + OptionKind.PREFERENCE, + }, enablePermissions: { /** @type {boolean} */ value: false, diff --git a/web/chromecom.js b/web/chromecom.js index c1e3b3f10..403a9e1a9 100644 --- a/web/chromecom.js +++ b/web/chromecom.js @@ -435,4 +435,10 @@ class ExternalServices extends BaseExternalServices { } } -export { ExternalServices, initCom, Preferences }; +class MLManager { + async guess() { + return null; + } +} + +export { ExternalServices, initCom, MLManager, Preferences }; diff --git a/web/firefoxcom.js b/web/firefoxcom.js index e16238664..a2fb5ac7b 100644 --- a/web/firefoxcom.js +++ b/web/firefoxcom.js @@ -314,6 +314,12 @@ class FirefoxScripting { } } +class MLManager { + guess(data) { + return FirefoxCom.requestAsync("mlGuess", data); + } +} + class ExternalServices extends BaseExternalServices { updateFindControlState(data) { FirefoxCom.request("updateFindControlState", data); @@ -415,4 +421,4 @@ class ExternalServices extends BaseExternalServices { } } -export { DownloadManager, ExternalServices, initCom, Preferences }; +export { DownloadManager, ExternalServices, initCom, MLManager, Preferences }; diff --git a/web/genericcom.js b/web/genericcom.js index def4988a3..996051018 100644 --- a/web/genericcom.js +++ b/web/genericcom.js @@ -47,4 +47,10 @@ class ExternalServices extends BaseExternalServices { } } -export { ExternalServices, initCom, Preferences }; +class MLManager { + async guess() { + return null; + } +} + +export { ExternalServices, initCom, MLManager, Preferences }; diff --git a/web/pdf_viewer.js b/web/pdf_viewer.js index b1943dd9c..4f88bcb1d 100644 --- a/web/pdf_viewer.js +++ b/web/pdf_viewer.js @@ -216,6 +216,8 @@ class PDFViewer { #enablePermissions = false; + #mlManager = null; + #getAllTextInProgress = false; #hiddenCopyElement = null; @@ -292,6 +294,7 @@ class PDFViewer { } this.#enablePermissions = options.enablePermissions || false; this.pageColors = options.pageColors || null; + this.#mlManager = options.mlManager || null; this.defaultRenderingQueue = !options.renderingQueue; if ( @@ -857,7 +860,8 @@ class PDFViewer { this.eventBus, pdfDocument, this.pageColors, - this.#annotationEditorHighlightColors + this.#annotationEditorHighlightColors, + this.#mlManager ); this.eventBus.dispatch("annotationeditoruimanager", { source: this,