Merge pull request #17701 from calixteman/alt_text_ai

[Editor] Add the possibility to query some ML stuff to guess an alt text for an image
This commit is contained in:
calixteman 2024-02-21 10:14:40 +01:00 committed by GitHub
commit 72b8b29147
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 102 additions and 6 deletions

View File

@ -45,6 +45,10 @@
"type": "boolean",
"default": false
},
"enableML": {
"type": "boolean",
"default": false
},
"cursorToolOnLoad": {
"title": "Cursor tool on load",
"description": "The cursor tool that is enabled upon load.\n 0 = Text selection tool.\n 1 = Hand tool.",

View File

@ -76,6 +76,10 @@ class AltText {
this.#altTextWasFromKeyBoard = false;
}
isEmpty() {
return !this.#altText && !this.#altTextDecorative;
}
get data() {
return {
altText: this.#altText,

View File

@ -970,6 +970,10 @@ class AnnotationEditor {
this.#altText.data = data;
}
hasAltText() {
return !this.#altText?.isEmpty();
}
/**
* Render this editor in a div.
* @returns {HTMLDivElement | null}

View File

@ -431,6 +431,42 @@ class StampEditor extends AnnotationEditor {
const bitmap = this.#isSvg
? this.#bitmap
: this.#scaleBitmap(width, height);
// When the UI manager has an ML manager and this editor has no alt text yet,
// ask the "image-to-text" service for a suggested alt text for the image.
// NOTE(review): none of the promises created below has a rejection handler
// and the FileReader has no `onerror` callback — confirm that failures are
// meant to be ignored here.
if (this._uiManager.hasMLManager && !this.hasAltText()) {
// Draw the whole bitmap, scaled to width x height, on an offscreen canvas.
const offscreen = new OffscreenCanvas(width, height);
const ctx = offscreen.getContext("2d");
ctx.drawImage(
bitmap,
0,
0,
bitmap.width,
bitmap.height,
0,
0,
width,
height
);
// Serialize the canvas to a blob, then read that blob as a data URL which is
// sent as the `imageData` of the request.
offscreen.convertToBlob().then(blob => {
const fileReader = new FileReader();
fileReader.onload = () => {
const url = fileReader.result;
this._uiManager
.mlGuess({
service: "image-to-text",
request: {
imageData: url,
},
})
.then(response => {
// A missing response or a missing/falsy `output` yields an empty string.
const altText = response?.output || "";
// Only apply the guess if the editor still has a parent and still has no
// alt text at the time the response arrives.
if (this.parent && altText && !this.hasAltText()) {
this.altTextData = { altText, decorative: false };
}
});
};
fileReader.readAsDataURL(blob);
});
}
const ctx = canvas.getContext("2d");
ctx.filter = this._uiManager.hcmFilter;
ctx.drawImage(

View File

@ -563,6 +563,8 @@ class AnnotationEditorUIManager {
#mainHighlightColorPicker = null;
#mlManager = null;
#mode = AnnotationEditorType.NONE;
#selectedEditors = new Set();
@ -749,7 +751,8 @@ class AnnotationEditorUIManager {
eventBus,
pdfDocument,
pageColors,
highlightColors
highlightColors,
mlManager
) {
this.#container = container;
this.#viewer = viewer;
@ -763,6 +766,7 @@ class AnnotationEditorUIManager {
this.#filterFactory = pdfDocument.filterFactory;
this.#pageColors = pageColors;
this.#highlightColors = highlightColors || null;
this.#mlManager = mlManager || null;
this.viewParameters = {
realScale: PixelsPerInch.PDF_TO_CSS_UNITS,
rotation: 0,
@ -797,6 +801,14 @@ class AnnotationEditorUIManager {
}
}
async mlGuess(data) {
return this.#mlManager?.guess(data) || null;
}
get hasMLManager() {
return !!this.#mlManager;
}
get hcmFilter() {
return shadow(
this,

View File

@ -53,7 +53,7 @@ import {
} from "pdfjs-lib";
import { AppOptions, OptionKind } from "./app_options.js";
import { AutomationEventBus, EventBus } from "./event_utils.js";
import { ExternalServices, initCom } from "web-external_services";
import { ExternalServices, initCom, MLManager } from "web-external_services";
import { LinkTarget, PDFLinkService } from "./pdf_link_service.js";
import { AltTextManager } from "web-alt_text_manager";
import { AnnotationEditorParams } from "web-annotation_editor_params";
@ -420,6 +420,7 @@ const PDFViewerApplication = {
maxCanvasPixels: AppOptions.get("maxCanvasPixels"),
enablePermissions: AppOptions.get("enablePermissions"),
pageColors,
mlManager: this.mlManager,
});
this.pdfViewer = pdfViewer;
@ -682,6 +683,14 @@ const PDFViewerApplication = {
return shadow(this, "externalServices", new ExternalServices());
},
get mlManager() {
return shadow(
this,
"mlManager",
AppOptions.get("enableML") === true ? new MLManager() : null
);
},
get initialized() {
return this._initializedCapability.settled;
},

View File

@ -143,6 +143,11 @@ const defaultOptions = {
value: typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING"),
kind: OptionKind.VIEWER + OptionKind.PREFERENCE,
},
enableML: {
/** @type {boolean} */
value: false,
kind: OptionKind.VIEWER + OptionKind.PREFERENCE,
},
enablePermissions: {
/** @type {boolean} */
value: false,

View File

@ -435,4 +435,10 @@ class ExternalServices extends BaseExternalServices {
}
}
export { ExternalServices, initCom, Preferences };
/**
 * Placeholder ML manager: it never produces a guess.
 */
class MLManager {
  /**
   * @returns {Promise<null>} Always resolves to `null`.
   */
  guess() {
    return Promise.resolve(null);
  }
}
export { ExternalServices, initCom, MLManager, Preferences };

View File

@ -314,6 +314,12 @@ class FirefoxScripting {
}
}
/**
 * ML manager which delegates the guesses to `FirefoxCom`.
 */
class MLManager {
  /**
   * Send a "mlGuess" asynchronous request through `FirefoxCom`.
   * @param {Object} data - The payload passed as-is with the request.
   * @returns {*} Whatever `FirefoxCom.requestAsync` returns (presumably a
   *   promise for the response — confirm against `FirefoxCom`).
   */
  guess(data) {
    const action = "mlGuess";
    return FirefoxCom.requestAsync(action, data);
  }
}
class ExternalServices extends BaseExternalServices {
updateFindControlState(data) {
FirefoxCom.request("updateFindControlState", data);
@ -415,4 +421,4 @@ class ExternalServices extends BaseExternalServices {
}
}
export { DownloadManager, ExternalServices, initCom, Preferences };
export { DownloadManager, ExternalServices, initCom, MLManager, Preferences };

View File

@ -47,4 +47,10 @@ class ExternalServices extends BaseExternalServices {
}
}
export { ExternalServices, initCom, Preferences };
/**
 * Placeholder ML manager: it never produces a guess.
 */
class MLManager {
  /**
   * @returns {Promise<null>} Always resolves to `null`.
   */
  guess() {
    return Promise.resolve(null);
  }
}
export { ExternalServices, initCom, MLManager, Preferences };

View File

@ -216,6 +216,8 @@ class PDFViewer {
#enablePermissions = false;
#mlManager = null;
#getAllTextInProgress = false;
#hiddenCopyElement = null;
@ -292,6 +294,7 @@ class PDFViewer {
}
this.#enablePermissions = options.enablePermissions || false;
this.pageColors = options.pageColors || null;
this.#mlManager = options.mlManager || null;
this.defaultRenderingQueue = !options.renderingQueue;
if (
@ -857,7 +860,8 @@ class PDFViewer {
this.eventBus,
pdfDocument,
this.pageColors,
this.#annotationEditorHighlightColors
this.#annotationEditorHighlightColors,
this.#mlManager
);
this.eventBus.dispatch("annotationeditoruimanager", {
source: this,