XFA - Add the possibility to layout and measure text

- some containers don't always have both of their dimensions, and those dimensions are based on their contents;
  - so in order to measure text, we must get the glyph widths (for the xfa fonts) before starting the layout;
  - implement a word-wrap algorithm;
  - handle font change during text layout.
This commit is contained in:
Calixte Denizet 2021-06-14 19:16:42 +02:00
parent 335d4cb2fc
commit 8eeb7ab4a3
12 changed files with 416 additions and 91 deletions

View File

@ -857,6 +857,10 @@ class PDFDocument {
return shadow(this, "xfaFaxtory", null);
}
get isPureXfa() {
return this.xfaFactory && this.xfaFactory.isValid();
}
get htmlForXfa() {
if (this.xfaFactory) {
return this.xfaFactory.getPages();
@ -898,8 +902,14 @@ class PDFDocument {
options,
});
const operatorList = new OperatorList();
const pdfFonts = [];
const initialState = {
font: null,
get font() {
return pdfFonts[pdfFonts.length - 1];
},
set font(font) {
pdfFonts.push(font);
},
clone() {
return this;
},
@ -947,6 +957,7 @@ class PDFDocument {
);
}
await Promise.all(promises);
this.xfaFactory.setFonts(pdfFonts);
}
get formInfo() {

View File

@ -872,6 +872,7 @@ class Font {
this.capHeight = properties.capHeight / PDF_GLYPH_SPACE_UNITS;
this.ascent = properties.ascent / PDF_GLYPH_SPACE_UNITS;
this.descent = properties.descent / PDF_GLYPH_SPACE_UNITS;
this.lineHeight = this.ascent - this.descent;
this.fontMatrix = properties.fontMatrix;
this.bbox = properties.bbox;
this.defaultEncoding = properties.defaultEncoding;
@ -2495,13 +2496,16 @@ class Font {
unitsPerEm: int16(tables.head.data[18], tables.head.data[19]),
yMax: int16(tables.head.data[42], tables.head.data[43]),
yMin: signedInt16(tables.head.data[38], tables.head.data[39]),
ascent: int16(tables.hhea.data[4], tables.hhea.data[5]),
ascent: signedInt16(tables.hhea.data[4], tables.hhea.data[5]),
descent: signedInt16(tables.hhea.data[6], tables.hhea.data[7]),
lineGap: signedInt16(tables.hhea.data[8], tables.hhea.data[9]),
};
// PDF FontDescriptor metrics lie -- using data from actual font.
this.ascent = metricsOverride.ascent / metricsOverride.unitsPerEm;
this.descent = metricsOverride.descent / metricsOverride.unitsPerEm;
this.lineGap = metricsOverride.lineGap / metricsOverride.unitsPerEm;
this.lineHeight = this.ascent - this.descent + this.lineGap;
// The 'post' table has glyphs names.
if (tables.post) {

View File

@ -187,13 +187,8 @@ class WorkerMessageHandler {
await pdfManager.ensureDoc("checkFirstPage");
}
const [numPages, fingerprint, htmlForXfa] = await Promise.all([
pdfManager.ensureDoc("numPages"),
pdfManager.ensureDoc("fingerprint"),
pdfManager.ensureDoc("htmlForXfa"),
]);
if (htmlForXfa) {
const isPureXfa = await pdfManager.ensureDoc("isPureXfa");
if (isPureXfa) {
const task = new WorkerTask("loadXfaFonts");
startWorkerTask(task);
await pdfManager
@ -203,6 +198,17 @@ class WorkerMessageHandler {
})
.then(() => finishWorkerTask(task));
}
const [numPages, fingerprint] = await Promise.all([
pdfManager.ensureDoc("numPages"),
pdfManager.ensureDoc("fingerprint"),
]);
// Get htmlForXfa after numPages to avoid to create HTML twice.
const htmlForXfa = isPureXfa
? await pdfManager.ensureDoc("htmlForXfa")
: null;
return { numPages, fingerprint, htmlForXfa };
}

View File

@ -13,8 +13,9 @@
* limitations under the License.
*/
import { $toHTML } from "./xfa_object.js";
import { $fonts, $toHTML } from "./xfa_object.js";
import { Binder } from "./bind.js";
import { warn } from "../../shared/util.js";
import { XFAParser } from "./parser.js";
class XFAFactory {
@ -22,18 +23,25 @@ class XFAFactory {
try {
this.root = new XFAParser().parse(XFAFactory._createDocument(data));
this.form = new Binder(this.root).bind();
this._createPages();
} catch (e) {
console.log(e);
warn(`XFA - an error occured during parsing and binding: ${e}`);
}
}
isValid() {
return this.root && this.form;
}
_createPages() {
this.pages = this.form[$toHTML]();
this.dims = this.pages.children.map(c => {
const { width, height } = c.attributes.style;
return [0, 0, parseInt(width), parseInt(height)];
});
try {
this.pages = this.form[$toHTML]();
this.dims = this.pages.children.map(c => {
const { width, height } = c.attributes.style;
return [0, 0, parseInt(width), parseInt(height)];
});
} catch (e) {
warn(`XFA - an error occured during layout: ${e}`);
}
}
getBoundingBox(pageIndex) {
@ -41,9 +49,35 @@ class XFAFactory {
}
get numberPages() {
if (!this.pages) {
this._createPages();
}
return this.dims.length;
}
setFonts(fonts) {
this.form[$fonts] = Object.create(null);
for (const font of fonts) {
const cssFontInfo = font.cssFontInfo;
const name = cssFontInfo.fontFamily;
if (!this.form[$fonts][name]) {
this.form[$fonts][name] = Object.create(null);
}
let property = "regular";
if (cssFontInfo.italicAngle !== "0") {
if (parseFloat(cssFontInfo.fontWeight) >= 700) {
property = "bolditalic";
} else {
property = "italic";
}
} else if (parseFloat(cssFontInfo.fontWeight) >= 700) {
property = "bold";
}
this.form[$fonts][name][property] = font;
}
}
getPages() {
if (!this.pages) {
this._createPages();

View File

@ -18,18 +18,14 @@ import {
$getParent,
$getSubformParent,
$nodeName,
$pushGlyphs,
$toStyle,
XFAObject,
} from "./xfa_object.js";
import { getMeasurement } from "./utils.js";
import { TextMeasure } from "./text.js";
import { warn } from "../../shared/util.js";
const wordNonWordRegex = new RegExp(
"([\\p{N}\\p{L}\\p{M}]+)|([^\\p{N}\\p{L}\\p{M}]+)",
"gu"
);
const wordFirstRegex = new RegExp("^[\\p{N}\\p{L}\\p{M}]", "u");
function measureToString(m) {
if (typeof m === "string") {
return "0px";
@ -192,65 +188,15 @@ const converters = {
},
};
function layoutText(text, fontSize, space) {
// Try to guess width and height for the given text in taking into
// account the space where the text should fit.
// The computed dimensions are just an overestimation.
// TODO: base this estimation on real metrics.
let width = 0;
let height = 0;
let totalWidth = 0;
const lineHeight = fontSize * 1.5;
const averageCharSize = fontSize * 0.4;
const maxCharOnLine = Math.floor(space.width / averageCharSize);
const chunks = text.match(wordNonWordRegex);
let treatedChars = 0;
let i = 0;
let chunk = chunks[0];
while (chunk) {
const w = chunk.length * averageCharSize;
if (width + w <= space.width) {
width += w;
treatedChars += chunk.length;
chunk = chunks[i++];
continue;
}
if (!wordFirstRegex.test(chunk) || chunk.length > maxCharOnLine) {
const numOfCharOnLine = Math.floor(
(space.width - width) / averageCharSize
);
chunk = chunk.slice(numOfCharOnLine);
treatedChars += numOfCharOnLine;
if (height + lineHeight > space.height) {
return { width: 0, height: 0, splitPos: treatedChars };
}
totalWidth = Math.max(width, totalWidth);
width = 0;
height += lineHeight;
continue;
}
if (height + lineHeight > space.height) {
return { width: 0, height: 0, splitPos: treatedChars };
}
totalWidth = Math.max(width, totalWidth);
width = w;
height += lineHeight;
chunk = chunks[i++];
function layoutText(text, xfaFont, fonts, width) {
const measure = new TextMeasure(xfaFont, fonts);
if (typeof text === "string") {
measure.addString(text);
} else {
text[$pushGlyphs](measure);
}
if (totalWidth === 0) {
totalWidth = width;
}
if (totalWidth !== 0) {
height += lineHeight;
}
return { width: totalWidth, height, splitPos: -1 };
return measure.compute(width);
}
function computeBbox(node, html, availableSpace) {

View File

@ -23,6 +23,7 @@ import {
$extra,
$finalize,
$flushHTML,
$fonts,
$getAvailableSpace,
$getChildren,
$getContainedChildren,
@ -1522,14 +1523,51 @@ class Draw extends XFAObject {
fixDimensions(this);
if (this.w !== "" && this.h === "" && this.value) {
const text = this.value[$text]();
if (text) {
const { height } = layoutText(text, this.font.size, {
width: this.w,
height: Infinity,
});
this.h = height || "";
if ((this.w === "" || this.h === "") && this.value) {
const maxWidth = this.w === "" ? availableSpace.width : this.w;
const fonts = getRoot(this)[$fonts];
let font = this.font;
if (!font) {
let parent = this[$getParent]();
while (!(parent instanceof Template)) {
if (parent.font) {
font = parent.font;
break;
}
parent = parent[$getParent]();
}
}
let height = null;
let width = null;
if (
this.value.exData &&
this.value.exData[$content] &&
this.value.exData.contentType === "text/html"
) {
const res = layoutText(
this.value.exData[$content],
font,
fonts,
maxWidth
);
width = res.width;
height = res.height;
} else {
const text = this.value[$text]();
if (text) {
const res = layoutText(text, font, fonts, maxWidth);
width = res.width;
height = res.height;
}
}
if (width !== null && this.w === "") {
this.w = width;
}
if (height !== null && this.h === "") {
this.h = height;
}
}
@ -2623,7 +2661,7 @@ class Font extends XFAObject {
]);
this.posture = getStringOption(attributes.posture, ["normal", "italic"]);
this.size = getMeasurement(attributes.size, "10pt");
this.typeface = attributes.typeface || "";
this.typeface = attributes.typeface || "Courier";
this.underline = getInteger({
data: attributes.underline,
defaultValue: 0,
@ -4484,7 +4522,6 @@ class Template extends XFAObject {
children: [],
});
}
this[$extra] = {
overflowNode: null,
pageNumber: 1,

218
src/core/xfa/text.js Normal file
View File

@ -0,0 +1,218 @@
/* Copyright 2021 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Multipliers applied to the measured width and height before
// TextMeasure.compute returns them.
// NOTE(review): presumably a deliberate over-estimation so that text is not
// clipped; the rationale for the 1.2 value is not visible here -- confirm.
const WIDTH_FACTOR = 1.2;
const HEIGHT_FACTOR = 1.2;
/**
 * Pairs an XFA font description with the PDF font used to measure it.
 *
 * After construction:
 *  - `xfaFont` is the XFA description actually in use;
 *  - `pdfFont` is the matching font object, or the default font when the
 *    requested typeface or variant cannot be found.
 */
class FontInfo {
  /**
   * @param {Object} [xfaFont] - XFA font description; its `typeface`,
   *   `posture` and `weight` properties are read here. When falsy, the
   *   default font is used.
   * @param {Object} fonts - Map from typeface name to an object holding the
   *   `regular`, `bold`, `italic` and `bolditalic` fonts of that family.
   */
  constructor(xfaFont, fonts) {
    if (!xfaFont) {
      [this.pdfFont, this.xfaFont] = this.defaultFont(fonts);
      return;
    }

    this.xfaFont = xfaFont;
    // Look the typeface up under its plain name first, then under the
    // "-PdfJS-XFA" suffixed name.
    const typeface =
      fonts[xfaFont.typeface] || fonts[`${xfaFont.typeface}-PdfJS-XFA`];
    if (!typeface) {
      [this.pdfFont, this.xfaFont] = this.defaultFont(fonts);
      return;
    }

    const isBold = xfaFont.weight === "bold";
    const isItalic = xfaFont.posture === "italic";
    let variant = "regular";
    if (isItalic) {
      variant = isBold ? "bolditalic" : "italic";
    } else if (isBold) {
      variant = "bold";
    }

    this.pdfFont = typeface[variant] || null;
    if (!this.pdfFont) {
      // The family exists but not in the requested variant.
      [this.pdfFont, this.xfaFont] = this.defaultFont(fonts);
    }
  }

  /**
   * Pick a fallback font among the available ones.
   *
   * @param {Object} fonts - Same map as the one given to the constructor.
   * @returns {Array} A `[pdfFont, xfaFont]` pair where `xfaFont` describes
   *   the fallback (normal posture and weight, size 10).
   */
  defaultFont(fonts) {
    // TODO: Add a default font based on Liberation.
    const font =
      fonts.Helvetica ||
      fonts["Myriad Pro"] ||
      fonts.Arial ||
      fonts.ArialMT ||
      Object.values(fonts)[0];
    const pdfFont = font.regular;
    // Read the CSS info from the local font: this method runs while the
    // instance is still being built, so `this.pdfFont` is not set yet.
    const info = pdfFont.cssFontInfo;
    const xfaFont = {
      typeface: info.fontFamily,
      posture: "normal",
      weight: "normal",
      size: 10,
    };
    return [pdfFont, xfaFont];
  }
}
/**
 * Stack of FontInfo objects tracking the font currently in effect while
 * nested text runs are visited.
 */
class FontSelector {
  /**
   * @param {Object} defaultXfaFont - XFA font used for the bottom entry.
   * @param {Object} fonts - Available fonts, handed to every FontInfo.
   */
  constructor(defaultXfaFont, fonts) {
    this.fonts = fonts;
    const baseInfo = new FontInfo(defaultXfaFont, fonts);
    this.stack = [baseInfo];
  }

  /**
   * Push a new font. Any of typeface/posture/weight/size that is falsy on
   * `xfaFont` is filled in (on the object itself) from the current font.
   */
  pushFont(xfaFont) {
    const { stack } = this;
    const current = stack[stack.length - 1];
    const inherited = ["typeface", "posture", "weight", "size"];
    inherited.forEach(property => {
      if (!xfaFont[property]) {
        xfaFont[property] = current.xfaFont[property];
      }
    });

    const pushed = new FontInfo(xfaFont, this.fonts);
    if (!pushed.pdfFont) {
      // No usable font was resolved: keep measuring with the current one.
      pushed.pdfFont = current.pdfFont;
    }
    stack.push(pushed);
  }

  /** Drop the most recently pushed font. */
  popFont() {
    this.stack.pop();
  }

  /** @returns {FontInfo} The font currently in effect. */
  topFont() {
    const { stack } = this;
    return stack[stack.length - 1];
  }
}
/**
 * Computes the dimensions of a text area from font metrics.
 *
 * Text is first accumulated as a flat list of glyph records, each being
 * `[width, lineHeight, isSpace, isEOL]`; `compute` then word-wraps that
 * list against a maximum width.
 */
class TextMeasure {
  /**
   * @param {Object} defaultXfaFont - Font in effect before any pushFont.
   * @param {Object} fonts - Available fonts, handed to the FontSelector.
   */
  constructor(defaultXfaFont, fonts) {
    this.glyphs = [];
    this.fontSelector = new FontSelector(defaultXfaFont, fonts);
  }

  /** Make `xfaFont` the current font for the strings added next. */
  pushFont(xfaFont) {
    return this.fontSelector.pushFont(xfaFont);
  }

  /** Restore the font that was current before the last pushFont. */
  popFont(xfaFont) {
    return this.fontSelector.popFont();
  }

  /**
   * Append the glyphs of `str`, measured with the current font. Line
   * breaks ("\n" or U+2029) become end-of-line records.
   *
   * @param {string} str - Text to add; a falsy value is ignored.
   */
  addString(str) {
    if (!str) {
      return;
    }

    const current = this.fontSelector.topFont();
    const font = current.pdfFont;
    const size = current.xfaFont.size;
    // A line is never shorter than the font size itself.
    const lineHeight = Math.round(Math.max(1, font.lineHeight) * size);
    const scale = size / 1000;

    str.split(/[\u2029\n]/).forEach((line, index) => {
      if (index > 0) {
        // Separator between two consecutive lines.
        this.glyphs.push([0, 0, false, true]);
      }
      const encoded = font.encodeString(line).join("");
      for (const glyph of font.charsToGlyphs(encoded)) {
        const isSpace = glyph.unicode === " ";
        this.glyphs.push([glyph.width * scale, lineHeight, isSpace, false]);
      }
    });
  }

  /**
   * Word-wrap the accumulated glyphs so that no line exceeds `maxWidth`
   * (unless a single glyph is wider than that).
   *
   * @param {number} maxWidth - Maximum line width.
   * @returns {{width: number, height: number}} Widest line and summed line
   *   heights, scaled by WIDTH_FACTOR and HEIGHT_FACTOR respectively.
   */
  compute(maxWidth) {
    const records = this.glyphs;
    const total = records.length;

    let breakIndex = -1; // Position of the last space on the current line.
    let widthAtBreak = 0; // Line width just before that space.
    let widest = 0;
    let stackedHeight = 0;
    let lineWidth = 0;
    let lineHeight = 0;

    let pos = 0;
    while (pos < total) {
      const [glyphWidth, glyphHeight, isSpace, isEOL] = records[pos];
      const overflows = lineWidth + glyphWidth > maxWidth;

      if (isEOL || (isSpace && overflows)) {
        // Explicit line end, or a space that does not fit: close the line.
        // An overflowing space is not taken into account in the width.
        widest = Math.max(widest, lineWidth);
        lineWidth = 0;
        stackedHeight += lineHeight;
        lineHeight = glyphHeight;
        breakIndex = -1;
        widthAtBreak = 0;
      } else if (isSpace) {
        // A space that fits: remember it as a possible break position.
        lineHeight = Math.max(glyphHeight, lineHeight);
        widthAtBreak = lineWidth;
        lineWidth += glyphWidth;
        breakIndex = pos;
      } else if (!overflows) {
        lineWidth += glyphWidth;
        lineHeight = Math.max(glyphHeight, lineHeight);
      } else {
        stackedHeight += lineHeight;
        lineHeight = glyphHeight;
        if (breakIndex === -1) {
          // No space on this line: break in the middle of the word.
          widest = Math.max(widest, lineWidth);
          lineWidth = glyphWidth;
        } else {
          // Rewind to the last space; glyphs after it are laid out again
          // on the next line.
          pos = breakIndex;
          widest = Math.max(widest, widthAtBreak);
          lineWidth = 0;
          breakIndex = -1;
          widthAtBreak = 0;
        }
      }
      pos++;
    }

    // Account for the last, still open, line.
    widest = Math.max(widest, lineWidth);
    stackedHeight += lineHeight;

    return {
      width: WIDTH_FACTOR * widest,
      height: HEIGHT_FACTOR * stackedHeight,
    };
  }
}
export { TextMeasure };

View File

@ -34,6 +34,7 @@ const $dump = Symbol();
const $extra = Symbol("extra");
const $finalize = Symbol();
const $flushHTML = Symbol();
const $fonts = Symbol();
const $getAttributeIt = Symbol();
const $getAvailableSpace = Symbol();
const $getChildrenByClass = Symbol();
@ -46,6 +47,7 @@ const $getContainedChildren = Symbol();
const $getNextPage = Symbol();
const $getSubformParent = Symbol();
const $getParent = Symbol();
const $pushGlyphs = Symbol();
const $global = Symbol();
const $hasItem = Symbol();
const $hasSettableValue = Symbol();
@ -970,6 +972,7 @@ export {
$extra,
$finalize,
$flushHTML,
$fonts,
$getAttributeIt,
$getAvailableSpace,
$getChildren,
@ -998,6 +1001,7 @@ export {
$onChild,
$onChildCheck,
$onText,
$pushGlyphs,
$removeChild,
$resolvePrototypes,
$root,

View File

@ -18,8 +18,10 @@ import {
$childrenToHTML,
$content,
$extra,
$getChildren,
$nodeName,
$onText,
$pushGlyphs,
$text,
$toHTML,
XmlObject,
@ -167,6 +169,39 @@ class XhtmlObject extends XmlObject {
}
}
[$pushGlyphs](measure) {
const xfaFont = Object.create(null);
for (const [key, value] of this.style
.split(";")
.map(s => s.split(":", 2))) {
if (!key.startsWith("font-")) {
continue;
}
if (key === "font-family") {
xfaFont.typeface = value;
} else if (key === "font-size") {
xfaFont.size = getMeasurement(value);
} else if (key === "font-weight") {
xfaFont.weight = value;
} else if (key === "font-style") {
xfaFont.posture = value;
}
}
measure.pushFont(xfaFont);
if (this[$content]) {
measure.addString(this[$content]);
} else {
for (const child of this[$getChildren]()) {
if (child[$nodeName] === "#text") {
measure.addString(child[$content]);
continue;
}
child[$pushGlyphs](measure);
}
}
measure.popFont();
}
[$toHTML](availableSpace) {
const children = [];
this[$extra] = {
@ -202,6 +237,12 @@ class B extends XhtmlObject {
constructor(attributes) {
super(attributes, "b");
}
[$pushGlyphs](measure) {
measure.pushFont({ weight: "bold" });
super[$pushGlyphs](measure);
measure.popFont();
}
}
class Body extends XhtmlObject {
@ -230,6 +271,10 @@ class Br extends XhtmlObject {
return "\n";
}
[$pushGlyphs](measure) {
measure.addString("\n");
}
[$toHTML](availableSpace) {
return HTMLResult.success({
name: "br",
@ -282,6 +327,12 @@ class I extends XhtmlObject {
constructor(attributes) {
super(attributes, "i");
}
[$pushGlyphs](measure) {
measure.pushFont({ posture: "italic" });
super[$pushGlyphs](measure);
measure.popFont();
}
}
class Li extends XhtmlObject {
@ -301,6 +352,11 @@ class P extends XhtmlObject {
super(attributes, "p");
}
[$pushGlyphs](measure) {
super[$pushGlyphs](measure);
measure.addString("\n");
}
[$text]() {
return super[$text]() + "\n";
}

View File

@ -0,0 +1 @@
https://github.com/mozilla/pdf.js/files/6602628/Acrobat.pdf

View File

@ -5341,5 +5341,13 @@
"type": "eq",
"forms": true,
"lastPage": 1
},
{ "id": "xfa_issue13500",
"file": "pdfs/xfa_issue13500.pdf",
"md5": "b81274a19f5a95c1466db3648f1be491",
"link": true,
"rounds": 1,
"enableXfa": true,
"type": "eq"
}
]

View File

@ -188,7 +188,7 @@ describe("XFAFactory", function () {
</pageArea>
</pageSet>
<subform name="first">
<draw><value><text>foo</text></value></draw>
<draw w="1pt" h="1pt"><value><text>foo</text></value></draw>
</subform>
</subform>
</template>