From 7839e7b495e174a34b9885dfab76c92fd5b44741 Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Thu, 21 Dec 2023 22:57:58 +0100 Subject: [PATCH] Preserve the whitespaces when getting text from FreeText annotations (bug 1871353) When the text of an annotation is extracted using getTextContent, consecutive white spaces are just replaced by one space. So this patch adds an option to make sure that white spaces are preserved when the appearance is parsed. For the case where there's no appearance, we can have a fast path to get the correct string from the Contents entry. When an existing FreeText is edited, spaces (0x20) are replaced by non-breakable ones (0xa0) in order to see all of them on screen. --- src/core/annotation.js | 76 ++++++++++++++-------- src/core/default_appearance.js | 20 ++++++ src/core/evaluator.js | 7 +- src/display/editor/freetext.js | 13 +++- test/integration/freetext_editor_spec.mjs | 70 +++++++++++++++++++- test/pdfs/.gitignore | 2 + test/pdfs/bug1871353.1.pdf | Bin 0 -> 11786 bytes test/pdfs/bug1871353.pdf | Bin 0 -> 5605 bytes 8 files changed, 152 insertions(+), 36 deletions(-) create mode 100644 test/pdfs/bug1871353.1.pdf create mode 100644 test/pdfs/bug1871353.pdf diff --git a/src/core/annotation.js b/src/core/annotation.js index d4c28cf3a..6e1bc1147 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -1207,6 +1207,7 @@ class Annotation { task, resources, includeMarkedContent: true, + keepWhiteSpace: true, sink, viewBox, }); @@ -1218,20 +1219,26 @@ class Annotation { if (text.length > 1 || text[0]) { const appearanceDict = this.appearance.dict; - const bbox = appearanceDict.getArray("BBox") || [0, 0, 1, 1]; - const matrix = appearanceDict.getArray("Matrix") || [1, 0, 0, 1, 0, 0]; - const rect = this.data.rect; - const transform = getTransformMatrix(rect, bbox, matrix); - transform[4] -= rect[0]; - transform[5] -= rect[1]; - firstPosition = Util.applyTransform(firstPosition, transform); - firstPosition = 
Util.applyTransform(firstPosition, matrix); - - this.data.textPosition = firstPosition; + this.data.textPosition = this._transformPoint( + firstPosition, + appearanceDict.getArray("BBox"), + appearanceDict.getArray("Matrix") + ); this.data.textContent = text; } } + _transformPoint(coords, bbox, matrix) { + const { rect } = this.data; + bbox ||= [0, 0, 1, 1]; + matrix ||= [1, 0, 0, 1, 0, 0]; + const transform = getTransformMatrix(rect, bbox, matrix); + transform[4] -= rect[0]; + transform[5] -= rect[1]; + coords = Util.applyTransform(coords, transform); + return Util.applyTransform(coords, matrix); + } + /** * Get field data for usage in JS sandbox. * @@ -3767,7 +3774,9 @@ class FreeTextAnnotation extends MarkupAnnotation { const { evaluatorOptions, xref } = params; this.data.annotationType = AnnotationType.FREETEXT; this.setDefaultAppearance(params); - if (this.appearance) { + this._hasAppearance = !!this.appearance; + + if (this._hasAppearance) { const { fontColor, fontSize } = parseAppearanceStream( this.appearance, evaluatorOptions, @@ -3775,29 +3784,40 @@ class FreeTextAnnotation extends MarkupAnnotation { ); this.data.defaultAppearanceData.fontColor = fontColor; this.data.defaultAppearanceData.fontSize = fontSize || 10; - } else if (this._isOffscreenCanvasSupported) { - const strokeAlpha = params.dict.get("CA"); - const fakeUnicodeFont = new FakeUnicodeFont(xref, "sans-serif"); + } else { this.data.defaultAppearanceData.fontSize ||= 10; const { fontColor, fontSize } = this.data.defaultAppearanceData; - this.appearance = fakeUnicodeFont.createAppearance( - this._contents.str, - this.rectangle, - this.rotation, - fontSize, - fontColor, - strokeAlpha - ); - this._streams.push(this.appearance, FakeUnicodeFont.toUnicodeStream); - } else { - warn( - "FreeTextAnnotation: OffscreenCanvas is not supported, annotation may not render correctly." 
- ); + if (this._contents.str) { + this.data.textContent = this._contents.str.split(/\r\n?|\n/); + const { coords, bbox, matrix } = FakeUnicodeFont.getFirstPositionInfo( + this.rectangle, + this.rotation, + fontSize + ); + this.data.textPosition = this._transformPoint(coords, bbox, matrix); + } + if (this._isOffscreenCanvasSupported) { + const strokeAlpha = params.dict.get("CA"); + const fakeUnicodeFont = new FakeUnicodeFont(xref, "sans-serif"); + this.appearance = fakeUnicodeFont.createAppearance( + this._contents.str, + this.rectangle, + this.rotation, + fontSize, + fontColor, + strokeAlpha + ); + this._streams.push(this.appearance, FakeUnicodeFont.toUnicodeStream); + } else { + warn( + "FreeTextAnnotation: OffscreenCanvas is not supported, annotation may not render correctly." + ); + } } } get hasTextContent() { - return !!this.appearance; + return this._hasAppearance; } static createNewDict(annotation, xref, { apRef, ap }) { diff --git a/src/core/default_appearance.js b/src/core/default_appearance.js index fc1635104..c01ea5987 100644 --- a/src/core/default_appearance.js +++ b/src/core/default_appearance.js @@ -390,6 +390,26 @@ endcmap CMapName currentdict /CMap defineresource pop end end`; return this.resources; } + static getFirstPositionInfo(rect, rotation, fontSize) { + // Get the position of the first char in the rect. + const [x1, y1, x2, y2] = rect; + let w = x2 - x1; + let h = y2 - y1; + + if (rotation % 180 !== 0) { + [w, h] = [h, w]; + } + const lineHeight = LINE_FACTOR * fontSize; + const lineDescent = LINE_DESCENT_FACTOR * fontSize; + + return { + coords: [0, h + lineDescent - lineHeight], + bbox: [0, 0, w, h], + matrix: + rotation !== 0 ? 
getRotationMatrix(rotation, h, lineHeight) : undefined, + }; + } + createAppearance(text, rect, rotation, fontSize, bgColor, strokeAlpha) { const ctx = this._createContext(); const lines = []; diff --git a/src/core/evaluator.js b/src/core/evaluator.js index 1a0ac5806..f5646b362 100644 --- a/src/core/evaluator.js +++ b/src/core/evaluator.js @@ -2281,6 +2281,7 @@ class PartialEvaluator { viewBox, markedContentData = null, disableNormalization = false, + keepWhiteSpace = false, }) { // Ensure that `resources`/`stateManager` is correctly initialized, // even if the provided parameter is e.g. `null`. @@ -2347,11 +2348,12 @@ class PartialEvaluator { twoLastChars[twoLastCharsPos] = char; twoLastCharsPos = nextPos; - return ret; + return !keepWhiteSpace && ret; } function shouldAddWhitepsace() { return ( + !keepWhiteSpace && twoLastChars[twoLastCharsPos] !== " " && twoLastChars[(twoLastCharsPos + 1) % 2] === " " ); @@ -2836,7 +2838,7 @@ class PartialEvaluator { } let scaledDim = glyphWidth * scale; - if (category.isWhitespace) { + if (!keepWhiteSpace && category.isWhitespace) { // Don't push a " " in the textContentItem // (except when it's between two non-spaces chars), // it will be done (if required) in next call to @@ -3272,6 +3274,7 @@ class PartialEvaluator { viewBox, markedContentData, disableNormalization, + keepWhiteSpace, }) .then(function () { if (!sinkWrapper.enqueueInvoked) { diff --git a/src/display/editor/freetext.js b/src/display/editor/freetext.js index 8a91f7d55..adc0e3d50 100644 --- a/src/display/editor/freetext.js +++ b/src/display/editor/freetext.js @@ -648,6 +648,14 @@ class FreeTextEditor extends AnnotationEditor { } } + #serializeContent() { + return this.#content.replaceAll("\xa0", " "); + } + + static #deserializeContent(content) { + return content.replaceAll(" ", "\xa0"); + } + /** @inheritdoc */ get contentDiv() { return this.editorDiv; @@ -690,10 +698,9 @@ class FreeTextEditor extends AnnotationEditor { }; } const editor = 
super.deserialize(data, parent, uiManager); - editor.#fontSize = data.fontSize; editor.#color = Util.makeHexColor(...data.color); - editor.#content = data.value; + editor.#content = FreeTextEditor.#deserializeContent(data.value); editor.annotationElementId = data.id || null; editor.#initialData = initialData; @@ -726,7 +733,7 @@ class FreeTextEditor extends AnnotationEditor { annotationType: AnnotationEditorType.FREETEXT, color, fontSize: this.#fontSize, - value: this.#content, + value: this.#serializeContent(), pageIndex: this.pageIndex, rect, rotation: this.rotation, diff --git a/test/integration/freetext_editor_spec.mjs b/test/integration/freetext_editor_spec.mjs index 5aed27bfa..74cb5134d 100644 --- a/test/integration/freetext_editor_spec.mjs +++ b/test/integration/freetext_editor_spec.mjs @@ -209,11 +209,11 @@ describe("FreeText Editor", () => { await waitForStorageEntries(page, 2); const content = await page.$eval(getEditorSelector(0), el => - el.innerText.trimEnd() + el.innerText.trimEnd().replaceAll("\xa0", " ") ); let pastedContent = await page.$eval(getEditorSelector(1), el => - el.innerText.trimEnd() + el.innerText.trimEnd().replaceAll("\xa0", " ") ); expect(pastedContent).withContext(`In ${browserName}`).toEqual(content); @@ -225,7 +225,7 @@ describe("FreeText Editor", () => { await waitForStorageEntries(page, 3); pastedContent = await page.$eval(getEditorSelector(2), el => - el.innerText.trimEnd() + el.innerText.trimEnd().replaceAll("\xa0", " ") ); expect(pastedContent).withContext(`In ${browserName}`).toEqual(content); } @@ -3182,4 +3182,68 @@ describe("FreeText Editor", () => { ); }); }); + + describe("Consecutive white spaces in Freetext without appearance", () => { + let pages; + + beforeAll(async () => { + pages = await loadAndWait("bug1871353.pdf", ".annotationEditorLayer"); + }); + + afterAll(async () => { + await closePages(pages); + }); + + it("must check that consecutive white spaces are preserved when a freetext is edited", async () => { + 
await Promise.all( + pages.map(async ([browserName, page]) => { + await switchToFreeText(page); + await page.click(getEditorSelector(0), { count: 2 }); + await page.type(`${getEditorSelector(0)} .internal`, "C"); + + await page.click("#editorFreeText"); + await page.waitForSelector( + `.annotationEditorLayer:not(.freetextEditing)` + ); + + const [value] = await getSerialized(page, x => x.value); + expect(value) + .withContext(`In ${browserName}`) + .toEqual("CA B"); + }) + ); + }); + }); + + describe("Consecutive white spaces in Freetext with appearance", () => { + let pages; + + beforeAll(async () => { + pages = await loadAndWait("bug1871353.1.pdf", ".annotationEditorLayer"); + }); + + afterAll(async () => { + await closePages(pages); + }); + + it("must check that consecutive white spaces are preserved when a freetext is edited", async () => { + await Promise.all( + pages.map(async ([browserName, page]) => { + await switchToFreeText(page); + await page.click(getEditorSelector(0), { count: 2 }); + await page.type(`${getEditorSelector(0)} .internal`, "Z"); + + await page.click("#editorFreeText"); + await page.waitForSelector( + `.annotationEditorLayer:not(.freetextEditing)` + ); + + const [value] = await getSerialized(page, x => x.value); + expect(value) + .withContext(`In ${browserName}`) + .toEqual("ZX Y"); + }) + ); + }); + }); }); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 4d3eefaad..1387f44ba 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -621,3 +621,5 @@ !bug1863910.pdf !bug1865341.pdf !bug1872721.pdf +!bug1871353.pdf +!bug1871353.1.pdf diff --git a/test/pdfs/bug1871353.1.pdf b/test/pdfs/bug1871353.1.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f719d0f7b5a3cc76a87a1d107169ab0baf95ef9b GIT binary patch literal 11786 zcmeHNeT*B$6;DVAsgpxNLKKjyE{4FRi9IvB>$TV18GL6Sx%8Yn?i`YEL}Wk4zRj(7 z&F(sPmr^xAX^8}sqP7TuDvE%DT11k5scOnsp%BVf(zF#S1w?5jC@Q6bhL2MEX4hVC 
zF6QGTX#Y9u?DNjN_vX!S-g};z_g0P+1`;GGb*+5y&rkifD@F*1Ia}S<)zgE942vp` zv4iRyK|`D<^B4_tL_kF@Es1D7w3azhB5Aai8|OrUKp)wX%XLvpXCkzil>Zmdgof#G$&OdA7q5K9@rShUrlU4Va2Y0~Rz`81E4=7E;21oG4fT%ekHbNf2atfZ+Z z4iitY?-iRP81x%OL)5Bz6P$pVGTYfiGicPdJQtOIMJg2JqimmF_QB~dg4@G_J@HM+GJ8&v@|AG_nG5fk=fK-#TiJ9vujyLVl zN>>bb#a6B?4i9w2S~vo9sWx<-FyYeamfm6-7E2;%j1#0I6xzCV>*8CVU(Ss@vhy>W zPR?96ef7xljmuub?_CyqZqe%M3h}ak-m`G=$qV-V>DVP5H1ucRw2%_@6kYKYHfum^ zxZ#;#GcG6jk0VMIzRUi1AEbUDHSKEKLWt0wPQPQ<`|Rm)oCNscEfF3*(=vQ+K_##T zy z)oo*5Eeb0?hDY#5yr;Z-WVd+1?mY*loN`pudzBp0qbrF-$q@1F7Cji-J4 z*t_2O&+RMa8)xo%c>DHqyRJR=ZtljN58Yb6Wxa6K-p8-MxqQvehu$b(x%cryx4uy> z?Y{7aiGFSOr3L-ON7XN$e&xP{H{ZPJy{%t9{O{{7J^SpP3l=SS_E%>e=&(*~Nm`EF zv}7J*G}cf(-(X8a!Bkpn*q%(!In#ATo1`A6(yC$g#$WrzeR0mvd*d5ZC0wfa(+Okk z4u_8ISYOt5OlqNT(kZn|b#Zg#piu0R~1S@_PHQJnKhyvg2po^^1W zN+Vpq?NClml7xmaf#pu}3f42ayoTd3FCZ)+A|?pQ5;-eMEk}swm{0ULdL=tr7-)_M zPrdO8&#PwRPs9J2l12DfPN(XbQ6x1Yq`{c2|Vn zU%70-)*5Vu2Mf7I!_c!8wUW_QMM~&|Y63jUevmnVI}g`9TysKTPQ>%pH4oRE z5SSD3ymftIaK+k(4LAY!#;4#Y(cv%?IVXPLQ7t^FiE?l^!HzjcAJq1i@kr!&cKFGm zRo6=5lGu`mU-^3Zq}yIO?uvxjKQa7(dh0IoY~hyYzIE`W@{8B}_(zMD z;sq=AX`BFyw-(JcbbrB;@`&{j%VgGY9TCXZg_bs}Jv7 z{MzOfKlp6nqDAZWy?eY)Fn)099bY;3@^3DL3z?^u9{0HputY9o2$8}onfcT8{OP*$ zsr+M~t_h#ug)-r@>`z*a$^Y$i{UMsnpRWB=yQZfyVmebvROAdz2#T0VC>c1!OQNbn z93faxl=;*3oOT6sBc9)K%?Yo0xaQ%S69RK0p0}=fxaNewoQUVG>l1^k5Br|D&lhRT$ zEz6%)?w6&go9WR$#iawb<)O9I+)h2n?4Y6rWeJ8=<@g}8yjE2w%_ftlk5pp}sLlxU%n1 zgkgCsCE2~HZHJpzAwMey884P->n>zjf{^g+P2dHmm$NWF3uc$^#>tEfOPnNv0E#7f zl1g(qtjkOF^#H~HE9JJG~2YD z?CHf644eim2RIRxW+{`(gvzWhbyG>NNODR_V1z(nuuZlb+%7FMUCo&m$g_f(+rw?X+g_HUCp+NQZvk%Y^MckF*(%J zCYplcxt=xy-Rmb2x>b5Z;1L5v~SgX-{RVCPn$QL1@Rmy=3nQYi=z znIxYQ!4;rd$qx?zL`vMk9CBNC87zf^`jAzje-1d5vQ|{?*1&|I$G<4@V^G^!eUoo` z#70(j66uOmLiMrIlaJJd#Z5K{tCk8XE_i}bN%0&5xSM4yth9xFhnj|9Vu|L=w!nPD z%9Re@4?YUnS#6AZ<1A(%HaWQJYEKwOZ(fECtJ?s&_B9yuNC>5H!{<9C)LHm$DS?}g zo)^R16qJ4W%8(Fd5uQl~;O04p#;n*SF$M>y?tf~vkK)(}JP_~TeI9U5#rn#zNC5D= zS^!9}qH;>$%6cpsa#nHWZ81SiiqHi?13EYRO~rET{1}U9M;1=`fx5Yp1*1}-P!a@1 
zK;yiKR4Ct~NK+h0*K39Y`S^-Sr;!e&L7E1U4ox(ys^TIyY1CcAMYN_X?gRpT z*}n?JFT+9=N74N1foj7reX%UQq43FJ9*Sy))o@XrI^KlcfbggXYJhkZezC#_pVe0o zdIC=WsofrF^au0VJx9?Y6giWqVt`&~$TgXU!$mX}K($#n07N#23xfs#%%rYsATuDf zDRP6J9sWP?O^k{SD6Jq+9;h`OwgfZqAi)yYlhjfbDCd(hC={&O^_f698)^n9Oc=_? zVC%snC=-LmJgBJ~hSF8myiTcrvW>k_Ob~!79Vo z3B{}gBQzUfTO;!*?;8fn2czX9#+LU@3FX77=Y0dq7g2v}_+r3Z448`%=3>Y-7|;v` zG=mYE!H}ltO`!F`aKi!la6mpBAs=oMgj}_T=^1r%1`P*`yCGoR5U_5DuxHV2*M z3EQy{*nbUdR5e|s1iVVntweaCW(GV8DWM3cF9knSz&%?qz?y<{AsR5#2s12G(;$t2 z#t3MP2#pcaP=LV-hO+~5J0Q0s2hQ)GGzR2C9`@D9I^nJ-tur2uUw&xvd!K({C6T9J?pyo*eJ7#b?ro-vPX`;f}V2f+GERa{`{c%*E>%;u#a}#`?Rq7?q9!o z!|oe?a^{}I&yKnGUyZ|0zjgZFOTY9`>V>hMdtd+M?eFh5-hmudmy1U`#Q4ajy>pK0 zlPeq+3q^@1efj=$swk3_knc;AK2hw;r+8WHOQ*xaVIF=Hp<)<5csL}%PMwgBb;J-G z{W@aEA{|{u%%_jaW6drjKJlkp)l1*K?&bR)oIG>M*G}fX`@lo@pM30XH|^YX{jSS4 uFMafeB`>}?wETywcd8Gb`|7fno?i$s=8s$Y_R%o5Q(A%1BTZ5m?|(!*UKp|Rh4cjvRubKE+1tR}>bml#qA6*=EMCtmG4 z>wHe!YKRmuv7&X_CJ+?$kE(*gG{MrsUl0j4RzX9%DkXprrJ%8Y*gr*RC_q`u_xa~F zbrF#me(-`9^HJG%NP`iblPYe=fi`rVFhRj!Gj1senX**7XA#cWkqZIx#Yh>ch50@O?y$+g$C zIt$NA?YRb097GG|Y`a3J*iI*InWZ7KI!*ilKm5v6fHA1Rco3(c8q~Y4LisSmZg`qt zWwT;w4C)CAXTr3hWDu$E5|7B?z3sdMmdU(ThP{@Wcv2w*B`?+ujc1>nw2Mwyg-Mbo zQ{-%BDZM0pa_Q{q!W;O~?kC9F!Y>9s=I_M^W5I=q7#d+pU#}xFFteb;)^FU#}{9JC-d~Vi)*Lf$)uJ({k6Hcw)9j&f9U7g zkMDYV`O3+Y2i~3e-1@(sf9mna7hT=1t3UndYKMK|ThctpjY<{>!IPD&T`SnsSUr{I z9=4N-?5UPi?GUq3md)mkLGSe+{n(50`k;3zkRqv4oXzF;9kJNtk^LF%$RSP9y?dhW zNHtt7mWogjs9G!-<#2V-tHM3N!tl(eu@}`u>_daz7+*jKQfU-7Ert|7^=X7q*lxd| z53`)ab0mNCo_>PDJ&60EFQ`I4PC4aDuLS+imfdcCc~$9*>vE0$W0 zjIzI$q+c_OxT=;g^%LAlN!MCBrHWOkg{Etm6<85!w+wo;qaI+IO0`rwx+@ktYL*;$ z+@=Q$;7-IX05}HVC!8|%eC}`p4osAT?<2C0${C`BMH-gG-9!iz62TiJ+W^&U#NpzD zJG!F{L2=q}NN%vvEXmGRHbm06f@-oG?9@D%R>m|7(R#(jG|SAYHcDG2>XRj3HgESM zLG)9BWJx1B(*shrY1g6aunU=l0T;cZG3&pD>o%Q+tqA7l$8nzOhqX2f zrxk7r+`1}iOZ;TLr(f+P-$OTBPBsPZ7F=6!Z3=-+32#-`7F?S`U{k_d)%8Dv%iV4^ zpaUNCEQS2_gapfBzkOrYI1(*G2VAf@Z!_(xL%7cO2dj|`0bs&rO> z|G#A-O5&0vOXt>PG7%){-+b{qEBH!j?85lyCrV@fmGRMmCwBC$Og*zo#MRWwz_HZ~ 
z5q>SMi2{*=t843*#$U~zK1Q!5PF?%TmA5i)9{<7jyLS**&js??4p}*vh0dgm^#3Jg z?jT~MPV@^NuJrWb8P3c>xHv#_kfBs=Q8G>D!X1eq+@LPdG$)RLx8SVBLDV8GHqJZg3WQsH z1>%GSyf+vNRLcxu&RHNFGYr!%qiGrh7ZZ>or;eb#cT>)_@CP2qkXmL*6Q~fpj}_*b z4NXeZOnti1{nsZc!ovp8Z+T-D;im?=mab_0d=Gla->wJ@e}}>vwRAG zQcsdIdR)0h_#r1d$X|dqpDEJ*opy|H7-IM?L-uZ9r`|_hNuH0A- z+~1G-x^@lE9((!H^_y3J`1SdW(9;jHQ-BGOXuN6~w4)G!VyM@#+vV!Ib*t;a5d3}c dRTv)f^5JhTSDQa