From 06601fd90c0e9b74d1f7ae18517f19680b93772d Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Sun, 21 Jan 2024 23:00:43 +0100 Subject: [PATCH] Print correctly documents containing chars with an unicode greater than 0xFFFF (bug 1669097) --- src/core/annotation.js | 2 +- src/core/core_utils.js | 12 +++++++++++ src/core/default_appearance.js | 38 +++++---------------------------- test/pdfs/.gitignore | 1 + test/pdfs/bug1669097.pdf | Bin 0 -> 6497 bytes test/test_manifest.json | 25 ++++++++++++++++++++++ 6 files changed, 44 insertions(+), 34 deletions(-) create mode 100755 test/pdfs/bug1669097.pdf diff --git a/src/core/annotation.js b/src/core/annotation.js index e795f8572..9d0baf1d7 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -3826,7 +3826,7 @@ class FreeTextAnnotation extends MarkupAnnotation { fontColor, strokeAlpha ); - this._streams.push(this.appearance, FakeUnicodeFont.toUnicodeStream); + this._streams.push(this.appearance); } else { warn( "FreeTextAnnotation: OffscreenCanvas is not supported, annotation may not render correctly." diff --git a/src/core/core_utils.js b/src/core/core_utils.js index 4b8fa637e..a9bd298c7 100644 --- a/src/core/core_utils.js +++ b/src/core/core_utils.js @@ -386,6 +386,17 @@ const XMLEntities = { /* ' */ 0x27: "'", }; +function* codePointIter(str) { + for (let i = 0, ii = str.length; i < ii; i++) { + const char = str.codePointAt(i); + if (char > 0xd7ff && (char < 0xe000 || char > 0xfffd)) { + // char is represented by two u16 + i++; + } + yield char; + } +} + function encodeToXmlString(str) { const buffer = []; let start = 0; @@ -602,6 +613,7 @@ function getRotationMatrix(rotation, width, height) { export { arrayBuffersToBytes, + codePointIter, collectActions, encodeToXmlString, escapePDFName, diff --git a/src/core/default_appearance.js b/src/core/default_appearance.js index c01ea5987..cb84b1a7e 100644 --- a/src/core/default_appearance.js +++ b/src/core/default_appearance.js @@ -13,13 +13,14 @@ * limitations under the License. */ -import { Dict, Name } from "./primitives.js"; import { + codePointIter, escapePDFName, getRotationMatrix, numberToString, stringToUTF16HexString, } from "./core_utils.js"; +import { Dict, Name } from "./primitives.js"; import { LINE_DESCENT_FACTOR, LINE_FACTOR, @@ -251,35 +252,6 @@ class FakeUnicodeFont { ); } - get toUnicodeRef() { - if (!FakeUnicodeFont._toUnicodeRef) { - const toUnicode = `/CIDInit /ProcSet findresource begin -12 dict begin -begincmap -/CIDSystemInfo -<< /Registry (Adobe) -/Ordering (UCS) /Supplement 0 >> def -/CMapName /Adobe-Identity-UCS def -/CMapType 2 def -1 begincodespacerange -<0000> -endcodespacerange -1 beginbfrange -<0000> <0000> -endbfrange -endcmap CMapName currentdict /CMap defineresource pop end end`; - const toUnicodeStream = (FakeUnicodeFont.toUnicodeStream = - new StringStream(toUnicode)); - const toUnicodeDict = new Dict(this.xref); - toUnicodeStream.dict = toUnicodeDict; - toUnicodeDict.set("Length", toUnicode.length); - FakeUnicodeFont._toUnicodeRef = - this.xref.getNewPersistentRef(toUnicodeStream); - } - - return FakeUnicodeFont._toUnicodeRef; - } - get fontDescriptorRef() { if (!FakeUnicodeFont._fontDescriptorRef) { const fontDescriptor = new Dict(this.xref); @@ -350,7 +322,7 @@ endcmap CMapName currentdict /CMap defineresource pop end end`; baseFont.set("Subtype", Name.get("Type0")); baseFont.set("Encoding", Name.get("Identity-H")); baseFont.set("DescendantFonts", [this.descendantFontRef]); - baseFont.set("ToUnicode", this.toUnicodeRef); + baseFont.set("ToUnicode", Name.get("Identity-H")); return this.xref.getNewPersistentRef(baseFont); } @@ -420,8 +392,8 @@ endcmap CMapName currentdict /CMap defineresource pop end end`; // languages, like arabic, it'd be wrong because of ligatures. const lineWidth = ctx.measureText(line).width; maxWidth = Math.max(maxWidth, lineWidth); - for (const char of line.split("")) { - const code = char.charCodeAt(0); + for (const code of codePointIter(line)) { + const char = String.fromCodePoint(code); let width = this.widths.get(code); if (width === undefined) { const metrics = ctx.measureText(char); diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 2404660d8..8f0bbcad0 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -626,3 +626,4 @@ !file_pdfjs_form.pdf !issue17492.pdf !issue17540.pdf +!bug1669097.pdf diff --git a/test/pdfs/bug1669097.pdf b/test/pdfs/bug1669097.pdf new file mode 100755 index 0000000000000000000000000000000000000000..07b9ef9c4530ff1ce86f7fea3f2120d88b8d1f0a GIT binary patch literal 6497 zcmeHLdsGuw8V5n3X{d^=idG$CQCvypkx3?r5P^i0R{)WUieQq-1R_Z$P6mmJiceHr zW!2UfwU)Nn(pKecb&o5p;;RouQ9BByPmT>Xa6wgWX{}~ z@AChv zc@l~dat($iNM!_}j8&^8cp|DoB?&m1lz>YU=8 z0R#?~q4d^pUeQ^D!Ps4=Gw2ovM)2$7=S+@TC<_!EEZvY+MfG)ND_#c-q3Vh?hk@A@RUH0+$3;5O2x^BJ7PcGr} zKYJO%_w-+wv8`{|dp{0YeHaf+uE&st74dK9hlU{m?{p7y_{qWrn}=HoBtvUx#tLDW z3rj7<>Wz9<4~bpF%o5fD*yn%<*Dy<82<}n$I~Uw4AZwuuy<9^%$qwT-8_hZ(%q_2? zZ7gNu#)!+9ilOa^v@=gmh@_ZQ3P}+{MBq4tO9_#Tz##&ciR6SNAJzg2>N!b71T?{4 zielhO$iX!q2Al;}JA%rl9CQgoQh>#PuGusThzcQYYH-o}1S<0en&@jCm2f^VmvIgn zE>CXZxmk$GV6C2I%z&w$XgFz~`+V3Z2EbUF98Ix#Fj(J!j_~Lb1Iy8rGwdO3reP9A zvfk;R%tN3zd58#*3H(u94uOTrP&@~36wlvm%Ds8`A)Us|H~>2G&iGLY*f7Ly-@x@~ z*6MX#bT&3aTe&-)|7f`URGK)l_){pTwLYve?USH6`{r&VhkcV{pK#|KJ*icdcWtX29UkkCZ(R9IZYLH{ZE&_tCsZ6}epwLc0%cn0o$) zij$Spu*6sTF1~?;rWQ2L|GX?keIZ?4wJ=4tl&rcreO1cNf%fSmA6#j@xasqL*N!Y% zGQ*h`xORZ{-D?fmkGt>#%=zgv{h;RJpogZOnMJ%f_)+lLD}NhhyaQzvY)8*P8Oueo zwqzmoIRS}e*M9wGTzPo3A)>2l#@`OsT#VSTa&}_XSDAw9$n0@dbw5uWzkJ1-RpTv% zp)KdbT6oJ}N{&hG+J;v|ABg!jr8O)$J@c*T1L?xl%+}yfOBPAzeevoU3@TgMWIK)Uzn-`+6WO0~ zq&B_>louDXt2v-}dQnCHqD9;0e>bLXndRiMWxaZy*qDF4>fz($#fAx@#Yv;y+1FGT zy(($@_}5BH+arT!l&I#>#$~VmIq~C=%U?vE8nrcg=B}W-EBCaoZgVQS$KL$lU}m3_ zO{Vo%U+dNscS*7zx<35uiO1B>+GAB46Gq+N-Z1CXvA@K>Um6sB>8-YYX|v*YEKKdO z>gv(vn#?cvz1q0dwt;A$5K&XVc5$n)VQBARTMlGTo*w@B*Mo0PJ)SkRc-Wl%+oER8 z{P9$mamBAR25%@Jua+-(IO(%Xdz+m(N9-f4m~`XTmc88$tlL)EylzleS=F+4(*i?< zR{Jk&E5wnJU-n5fUE4F#@xz_SWdkJtx_WK{dZu?-w<*`{6B|ER(w5Y3z(wbR+{x{` zV&?01EpF)%yy#)m=9-3r__rG_4b0uJeBkEud-qEw&Yu3k`f3)G zq}&qe+(e{p*>w3I^>ZuR7Y$t7x4mWUu8Kk7adl_9Z(SeuinQyRw;tX|yf{1Z%AhH) zuZzhU7IEj3=o7=gQN|U_I8>Y5ueitkvqR(V9V4^8{b9lJYv}#;&0_T5*PEr;7q0V@ zrtX+$E#QBjy7h+TqhjSv97lzuW?n{gVgh>=O=~a_T1XDDFffJ#zdIwzHgkn z@^obxeCtvAS0i5@QkN-RJ2$SeKy+_hb78`Jg;#IyZAfna#IZhiP1)fQ<4*@f$E5Zt zj7c3=ovj-XbnD!0EF@xeaBXCIPfAUyNmF03dEwVcvV15rlp$8^_^Ynw3;Eh3gH*{~ zh6Entr}vz^Tz60<%RjU8pDWl0cdgRL!F>XTe0Sr0Kg8w^o+lT%SS-amH{r3`boV~k zaq4+@fY^~vyPhnjSja#Xnr(4{pLgsOKxSi{AXlPAw00F$WKN#OP@|`1?DMx-!`3Q@TNLlxlc_WT4&uJVXwj3!04TI2BtMsb27mbEKj zxU{rXR4NwHOd*WQ<#HIoUex#diz_rE2~!rz$`uAc5A~}D zFx6^1RsFiNTK#I;eeiff54-^G#D10Z#v+<5^#b{XB3CdO^lSnrlS?zjCJSBaPS`ac z9714LK*S0yZ8V#vszCvw1k;EM5mbnwIT)tEB???N96=Qb5)1!I(hIfAh6+@#pwjkZ z16N2SzBYU$e`TYC-2SXQ6^W<(bIJb7j>`khE^Bn3kVcBP_z^m8qDl4Q0<9i zA`HO^PjbF$J{FCn!USp{J*(8)?G`hs=U4}WX^DlsuYBZP98jofvV`MGidtDxVm2y_ z2!SFLCKi&U$tc9-lvt=Y8Vo`sYA~5_3^Phdy$fTl)>ro_yD2tM+S^E~!!9C`nIyPe zA~YE&Oo+=cy-+4LQ9>hb#8EvdMde<*KDs`3K_&`-i?M{HnEw{8Cv<#lc?hWhF#y5pfoJ{IQIxX+q*Zz!&(U`GMF8Qi&@`Ec9K@IU10P2>+T z?>73v$#a4G07<`tT6CmpkYU^1*|qU8cQ!Bo zuohD8{vzt8XwUNCF|Evx_ZlW`sBQaG;>?;&=OTWpx%^YaPWry(<}Bv*%)pRA60btV8lEa4hLC2>@kD}7fF8zN1iirHJ literal 0 HcmV?d00001 diff --git a/test/test_manifest.json b/test/test_manifest.json index 932d4674d..d624d1463 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -9643,5 +9643,30 @@ "structTreeParentId": null } } + }, + { + "id": "bug1669097", + "file": "pdfs/bug1669097.pdf", + "md5": "561b3abac1fe49e1c9cd265cbf4a456e", + "rounds": 1, + "type": "eq", + "save": true, + "print": true, + "annotationStorage": { + "24R": { + "value": "😇👽🖖" + }, + "pdfjs_internal_editor_0": { + "annotationType": 3, + "color": [0, 0, 0], + "fontSize": 10, + "value": "😇😇😇😇😇😇👽👽👽👽👽👽🖖", + "pageIndex": 0, + "rect": [267, 638, 452, 658], + "rotation": 0, + "structTreeParentId": null, + "id": null + } + } } ]