From efbbd8533fd5dfca8282c9465decba6450e4dd12 Mon Sep 17 00:00:00 2001 From: Brendan Dahl Date: Thu, 29 Jun 2017 17:14:58 -0700 Subject: [PATCH] Only mask char codes of (3, 0) cmap tables in the range of 0xF000 to 0xF0FF. --- src/core/fonts.js | 30 ++++++++++++++++-------------- test/pdfs/.gitignore | 1 + test/pdfs/issue8187.pdf | Bin 0 -> 2841 bytes test/test_manifest.json | 7 +++++++ 4 files changed, 24 insertions(+), 14 deletions(-) create mode 100644 test/pdfs/issue8187.pdf diff --git a/src/core/fonts.js b/src/core/fonts.js index faaf294d9..fa027eabf 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -2406,21 +2406,23 @@ var Font = (function FontClosure() { cmapMappings[i].glyphId; } } else { - // For (3, 0) cmap tables: - // The charcode key being stored in charCodeToGlyphId is the lower - // byte of the two-byte charcodes of the cmap table since according to - // the spec: 'each byte from the string shall be prepended with the - // high byte of the range [of charcodes in the cmap table], to form - // a two-byte character, which shall be used to select the - // associated glyph description from the subtable'. - // - // For (1, 0) cmap tables: - // 'single bytes from the string shall be used to look up the - // associated glyph descriptions from the subtable'. This means - // charcodes in the cmap will be single bytes, so no-op since - // glyph.charCode & 0xFF === glyph.charCode + // When there is only a (1, 0) cmap table, the char code is a single + // byte and it is used directly as the char code. + + // When a (3, 0) cmap table is present, it is used instead but the + // spec has special rules for char codes in the range of 0xF000 to + // 0xF0FF and it says the (3, 0) table should map the values from + // the (1, 0) table by prepending 0xF0 to the char codes. To reverse + // this, the upper bits of the char code are cleared, but only for the + // special range since some PDFs have char codes outside of this range + // (e.g. 0x2013) which when masked would overwrite other values in the + // cmap. for (i = 0; i < cmapMappingsLength; ++i) { - charCode = cmapMappings[i].charCode & 0xFF; + charCode = cmapMappings[i].charCode; + if (cmapPlatformId === 3 && + charCode >= 0xF000 && charCode <= 0xF0FF) { + charCode &= 0xFF; + } charCodeToGlyphId[charCode] = cmapMappings[i].glyphId; } } diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 74a1f69b5..7fde92602 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -269,6 +269,7 @@ !issue6113.pdf !openoffice.pdf !issue7014.pdf +!issue8187.pdf !annotation-link-text-popup.pdf !annotation-text-without-popup.pdf !annotation-underline.pdf diff --git a/test/pdfs/issue8187.pdf b/test/pdfs/issue8187.pdf new file mode 100644 index 0000000000000000000000000000000000000000..ea73be74b64cd8b60c72404874de8d0025f63358 GIT binary patch literal 2841 zcmZuz2{=^iAC@f+>dG$tJLy_d%w`NiFr#gEPbaTwUb2{1DQ=z%oEQn2w~7l9>!vZK=&6bn`;=n1=^ zK`a^#;?bZNfFiO&0wE9sptdn+92mu6k(e;DC@Qk`YgD8PHXU~UUIxmE&S8bHsc>OP z#a1jB68vC591aPJgcN*m(m)9Iggek*02VqxEEjTZr*I&mKZOkv3;~n{g+mv_M3J}J z5^XHk;~adAEGTR$i$-7I2f+c9bBGTlpv#3FLPv<3FJ=p!LuE69AQl_M;st3@I~X*` zAL$xzh_LeW``=w;wfIA<7uG*}{=EJ*5=^}ngZ;ff9Ck6?2q*tiKo9hT{6T#@V!(me zbV>kV;R0ZgBLrQxz(FvAKVLvw2eh%rzz2&);|vHo>p+)1fTasE8QrM$7nw90apN^w!*W{pqdBzG3g+B5kG=N3k_@+@Ph&M zjf}7WYJLS`3=EN!3?@WpgGg$KzJ)HBlOH68o*r>$Kyt3~UwgJ|GFtT+86<#m3fpFJ ztc;$Lr>LqKP_V(4*dnT=N`5X+)+UvZNnJbD<9hF202BBU#=3tVFT7HOTejI#=_6_IwU zJZ$raT0uM4!TEsNwJ+ii))(pxy_qK6jGS41{ISj?5l2OP8(WWDWgm7GdzNh{zQF^k zb;tOT8+^L&;@kAj?OQIq^aRRVqsGO3f23l9{neeHvuc$p)7kGQPkp_=`%uW#&eXYs zoAQaRh1b)cSijE<-PWCW=3~{D9=%;p-m9yBa*XlMU3XV)Z^^#c52~+6Qu>lgnB8z|ZYb*yonK+xc0$YM2eU z!X2hxUP?9B-tg^?`Gg8MSmDbVYB#MZ>C;p@Gtu9jB{w}l>9BZ$>j^%b-hIcS=%5DL zcZHeIM4VY=b=h#r5v?R{c>ggXk3e~-qy?Qhgvp;-bIBdC_Df;k|i%KYu zsOrY*wus#$mi2ba3Re&-{l_HUpUQSEX@5x>^~9LwIB=^kY8hZ0N}oL)s=n$ToLcPt zA|vVHFxDvu$PX5uONmE~D>S`POZ9FHn>-@7Otn-gyj4PSNJNsGa@3Q=Th$r6e@w5Z zUhj>@W~o)`zik~?6caf|iHcRul$DZ7crF$^#1Dmt`#b&15BO~Cse3^p#Z9O1^`&oZ zmag-D$zP+)rzj^vFJq+RZ`si5q8*c8TPa>mInr{kVPz+uu(SND2BF`q_B1a^k}on1B_fLDWWU=NX4Ib zMACUn9iw?;=!=-uQZ|2EuS-+(kyWHho_rH*5}`FN9~r9p@QT{{D=V7472NtfOn7g{ zg?HUiEov|>Y80=EN-L4g90Lq;#n)8vpN$!p_k#B28=d!bB{h$Ui8b$1Ys4IUK4cnI z-)T1`VRoBGo9?Bm1yoHrZmb@@S;rB|?i2OUeO#P1*c5eiMoMP<_P$c%a=yx%y*nlh z0_9#j>X3aQbKY!cuvgJZcTW6$<$$A4t*wU2B4gevtA(78^OUxiv1N~Y*;{*Idz&sh zc~9?KH7b=Ef5ma~zFBky_4$p{%QkZ5^}62q8dHWzs_yqa)$uYaJiwbt<(&b1{Y}X;NupZG&^&7munP1gkZMl40$LngI?WQN4 zQ;7$rl|n-ElePW61~sJNP|;q4otNs*ByuM^%Ty~YPteewue-lKjVu!GdUsDzVOQ!0 zWvv@;D>U|#&PnmDQu|J5fAT1}{(0D`VL++A>V%`g#X?t&#tZ#32l?RrHnC&{XQKS-^cWiOK)d@!wZNjTd$tJOU1r7xjn zl~DfLhIfYj5KX6prezr7(cWr)s))Ec?eRTEPP=kJPM>~omCzdd#ZhSH%P%K|0zv zuKC)#R@;1EuD({=^VOxXuZHXgsTCP!#bt~RUh!P%dGnm6*VF)>gY`^V>>7t{(uJ~< zjl&rkP5smgO)k01m)j=&rJ7t^Q2I>ruXfGauj{fhCkjJMzU}J1bE$MmgU; zZO#2DsTqVpyMevL!jUkVo(Z8vFG!9czNf#(f7zLcyVwgdHKWSR@TE6qKi(E&U~>0! z4(*lFe-}m7{Y`VX*ruL4cC)Nobt+}MxkiyQ-{j_u*Ct|fl&VMe-r89bnW9oW zZdlkZe$(L5VpCwnz!vCQXGS<3gfD9+77G@Vh-?zbL=Hq@LxR?^cmf{K(6Dr{`UfSN BV+#NP literal 0 HcmV?d00001 diff --git a/test/test_manifest.json b/test/test_manifest.json index 28518372d..067a56499 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -1863,6 +1863,13 @@ "link": false, "type": "eq" }, + { "id": "issue8187", + "file": "pdfs/issue8187.pdf", + "md5": "1724dcada47b90c9217ee0139d8352a8", + "rounds": 1, + "link": false, + "type": "eq" + }, { "id": "issue5686", "file": "pdfs/issue5686.pdf", "md5": "78d16b9df07a355ad00d70504a9194f8",