Merge pull request #17558 from calixteman/bug1669097

Print correctly documents containing chars with a Unicode code point greater than 0xFFFF (bug 1669097)
This commit is contained in:
calixteman 2024-01-22 12:23:06 +01:00 committed by GitHub
commit bba831821d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 44 additions and 34 deletions

View File

@ -3826,7 +3826,7 @@ class FreeTextAnnotation extends MarkupAnnotation {
fontColor, fontColor,
strokeAlpha strokeAlpha
); );
this._streams.push(this.appearance, FakeUnicodeFont.toUnicodeStream); this._streams.push(this.appearance);
} else { } else {
warn( warn(
"FreeTextAnnotation: OffscreenCanvas is not supported, annotation may not render correctly." "FreeTextAnnotation: OffscreenCanvas is not supported, annotation may not render correctly."

View File

@ -386,6 +386,17 @@ const XMLEntities = {
/* ' */ 0x27: "'", /* ' */ 0x27: "'",
}; };
/**
 * Iterate over the Unicode code points of a string.
 *
 * A character outside the Basic Multilingual Plane is stored as a surrogate
 * pair (two UTF-16 code units); such a pair is yielded once, as a single code
 * point. An unpaired surrogate is yielded as-is.
 *
 * @param {string} str - The string to walk through.
 * @yields {number} The next code point of `str`.
 */
function* codePointIter(str) {
  for (let i = 0, ii = str.length; i < ii; i++) {
    const char = str.codePointAt(i);
    if (char > 0xffff) {
      // Only code points above U+FFFF are represented by two u16. An unpaired
      // surrogate, U+FFFE or U+FFFF takes a single u16, so the next code unit
      // must not be skipped for them.
      i++;
    }
    yield char;
  }
}
function encodeToXmlString(str) { function encodeToXmlString(str) {
const buffer = []; const buffer = [];
let start = 0; let start = 0;
@ -602,6 +613,7 @@ function getRotationMatrix(rotation, width, height) {
export { export {
arrayBuffersToBytes, arrayBuffersToBytes,
codePointIter,
collectActions, collectActions,
encodeToXmlString, encodeToXmlString,
escapePDFName, escapePDFName,

View File

@ -13,13 +13,14 @@
* limitations under the License. * limitations under the License.
*/ */
import { Dict, Name } from "./primitives.js";
import { import {
codePointIter,
escapePDFName, escapePDFName,
getRotationMatrix, getRotationMatrix,
numberToString, numberToString,
stringToUTF16HexString, stringToUTF16HexString,
} from "./core_utils.js"; } from "./core_utils.js";
import { Dict, Name } from "./primitives.js";
import { import {
LINE_DESCENT_FACTOR, LINE_DESCENT_FACTOR,
LINE_FACTOR, LINE_FACTOR,
@ -251,35 +252,6 @@ class FakeUnicodeFont {
); );
} }
get toUnicodeRef() {
if (!FakeUnicodeFont._toUnicodeRef) {
const toUnicode = `/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (UCS) /Supplement 0 >> def
/CMapName /Adobe-Identity-UCS def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
1 beginbfrange
<0000> <FFFF> <0000>
endbfrange
endcmap CMapName currentdict /CMap defineresource pop end end`;
const toUnicodeStream = (FakeUnicodeFont.toUnicodeStream =
new StringStream(toUnicode));
const toUnicodeDict = new Dict(this.xref);
toUnicodeStream.dict = toUnicodeDict;
toUnicodeDict.set("Length", toUnicode.length);
FakeUnicodeFont._toUnicodeRef =
this.xref.getNewPersistentRef(toUnicodeStream);
}
return FakeUnicodeFont._toUnicodeRef;
}
get fontDescriptorRef() { get fontDescriptorRef() {
if (!FakeUnicodeFont._fontDescriptorRef) { if (!FakeUnicodeFont._fontDescriptorRef) {
const fontDescriptor = new Dict(this.xref); const fontDescriptor = new Dict(this.xref);
@ -350,7 +322,7 @@ endcmap CMapName currentdict /CMap defineresource pop end end`;
baseFont.set("Subtype", Name.get("Type0")); baseFont.set("Subtype", Name.get("Type0"));
baseFont.set("Encoding", Name.get("Identity-H")); baseFont.set("Encoding", Name.get("Identity-H"));
baseFont.set("DescendantFonts", [this.descendantFontRef]); baseFont.set("DescendantFonts", [this.descendantFontRef]);
baseFont.set("ToUnicode", this.toUnicodeRef); baseFont.set("ToUnicode", Name.get("Identity-H"));
return this.xref.getNewPersistentRef(baseFont); return this.xref.getNewPersistentRef(baseFont);
} }
@ -420,8 +392,8 @@ endcmap CMapName currentdict /CMap defineresource pop end end`;
// languages, like arabic, it'd be wrong because of ligatures. // languages, like arabic, it'd be wrong because of ligatures.
const lineWidth = ctx.measureText(line).width; const lineWidth = ctx.measureText(line).width;
maxWidth = Math.max(maxWidth, lineWidth); maxWidth = Math.max(maxWidth, lineWidth);
for (const char of line.split("")) { for (const code of codePointIter(line)) {
const code = char.charCodeAt(0); const char = String.fromCodePoint(code);
let width = this.widths.get(code); let width = this.widths.get(code);
if (width === undefined) { if (width === undefined) {
const metrics = ctx.measureText(char); const metrics = ctx.measureText(char);

View File

@ -627,3 +627,4 @@
!file_pdfjs_form.pdf !file_pdfjs_form.pdf
!issue17492.pdf !issue17492.pdf
!issue17540.pdf !issue17540.pdf
!bug1669097.pdf

BIN
test/pdfs/bug1669097.pdf Executable file

Binary file not shown.

View File

@ -9650,5 +9650,30 @@
"structTreeParentId": null "structTreeParentId": null
} }
} }
},
{
"id": "bug1669097",
"file": "pdfs/bug1669097.pdf",
"md5": "561b3abac1fe49e1c9cd265cbf4a456e",
"rounds": 1,
"type": "eq",
"save": true,
"print": true,
"annotationStorage": {
"24R": {
"value": "😇👽🖖"
},
"pdfjs_internal_editor_0": {
"annotationType": 3,
"color": [0, 0, 0],
"fontSize": 10,
"value": "😇😇😇😇😇😇👽👽👽👽👽👽🖖",
"pageIndex": 0,
"rect": [267, 638, 452, 658],
"rotation": 0,
"structTreeParentId": null,
"id": null
}
}
} }
] ]