-
Notifications
You must be signed in to change notification settings - Fork 198
Labels
Description
Describe the bug
caml_utf8_of_utf16(String.fromCharCode(0xdbff, 0xdfff))
generates an incorrectly encoded UTF-8 string.
js_of_ocaml/runtime/js/mlBytes.js
Lines 106 to 113 in 246df64
} else if ( | |
c >= 0xdbff || | |
i + 1 === l || | |
(d = s.charCodeAt(i + 1)) < 0xdc00 || | |
d > 0xdfff | |
) { | |
// Unmatched surrogate pair, replaced by \ufffd (replacement character) | |
t += "\xef\xbf\xbd"; |
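Line 107 should be c > 0xdbff
since 0xdbff
is a valid high (leading) surrogate: the high-surrogate range is 0xd800–0xdbff inclusive, so with `>=` the pair (0xdbff, 0xdfff) is wrongly treated as an unmatched surrogate.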
Expected behavior
Expected result: b'\xf4\x8f\xbf\xbf' (the UTF-8 encoding of U+10FFFF),
obtained in Python with bytes.fromhex('dbffdfff').decode('utf-16be').encode('utf-8')
.
Actual result: ef bf bd ed bf bf (a replacement character followed by the lone low surrogate encoded as three bytes)
, obtained from
Array.from(caml_utf8_of_utf16(String.fromCharCode(0xdbff, 0xdfff)))
.map((c) => c.charCodeAt(0).toString(16).padStart(2, "0"))
.join(" ")
Versions
The latest version of jsoo (js_of_ocaml) contains this bug.