I need to get a string / char from a unicode charcode and finally put it into a DOM TextNode to add into an HTML page using client side JavaScript.
Currently, I am doing:
String.fromCharCode(parseInt(charcode, 16));
where charcode is a hex string containing the code point, e.g. "1D400". The Unicode character which should be returned is 𝐀, but a 퐀 is returned! Characters in the 16-bit range (0000 ... FFFF) are returned as expected.
Any explanation and / or proposals for correction?
Thanks in advance!
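The problem is that JavaScript strings are sequences of 16-bit code units (essentially UCS-2), so String.fromCharCode keeps only the low 16 bits of its argument: 0x1D400 is truncated to 0xD400, which is 퐀. A character outside the Basic Multilingual Plane has to be represented in JavaScript as a UTF-16 surrogate pair, i.e. two code units.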
The following function is adapted from Converting punycode with dash character to Unicode:
/**
 * Encodes a list of Unicode code points as a JavaScript (UTF-16) string.
 *
 * Code points up to 0xFFFF are emitted as a single 16-bit code unit; code
 * points above 0xFFFF are split into a high/low surrogate pair.
 *
 * @param {number[]} input - Code points in the range 0..0x10FFFF, excluding
 *     the surrogate range 0xD800..0xDFFF.
 * @returns {string} The concatenated UTF-16 encoding of all code points
 *     (the empty string for an empty input).
 * @throws {RangeError} If a value is a lone surrogate, or is not a number
 *     within 0..0x10FFFF (this also rejects NaN and undefined entries).
 */
function utf16Encode(input) {
    var output = [];
    var len = input.length;
    var i;
    var value;

    for (i = 0; i < len; i++) {
        value = input[i];

        // Written as a negated conjunction so that NaN/undefined, for which
        // every comparison is false, are rejected as well.
        if (!(value >= 0 && value <= 0x10FFFF)) {
            throw new RangeError("UTF-16(encode): Code point out of range");
        }

        // Compare against the real surrogate range. Masking with 0xF800 only
        // looks at the low 16 bits and would wrongly reject valid
        // supplementary code points such as U+1D800.
        if (value >= 0xD800 && value <= 0xDFFF) {
            throw new RangeError("UTF-16(encode): Illegal UTF-16 value");
        }

        if (value > 0xFFFF) {
            // Supplementary plane: emit the high surrogate here and let the
            // shared push below emit the low surrogate.
            value -= 0x10000;
            output.push(String.fromCharCode(((value >>> 10) & 0x3FF) | 0xD800));
            value = 0xDC00 | (value & 0x3FF);
        }

        output.push(String.fromCharCode(value));
    }

    return output.join("");
}
// Demo: encode the single code point 0x1D400 and show the resulting string.
alert(utf16Encode([0x1D400]));