/*
 * Decode a CESU-8 encoded UTF-16 surrogate at s.
 *
 * Returns the surrogate code unit (0xD800..0xDFFF) when s starts with a
 * well-formed three-byte surrogate encoding (0xED, 0xA0..0xBF, 0x80..0xBF),
 * otherwise 0.  The checks short-circuit on the first mismatching byte, so
 * nothing past the NUL terminator is ever read.
 */
static uint32_t cesu8_surrogate(const unsigned char *s)
{
	if (s[0] != 0xED || (s[1] & 0xE0) != 0xA0 || (s[2] & 0xC0) != 0x80)
		return 0;
	return 0xD000 | ((uint32_t)(s[1] & 0x3F) << 6) | (uint32_t)(s[2] & 0x3F);
}

/*
 * Length in bytes of the sequence starting at s (s[0] must not be NUL).
 *
 * The lead byte decides the nominal length; the result is clamped at the
 * first byte that is not a continuation byte, which also stops it at the NUL
 * terminator.  A truncated or malformed sequence is therefore reported
 * shorter than nominal instead of running over the following data.
 */
static size_t cesu8_seq_len(const unsigned char *s)
{
	size_t want;
	size_t have = 1;

	if (s[0] < 0xC0)
		want = 1;	/* ASCII, or a stray continuation byte */
	else if (s[0] <= 0xDF)
		want = 2;
	else if (s[0] <= 0xEF)
		want = 3;
	else
		want = 4;

	while (have < want && (s[have] & 0xC0) == 0x80)
		have++;
	return have;
}

/*
 * Duktape uses a CESU-8 encoding, which allows UTF-16 surrogate pairs
 * (themselves encoded in UTF-8), in order to be kinda-sorta compatible with
 * ecmascript's UTF-16 requirements.
 *
 * This function copies the CESU-8 string, replacing every high+low surrogate
 * pair (6 bytes) with the 4-byte UTF-8 encoding of the code point it
 * represents.  Everything else, including unpaired surrogates and malformed
 * or truncated sequences, is copied through byte for byte, so the output is
 * never longer than the input.
 *
 * Returns a newly allocated NUL-terminated string that the caller must
 * free(), or NULL if cesu8 is NULL or the allocation fails.
 */
static char *cesu8_to_utf8(const char *cesu8)
{
	const unsigned char *in = (const unsigned char *)cesu8;
	char *utf8;
	char *out;

	if (cesu8 == NULL)
		return NULL;

	utf8 = malloc(strlen(cesu8) + 1);
	if (utf8 == NULL)
		return NULL;
	out = utf8;

	while (*in != '\0') {
		uint32_t hi = cesu8_surrogate(in);
		size_t len;

		if (hi >= 0xD800 && hi <= 0xDBFF) {
			/*
			 * in[0..2] are non-NUL here, so in + 3 is at worst
			 * the terminator and is safe to inspect.
			 */
			uint32_t lo = cesu8_surrogate(in + 3);

			if (lo >= 0xDC00) {
				/* High and low surrogate: rebuild the code
				   point and emit it as 4-byte UTF-8. */
				uint32_t cp = 0x10000 +
					(((hi & 0x3FF) << 10) | (lo & 0x3FF));

				*out++ = (char)(0xF0 | (cp >> 18));
				*out++ = (char)(0x80 | ((cp >> 12) & 0x3F));
				*out++ = (char)(0x80 | ((cp >> 6) & 0x3F));
				*out++ = (char)(0x80 | (cp & 0x3F));
				in += 6;
				continue;
			}
		}

		/* Anything else is passed through unchanged. */
		len = cesu8_seq_len(in);
		memcpy(out, in, len);
		out += len;
		in += len;
	}

	*out = '\0';
	return utf8;
}
src/client/js-duk.c +64 −2 Original line number Diff line number Diff line Loading @@ -79,14 +79,76 @@ static void start_timeout(void) timeout_event = alarm_after(3, 0, NULL, &timeout); } /* Duktape uses a CESU-8 encoding, which allows UTF-16 surrogate pairs (themselves encoded in UTF-8), in order to be kinda-sorta compatible with ecmascript's UTF-16 requirements. This function just copies the cesu8 string, converting any surrogate pairs it finds to UTF-8. */ static char *cesu8_to_utf8(const char *cesu8) { char *utf8 = calloc(1, strlen(cesu8) + 1); const unsigned char *cc = (void *)cesu8; char *cu = utf8; uint32_t hs = 0; while (*cc != '\0') { uint32_t c = 0; uint32_t u; if (cc[0] <= 0x7F) { *cu++ = *cc++; continue; } else if (cc[0] <= 0xDF) { *cu++ = *cc++; *cu++ = *cc++; continue; } else if (cc[0] <= 0xEF) { /* Surrogates are encoded in 3 chars so convert back to a single UTF-16 value */ c = ((uint32_t)cc[0] & 0xF) << 12 | ((uint32_t)cc[1] & 0x3F) << 6 | ((uint32_t)cc[2] & 0x3F); } else { *cu++ = *cc++; *cu++ = *cc++; *cu++ = *cc++; *cu++ = *cc++; continue; } if (hs == 0 && c >= 0xD800 && c <= 0xDBFF) hs = c; else if (hs != 0 && c >= 0xDC00 && c <= 0xDFFF) { /* Have high and low surrogates - convert to code point then back to UTF-8 */ u = 0x10000 + ((((uint32_t)hs & 0x3FF) << 10) | (c & 0x3FF)); *cu++ = 0xF0 | u >> 18; *cu++ = 0x80 | (u >> 12 & 0x3F); *cu++ = 0x80 | (u >> 6 & 0x3F); *cu++ = 0x80 | (u & 0x3F); hs = 0; } else { *cu++ = cc[0]; *cu++ = cc[1]; *cu++ = cc[2]; hs = 0; } cc += 3; } *cu = '\0'; return utf8; } static duk_ret_t js_print(duk_context *cx) { int argc = duk_get_top(cx); if (argc < 1) return 0; for (int i = 0; i < argc; i++) display_message(duk_to_string(cx, i - argc), 0, 1); for (int i = 0; i < argc; i++) { const char *cesu8 = duk_to_string(cx, i - argc); char *utf8 = cesu8_to_utf8(cesu8); display_message(utf8, 0, 1); free(utf8); } return 0; } Loading