Commit 95c3f705 authored by Andrew Price's avatar Andrew Price
Browse files

Convert duktape's cesu-8 strings to utf-8 in js_print

🔥🔥🤷🔥🔥
parent e00db6ac
Loading
Loading
Loading
Loading
Loading
+64 −2
Original line number Diff line number Diff line
@@ -79,14 +79,76 @@ static void start_timeout(void)
	timeout_event = alarm_after(3, 0, NULL, &timeout);
}

/* Decode the 16-bit value carried by a 3-byte UTF-8 style sequence at p.
   The caller guarantees that p[0..2] are readable. */
static uint32_t cesu8_decode3(const unsigned char *p)
{
	return ((uint32_t)p[0] & 0xF) << 12 |
	       ((uint32_t)p[1] & 0x3F) << 6 |
	       ((uint32_t)p[2] & 0x3F);
}

/* Duktape uses a CESU-8 encoding, which allows UTF-16 surrogate pairs
   (themselves encoded in UTF-8), in order to be kinda-sorta compatible with
   ecmascript's UTF-16 requirements. This function copies the cesu8 string,
   converting any surrogate pairs it finds to 4-byte UTF-8 sequences.

   Only a high surrogate immediately followed by a low surrogate is combined;
   unpaired surrogates and all other sequences are copied through unchanged.
   Continuation bytes are not validated, but a sequence truncated by the end
   of the string is copied only up to the terminator, so the input is never
   read past its NUL.

   Returns a newly allocated NUL-terminated string that the caller must
   free(), or NULL if the allocation fails. */
static char *cesu8_to_utf8(const char *cesu8)
{
	size_t len = strlen(cesu8);
	const unsigned char *cc = (const unsigned char *)cesu8;
	const unsigned char *end = cc + len;
	/* A 6-byte surrogate pair shrinks to 4 bytes and everything else is
	   copied verbatim, so the output never exceeds the input length. */
	char *utf8 = malloc(len + 1);
	char *cu = utf8;

	if (utf8 == NULL)
		return NULL;

	while (cc < end) {
		size_t left = (size_t)(end - cc);
		size_t n;

		/* Sequence length implied by the lead byte */
		if (cc[0] <= 0x7F)
			n = 1;
		else if (cc[0] <= 0xDF)
			n = 2;
		else if (cc[0] <= 0xEF)
			n = 3;
		else
			n = 4;
		/* Truncated sequence at end of string: copy only what exists */
		if (n > left)
			n = left;

		/* Two adjacent, complete 3-byte sequences may be a surrogate pair */
		if (n == 3 && left >= 6 && cc[3] > 0xDF && cc[3] <= 0xEF) {
			uint32_t hs = cesu8_decode3(cc);
			uint32_t ls = cesu8_decode3(cc + 3);

			if (hs >= 0xD800 && hs <= 0xDBFF &&
			    ls >= 0xDC00 && ls <= 0xDFFF) {
				/* Have high and low surrogates - convert to
				   code point then back to UTF-8 */
				uint32_t u = 0x10000 + (((hs & 0x3FF) << 10) | (ls & 0x3FF));

				*cu++ = (char)(0xF0 | (u >> 18));
				*cu++ = (char)(0x80 | (u >> 12 & 0x3F));
				*cu++ = (char)(0x80 | (u >> 6 & 0x3F));
				*cu++ = (char)(0x80 | (u & 0x3F));
				cc += 6;
				continue;
			}
		}
		memcpy(cu, cc, n);
		cu += n;
		cc += n;
	}
	*cu = '\0';
	return utf8;
}

/* Print each argument on the duktape value stack via display_message().
   Every argument is coerced to a string with duk_to_string() and then
   converted from duktape's CESU-8 representation to UTF-8 before it is
   displayed, exactly once per argument. Always returns 0. */
static duk_ret_t js_print(duk_context *cx)
{
	int argc = duk_get_top(cx);

	if (argc < 1)
		return 0;
	for (int i = 0; i < argc; i++) {
		/* Negative index: arguments are addressed relative to the stack top */
		const char *cesu8 = duk_to_string(cx, i - argc);
		char *utf8 = cesu8_to_utf8(cesu8);

		/* Guard against a NULL conversion result by falling back to
		   the unconverted string rather than dereferencing NULL. */
		display_message(utf8 != NULL ? utf8 : cesu8, 0, 1);
		free(utf8);
	}
	return 0;
}