From 8526ae3d0d0636e92bcc206a715b0b53546e2674 Mon Sep 17 00:00:00 2001
From: leitner
Date: Tue, 25 Jul 2017 19:50:33 +0000
Subject: [PATCH] fix utf-8 json encoding for outside basic multilingual plane

A code point above U+FFFF is written to JSON as a UTF-16 surrogate
pair.  The pair encodes the 20-bit value (u - 0x10000): its upper ten
bits are added to 0xd800 (high surrogate) and its lower ten bits to
0xdc00 (low surrogate).

The high surrogate was computed from u itself, without removing the
0x10000 offset, so U+1F4A9 was emitted as \ud87d\udca9 instead of
\ud83d\udca9.  Subtract the offset before shifting, and correct the
expected values in the fmt_jsonescape and scan_jsonescape self-tests.
---
Review notes (ignored by git-am):

* The added fmt_xlong line uses ((u-0x10000)>>10) & 0x3ff rather than
  (u>>10) & 0x3bf.  Masking with 0x3bf only clears bit 6 of u>>10, which
  equals subtracting 0x10000 for U+10000..U+1FFFF but produces a wrong
  high surrogate from U+20000 upwards (U+20000 came out as \ud880\udc00
  instead of \ud840\udc00).  The low surrogate is unaffected because
  0x10000 is a multiple of 0x400.
* Because of that change, the post-image blob id on the
  fmt_jsonescape.c index line no longer describes the resulting file.
* Line breaks were restored from the diff syntax and match the hunk
  line counts.  The leading indentation and in-line spacing of the C
  lines could not be recovered from the collapsed text and are
  conventional guesses - TODO confirm against the tree before applying.

 textcode/fmt_jsonescape.c  | 4 ++--
 textcode/scan_jsonescape.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/textcode/fmt_jsonescape.c b/textcode/fmt_jsonescape.c
index dde2731..4c78046 100644
--- a/textcode/fmt_jsonescape.c
+++ b/textcode/fmt_jsonescape.c
@@ -47,7 +47,7 @@ escape:
     if (dest) {
       dest[written ]='\\';
       dest[written+1]='u';
-      fmt_xlong(dest+written+2,0xd800 + ((u>>10) & 0x3ff));
+      fmt_xlong(dest+written+2,0xd800 + (((u-0x10000)>>10) & 0x3ff));
       dest[written+6]='\\';
       dest[written+7]='u';
       fmt_xlong(dest+written+8,0xdc00 + (u & 0x3ff));
@@ -82,6 +82,6 @@ int main() {
   /* test escaping of unprintable characters */
   assert(fmt_jsonescape(buf,"\001x",2)==7 && !memcmp(buf,"\\u0001x",7));
   /* test conversion of large UTF-8 chars to UTF-16 surrogate pairs (poop emoji) */
-  assert(fmt_jsonescape(buf,"\xf0\x9f\x92\xa9x",5)==13 && !memcmp(buf,"\\ud87d\\udca9x",13));
+  assert(fmt_jsonescape(buf,"\xf0\x9f\x92\xa9x",5)==13 && !memcmp(buf,"\\ud83d\\udca9x",13));
 }
 #endif
diff --git a/textcode/scan_jsonescape.c b/textcode/scan_jsonescape.c
index d28350d..30ac6fb 100644
--- a/textcode/scan_jsonescape.c
+++ b/textcode/scan_jsonescape.c
@@ -99,7 +99,7 @@ int main() {
   assert(scan_jsonescape("a\\udafd0",buf,&l)==1);
   /* correct surrogate pair */
   assert(scan_jsonescape("a\\ud834\\udd1eb",buf,&l)==14 && l==6 && !memcmp(buf,"a\xf0\x9d\x84\x9e""b",6));
-  assert(scan_jsonescape("\\ud87d\\udca9x",buf,&l)==13 && l==5 && !memcmp(buf,"\xf0\x9f\x92\xa9x",5));
+  assert(scan_jsonescape("\\ud83d\\udca9x",buf,&l)==13 && l==5 && !memcmp(buf,"\xf0\x9f\x92\xa9x",5));
   /* how about some incorrect UTF-8? */
   assert(scan_jsonescape("a\xc0\xaf",buf,&l)==1 && l==1 && !memcmp(buf,"a",1));
   return 0;