diff --git a/textcode/fmt_jsonescape.c b/textcode/fmt_jsonescape.c
index 89113f2..dde2731 100644
--- a/textcode/fmt_jsonescape.c
+++ b/textcode/fmt_jsonescape.c
@@ -1,6 +1,7 @@
 #include "fmt.h"
 #include "textcode.h"
 #include "str.h"
+#include "scan.h"
 #include "haveinline.h"
 
 /* src is UTF-8 encoded */
@@ -36,6 +37,28 @@ escape:
 	  dest[written+5]=fmt_tohex(s[i]&0xf);
 	}
 	written+=6;
+      } else if (s[i]>0x7f) {
+	/* UTF-8! Above U+FFFF, emit a UTF-16 surrogate pair: subtract 0x10000, then split into two 10-bit halves. */
+	uint32_t u;
+	size_t j=scan_utf8_sem((const char*)s+i,len-i,&u);
+	if (j==0) /* Invalid UTF-8! Abort! */
+	  return written;
+	if (u>0xffff) {
+	  if (dest) {
+	    dest[written  ]='\\';
+	    dest[written+1]='u';
+	    fmt_xlong(dest+written+2,0xd800 + (((u-0x10000)>>10) & 0x3ff));
+	    dest[written+6]='\\';
+	    dest[written+7]='u';
+	    fmt_xlong(dest+written+8,0xdc00 + ((u-0x10000) & 0x3ff));
+	  }
+	  written+=12;
+	} else {
+	  if (dest) memcpy(dest+written,s+i,j);
+	  written+=j;
+	}
+	i+=j-1;	/* -1 because the for loop will also add 1 */
+	break;
       } else {
 	if (dest) dest[written]=s[i];
 	++written;
@@ -48,3 +71,17 @@ escape:
   return written;
 }
 
+#ifdef UNITTEST
+#include <assert.h>
+#include <string.h>
+
+int main() {
+  char buf[100];
+  /* test utf-8 pass-through and correct encoding of \t */
+  assert(fmt_jsonescape(buf,"\tfnörd",7)==8 && !memcmp(buf,"\\tfnörd",8));
+  /* test escaping of unprintable characters */
+  assert(fmt_jsonescape(buf,"\001x",2)==7 && !memcmp(buf,"\\u0001x",7));
+  /* test conversion of large UTF-8 chars to UTF-16 surrogate pairs (poop emoji, U+1F4A9 -> d83d dca9) */
+  assert(fmt_jsonescape(buf,"\xf0\x9f\x92\xa9x",5)==13 && !memcmp(buf,"\\ud83d\\udca9x",13));
+}
+#endif
diff --git a/textcode/scan_html.c b/textcode/scan_html.c
index 97e9122..5ddb50a 100644
--- a/textcode/scan_html.c
+++ b/textcode/scan_html.c
@@ -25,9 +25,15 @@ static const char* lookup(size_t ofs,const char* t) {
   return NULL;
 }
 
-static size_t scan_html_inner(const char *src,char *dest,size_t *destlen,int flag) {
+enum htmlmode {	/* <a href="http://example.com/&quot;foo">libowfat&lt;home</a> */
+  OUTSIDE,	/*                                        ^^^^^^^^^^^^^^^^ -> `libowfat<home` */
+  TAGARG,	/*         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -> `http://example.com/"foo` */
+};
+
+static size_t scan_html_inner(const char *src,char *dest,size_t *destlen,enum htmlmode mode) {
   register const unsigned char* s=(const unsigned char*) src;
   size_t written=0,i;
+  int dq=0;
   for (i=0; s[i]; ++i) {
     if (s[i]=='&') {
       const char* utf8;
@@ -58,7 +64,8 @@ static size_t scan_html_inner(const char *src,char *dest,size_t *destlen,int fla
 	continue;
       } else
 	dest[written]='&';
-    } else if (flag && s[i]=='<') {
+    } else if (s[i]=='<') {
+      if (mode == OUTSIDE) break;
       if (case_starts((const char*)s+i+1,"br>")) {
 	dest[written]='\n';
 	i+=3;
@@ -68,7 +75,12 @@ static size_t scan_html_inner(const char *src,char *dest,size_t *destlen,int fla
 	i+=3;
       } else
 	dest[written]=s[i];
-    } else
+    } else if (s[i]=='"' && mode==TAGARG) {
+      if (i==0) { dq=1; continue; }
+      break;
+    } else if (mode==TAGARG && !dq && (s[i]==' ' || s[i]=='\t' || s[i]=='\n'))
+      break;
+    else
       dest[written]=s[i];
     ++written;
   }
@@ -77,9 +89,26 @@ static size_t scan_html_inner(const char *src,char *dest,size_t *destlen,int fla
 }
 
 size_t scan_html_tagarg(const char *src,char *dest,size_t *destlen) {
-  return scan_html_inner(src,dest,destlen,1);
+  return scan_html_inner(src,dest,destlen,TAGARG);
 }
 
 size_t scan_html(const char *src,char *dest,size_t *destlen) {
-  return scan_html_inner(src,dest,destlen,0);
+  return scan_html_inner(src,dest,destlen,OUTSIDE);
+}
+
+#ifdef UNITTEST
+#include <assert.h>
+#include <string.h>	/* strchr, memcmp */
+
+int main() {
+  char* html="<a href=\"http://example.com/&quot;foo\">libowfat&lt;home</a>";
+  char buf[100];
+  size_t destlen;
+  /* html starts with '<', where scan_html stops: expect nothing consumed, nothing written */
+  assert(scan_html(html,buf,&destlen)==0 && destlen==0);
+  /* text past the opening tag: expect 16 bytes consumed up to "</a>", with &lt; decoded to '<' */
+  assert(scan_html(strchr(html,'>')+1,buf,&destlen)==16 && destlen==13 && !memcmp(buf,"libowfat<home",13));
+  /* attribute value past its opening quote: expect 28 bytes consumed up to the closing quote, with &quot; decoded */
+  assert(scan_html_tagarg(strchr(html,'"')+1,buf,&destlen)==28 && destlen==23 && !memcmp(buf,"http://example.com/\"foo",23));
 }
+#endif
diff --git a/textcode/scan_jsonescape.c b/textcode/scan_jsonescape.c
index 786f2c0..d28350d 100644
--- a/textcode/scan_jsonescape.c
+++ b/textcode/scan_jsonescape.c
@@ -99,6 +99,8 @@ int main() {
   assert(scan_jsonescape("a\\udafd0",buf,&l)==1);
   /* correct surrogate pair */
   assert(scan_jsonescape("a\\ud834\\udd1eb",buf,&l)==14 && l==6 && !memcmp(buf,"a\xf0\x9d\x84\x9e""b",6));
+  /* U+1F4A9 is the surrogate pair d83d dca9: 0x1F4A9-0x10000 = 0xF4A9, high 10 bits 0x3d, low 10 bits 0xa9 */
+  assert(scan_jsonescape("\\ud83d\\udca9x",buf,&l)==13 && l==5 && !memcmp(buf,"\xf0\x9f\x92\xa9x",5));
   /* how about some incorrect UTF-8? */
   assert(scan_jsonescape("a\xc0\xaf",buf,&l)==1 && l==1 && !memcmp(buf,"a",1));
   return 0;