diff --git a/fmt/fmt_escapecharc.c b/fmt/fmt_escapecharc.c
index 99fb779..f67f12d 100644
--- a/fmt/fmt_escapecharc.c
+++ b/fmt/fmt_escapecharc.c
@@ -77,7 +77,7 @@ int main() {
   assert(fmt_escapecharc(buf,'\t')==2 && !memcmp(buf,"\\t",2));
   assert(fmt_escapecharc(buf,'\v')==2 && !memcmp(buf,"\\v",2));
   assert(fmt_escapecharc(buf,'\\')==2 && !memcmp(buf,"\\\\",2));
-  assert(fmt_escapecharc(buf,'1')==4 && !memcmp(buf,"\\001",2));
+  assert(fmt_escapecharc(buf,'1')==4 && !memcmp(buf,"\\061",4)); // '1' is 0x31 == octal 061; compare all 4 bytes
   assert(fmt_escapecharc(buf,0xfefe)==6 && !memcmp(buf,"\\ufefe",6));
   assert(fmt_escapecharc(buf,0xfefec0de)==10 && !memcmp(buf,"\\Ufefec0de",10));
   return 0;
diff --git a/fmt/fmt_xmlescape.c b/fmt/fmt_xmlescape.c
index b3733f8..6b47d84 100644
--- a/fmt/fmt_xmlescape.c
+++ b/fmt/fmt_xmlescape.c
@@ -1,7 +1,11 @@
 #include "fmt.h"
 
+/* This is NOT fmt_escapexml, which escapes everything whether it needs
+ * escaping or not. This escapes only what needs escaping ('&', '<' and
+ * control characters other than \t, \n, \r) and returns 0 for invalid input. */
 size_t fmt_xmlescape(char* dest,uint32_t ch) {
   char* x;
+  char buf[6];
   size_t n;
 /*
 From http://en.wikipedia.org/wiki/XML#Valid_characters
@@ -12,8 +16,7 @@ Unicode code points in the following ranges are valid in XML 1.0 documents:
     U+10000–U+10FFFF: this includes all code points in supplementary planes, including non-characters.
 */
   if (ch==0 || (ch>=0xd780 && ch<=0xdfff) || ch==0xfffe || ch==0xffff || ch>0x10ffff) return 0;
-  if ((ch&0x7f)<20 && ch!=9 && ch!=0xa && ch!=0xd && ch!=0x85) {
-    char buf[6];
+  if (ch<0x20 && ch!=9 && ch!=0xa && ch!=0xd) {
     buf[0]='&';
     buf[1]='#';
     buf[2]='x';
@@ -38,3 +41,32 @@ Unicode code points in the following ranges are valid in XML 1.0 documents:
   }
   return n;
 }
+
+#ifdef UNITTEST
+#undef UNITTEST
+
+#include "fmt_utf8.c"
+#include "fmt_xlong.c"
+
+#include <assert.h>
+#include <string.h>
+
+int main() {
+  char buf[100];
+  buf[0]=0x78;	// sentinel: rejected input must leave the buffer untouched
+  assert(fmt_xmlescape(buf,0) == 0 && buf[0]==0x78);	// 0 not allowed
+  assert(fmt_xmlescape(buf,0xd800) == 0 && buf[0]==0x78);	// surrogate pairs not allowed
+  assert(fmt_xmlescape(buf,0xdfff) == 0 && buf[0]==0x78);	// surrogate pairs not allowed
+  assert(fmt_xmlescape(buf,0xfffe) == 0 && buf[0]==0x78);	// 0xfffe and 0xffff forbidden
+  assert(fmt_xmlescape(buf,0xffff) == 0 && buf[0]==0x78);
+  assert(fmt_xmlescape(buf,0x110000) == 0 && buf[0]==0x78);	// too large
+  buf[1]=0x79;	// sentinel: a 1-byte result must not write past buf[0]
+  assert(fmt_xmlescape(buf,9) == 1 && buf[0]==9 && buf[1]==0x79);	// \t OK
+  assert(fmt_xmlescape(buf,10) == 1 && buf[0]==10 && buf[1]==0x79);	// \n OK
+  assert(fmt_xmlescape(buf,13) == 1 && buf[0]==13 && buf[1]==0x79);	// \r OK
+  buf[5]=0x77;	// sentinel: 4- and 5-byte results must not write buf[5]
+  assert(fmt_xmlescape(buf,14) == 5 && !memcmp(buf,"&#xe;\x77",6));	// other control chars become &#x..; references
+  assert(fmt_xmlescape(buf,'&') == 5 && !memcmp(buf,"&amp;\x77",6));	// & -> &amp;
+  assert(fmt_xmlescape(buf,'<') == 4 && !memcmp(buf,"&lt;;\x77",6));	// < -> &lt; (buf[4] still holds ';' from "&amp;")
+}
+#endif