diff --git a/fmt/fmt_8long.c b/fmt/fmt_8long.c index 7a372f6..f44c242 100644 --- a/fmt/fmt_8long.c +++ b/fmt/fmt_8long.c @@ -19,5 +19,7 @@ size_t fmt_8long(char *dest,unsigned long i) { int main() { char buf[100]; assert(fmt_8long(buf,012345)==5 && !memcmp(buf,"12345",5)); + assert(fmt_8long(buf,0)==1 && !memcmp(buf,"0",1)); + return 0; } #endif diff --git a/fmt/fmt_8longlong.c b/fmt/fmt_8longlong.c index 97cb9bf..f2638c8 100644 --- a/fmt/fmt_8longlong.c +++ b/fmt/fmt_8longlong.c @@ -19,6 +19,8 @@ size_t fmt_8longlong(char *dest,unsigned long long i) { int main() { char buf[100]; + assert(fmt_8longlong(buf,0)==1 && !memcmp(buf,"0",1)); assert(fmt_8longlong(buf,0123456701234567)==15 && !memcmp(buf,"123456701234567",15)); + return 0; } #endif diff --git a/fmt/fmt_escapecharc.c b/fmt/fmt_escapecharc.c index 47eb5e1..99fb779 100644 --- a/fmt/fmt_escapecharc.c +++ b/fmt/fmt_escapecharc.c @@ -80,6 +80,7 @@ int main() { assert(fmt_escapecharc(buf,'1')==4 && !memcmp(buf,"\\001",2)); assert(fmt_escapecharc(buf,0xfefe)==6 && !memcmp(buf,"\\ufefe",6)); assert(fmt_escapecharc(buf,0xfefec0de)==10 && !memcmp(buf,"\\Ufefec0de",10)); + return 0; } #endif diff --git a/fmt/fmt_escapecharhtml.c b/fmt/fmt_escapecharhtml.c index 23e5699..f39345e 100644 --- a/fmt/fmt_escapecharhtml.c +++ b/fmt/fmt_escapecharhtml.c @@ -6,4 +6,6 @@ size_t fmt_escapecharhtml(char* dest,uint32_t ch) { return fmt_escapecharxml(dest,ch); } +/* unit tested via fmt_escapecharxml.c */ + #endif diff --git a/fmt/fmt_escapecharjson.c b/fmt/fmt_escapecharjson.c index 7a39ef0..b943f89 100644 --- a/fmt/fmt_escapecharjson.c +++ b/fmt/fmt_escapecharjson.c @@ -1,5 +1,11 @@ #include "fmt.h" +#ifdef UNITTEST +#undef UNITTEST +#include "fmt_tohex.c" +#define UNITTEST +#endif + static void fmt_hex4(char* dest,uint16_t w) { dest[3]=fmt_tohex(w&0xf); w>>=4; dest[2]=fmt_tohex(w&0xf); w>>=4; @@ -45,3 +51,21 @@ simple: } return n+6; } + +#ifdef UNITTEST +#include +#include + +int main() { + char buf[100]; + assert(fmt_escapecharjson(buf,'f')==6 && !memcmp(buf,"\\u0066",6)); + assert(fmt_escapecharjson(buf,'\b')==2 && !memcmp(buf,"\\b",2)); + assert(fmt_escapecharjson(buf,'\n')==2 && !memcmp(buf,"\\n",2)); + assert(fmt_escapecharjson(buf,'\r')==2 && !memcmp(buf,"\\r",2)); + assert(fmt_escapecharjson(buf,'"')==2 && !memcmp(buf,"\\\"",2)); + assert(fmt_escapecharjson(buf,'\\')==2 && !memcmp(buf,"\\\\",2)); + assert(fmt_escapecharjson(buf,'/')==2 && !memcmp(buf,"\\/",2)); /* I'm baffled as well */ + assert(fmt_escapecharjson(buf,0x1d11e)==12 && !memcmp(buf,"\\ud834\\udd1e",12)); /* utf-16 surrogate pairs */ + return 0; +} +#endif diff --git a/fmt/fmt_escapecharquotedprintable.c b/fmt/fmt_escapecharquotedprintable.c index 2b07170..790275d 100644 --- a/fmt/fmt_escapecharquotedprintable.c +++ b/fmt/fmt_escapecharquotedprintable.c @@ -1,5 +1,11 @@ #include "fmt.h" +#ifdef UNITTEST +#undef UNITTEST +#include "fmt_tohex.c" +#define UNITTEST +#endif + size_t fmt_escapecharquotedprintable(char* dest,uint32_t ch) { if (ch>0xff) return 0; if (dest) { @@ -9,3 +15,13 @@ size_t fmt_escapecharquotedprintable(char* dest,uint32_t ch) { } return 3; } + +#ifdef UNITTEST +#include +#include + +int main() { + char buf[100]; + assert(fmt_escapecharquotedprintable(buf,'f')==3 && !memcmp(buf,"=66",3)); +} +#endif diff --git a/fmt/fmt_ulong.c b/fmt/fmt_ulong.c index 7b7bc4a..ea123b3 100644 --- a/fmt/fmt_ulong.c +++ b/fmt/fmt_ulong.c @@ -17,5 +17,6 @@ size_t fmt_ulong(char *dest,unsigned long i) { int main() { char buf[100]; assert(fmt_ulong(buf,12345)==5 && !memcmp(buf,"12345",5)); + return 0; } #endif diff --git a/fmt/fmt_utf8.c b/fmt/fmt_utf8.c index 9e26eb7..de2202d 100644 --- a/fmt/fmt_utf8.c +++ b/fmt/fmt_utf8.c @@ -25,3 +25,4 @@ size_t fmt_utf8(char *dest,uint32_t n) { return 0; } +/* unit tested via scan/scan_utf8.c */ diff --git a/fmt/fmt_xlong.c b/fmt/fmt_xlong.c index 37a66f8..06c4654 100644 --- a/fmt/fmt_xlong.c +++ b/fmt/fmt_xlong.c @@ -24,5 +24,6 @@ size_t fmt_xlong(char *dest,unsigned long i) { int main() { char buf[100]; assert(fmt_xlong(buf,0x12345)==5 && !memcmp(buf,"12345",5)); + return 0; } #endif diff --git a/mult/imult16.c b/mult/imult16.c index 13d86f4..06cb797 100644 --- a/mult/imult16.c +++ b/mult/imult16.c @@ -16,3 +16,16 @@ int imult16(int16 a,int16 b,int16* c) { } #endif + +#ifdef UNITTEST +#include + +int main() { + int16 a; + assert(imult16(4,10000,&a)==0); + assert(imult16(-4,10000,&a)==0); + assert(imult16(5,10,&a)==1 && a==50); + assert(imult16(-3,10000,&a)==1 && a==-30000); + return 0; +} +#endif diff --git a/mult/imult32.c b/mult/imult32.c index 1073490..40e5b94 100644 --- a/mult/imult32.c +++ b/mult/imult32.c @@ -16,3 +16,16 @@ int imult32(int32 a,int32 b,int32* c) { } #endif + +#ifdef UNITTEST +#include + +int main() { + int32 b; + + assert(imult32(0x40000000,2,&b)==0); + assert(imult32(-0x40000000,2,&b)==1 && b==-0x80000000ll); + assert(imult32(0x3fffffff,2,&b)==1 && b==0x7ffffffe); + return 0; +} +#endif diff --git a/mult/imult64.c b/mult/imult64.c index 79f2f7b..0cb06e7 100644 --- a/mult/imult64.c +++ b/mult/imult64.c @@ -32,3 +32,18 @@ int imult64(int64 a,int64 b,int64* c) { #endif #endif + +#ifdef UNITTEST +#include + +int main() { + int64 c; + + assert(imult64(0x4000000000000000ll,2,&c)==0); + assert(imult64(-0x4000000000000000ll,2,&c)==1 && c==(int64)-0x8000000000000000ll); + assert(imult64(0x3fffffffffffffffll,2,&c)==1 && c==0x7ffffffffffffffell); + + return 0; +} + +#endif diff --git a/mult/umult16.c b/mult/umult16.c index eba66dd..9bc0170 100644 --- a/mult/umult16.c +++ b/mult/umult16.c @@ -16,3 +16,16 @@ int umult16(uint16 a,uint16 b,uint16* c) { } #endif + + +#ifdef UNITTEST +#include + +int main() { + uint16 a; + assert(umult16(7,10000,&a)==0); + assert(umult16(5,10,&a)==1 && a==50); + assert(umult16(6,10000,&a)==1 && a==60000); + return 0; +} +#endif diff --git a/mult/umult32.c b/mult/umult32.c index 948cf0f..557369d 100644 --- a/mult/umult32.c +++ b/mult/umult32.c @@ -16,3 +16,16 @@ int umult32(uint32 a,uint32 b,uint32* c) { } #endif + +#ifdef UNITTEST +#include + +int main() { + uint32 a; + assert(umult32(4,0x80000000,&a)==0); + assert(umult32(16,0x45000000,&a)==0); // make sure we don't fall for "if a*b + +int main() { + uint64 a; + assert(umult64(4,0x8000000000000000ull,&a)==0); + assert(umult64(16,0x4500000000000000ull,&a)==0); // make sure we don't fall for "if a*b + +int main() { + unsigned int i; + assert(scan_8int("1234",&i)==4 && i==01234); + assert(scan_8int("12345678",&i)==7 && i==01234567); + assert(scan_8int("37777777777",&i)==11 && i==0xffffffff); + assert(scan_8int("40000000000",&i)==10 && i==04000000000); + assert(scan_8int("-4",&i)==0 && i==0); + assert(scan_8int("01234",&i)==5 && i==01234); + return 0; +} +#endif diff --git a/scan/scan_8long.c b/scan/scan_8long.c index 06283cc..d44c750 100644 --- a/scan/scan_8long.c +++ b/scan/scan_8long.c @@ -1,5 +1,31 @@ #include "scan.h" +#ifdef UNITTEST +#undef UNITTEST +#include "scan_8longn.c" +#define UNITTEST +#endif + size_t scan_8long(const char *src,unsigned long *dest) { return scan_8longn(src,(size_t)-1,dest); } + +#ifdef UNITTEST +#include + +int main() { + unsigned long i; + assert(scan_8long("1234",&i)==4 && i==01234); + assert(scan_8long("12345678",&i)==7 && i==01234567); + if (sizeof(long)==sizeof(int)) { + assert(scan_8long("37777777777",&i)==11 && i==0xffffffff); + assert(scan_8long("40000000000",&i)==10 && i==04000000000); + } else if (sizeof(long)==sizeof(long long)) { + assert(scan_8long("1777777777777777777777",&i)==22 && i==0xffffffffffffffffull); + assert(scan_8long("2000000000000000000000",&i)==21 && i==0200000000000000000000ull); + } + assert(scan_8long("-4",&i)==0 && i==0); + assert(scan_8long("01234",&i)==5 && i==01234); + return 0; +} +#endif diff --git a/scan/scan_8longlong.c b/scan/scan_8longlong.c index 3757a17..849e8c9 100644 --- a/scan/scan_8longlong.c +++ b/scan/scan_8longlong.c @@ -20,3 +20,18 @@ size_t scan_8longlong(const char *src,unsigned long long *dest) { *dest=l; return (size_t)(tmp-src); } + +#ifdef UNITTEST +#include + +int main() { + unsigned long long i; + assert(scan_8longlong("1234",&i)==4 && i==01234); + assert(scan_8longlong("12345678",&i)==7 && i==01234567); + assert(scan_8longlong("1777777777777777777777",&i)==22 && i==0xffffffffffffffffull); + assert(scan_8longlong("2000000000000000000000",&i)==21 && i==200000000000000000000ull); + assert(scan_8longlong("-4",&i)==0 && i==0); + assert(scan_8longlong("01234",&i)==5 && i==01234); + return 0; +} +#endif diff --git a/scan/scan_8longn.c b/scan/scan_8longn.c index 31156fe..3bd2551 100644 --- a/scan/scan_8longn.c +++ b/scan/scan_8longn.c @@ -20,3 +20,24 @@ size_t scan_8longn(const char *src,size_t n,unsigned long *dest) { *dest=l; return (size_t)(tmp-src); } + +#ifdef UNITTEST +#include + +int main() { + unsigned long i; + assert(scan_8longn("1234",100,&i)==4 && i==01234); + assert(scan_8longn("12345678",100,&i)==7 && i==01234567); + if (sizeof(long)==sizeof(int)) { + assert(scan_8longn("37777777777",100,&i)==11 && i==0xffffffff); + assert(scan_8longn("40000000000",100,&i)==10 && i==04000000000); + } else if (sizeof(long)==sizeof(long long)) { + assert(scan_8longn("1777777777777777777777",100,&i)==22 && i==0xffffffffffffffffull); + assert(scan_8longn("2000000000000000000000",100,&i)==21 && i==0200000000000000000000ull); + } + assert(scan_8longn("-4",100,&i)==0 && i==0); + assert(scan_8longn("01234",100,&i)==5 && i==01234); + assert(scan_8longn("1234",2,&i)==2 && i==012); + return 0; +} +#endif diff --git a/scan/scan_8short.c b/scan/scan_8short.c index 557087c..5549da8 100644 --- a/scan/scan_8short.c +++ b/scan/scan_8short.c @@ -22,3 +22,18 @@ size_t scan_8short(const char* src,unsigned short* dest) { *dest=l; return (size_t)(tmp-src); } + +#ifdef UNITTEST +#include + +int main() { + unsigned short i; + assert(scan_8short("1234",&i)==4 && i==01234); + assert(scan_8short("5678",&i)==3 && i==0567); + assert(scan_8short("177777",&i)==6 && i==0xffff); + assert(scan_8short("200000",&i)==5 && i==020000); + assert(scan_8short("-4",&i)==0 && i==0); + assert(scan_8short("01234",&i)==5 && i==01234); + return 0; +} +#endif diff --git a/scan/scan_asn1derlength.c b/scan/scan_asn1derlength.c index 557f555..864cd53 100644 --- a/scan/scan_asn1derlength.c +++ b/scan/scan_asn1derlength.c @@ -50,3 +50,29 @@ size_t scan_asn1derlength(const char* src,size_t len,unsigned long long* value) *value=l; return i; } + +#ifdef UNITTEST +#include + +int main() { + unsigned long long i; + /* first check actual parsing */ + assert(scan_asn1derlengthvalue("\x05",1,&i)==1 && i==5); + assert(scan_asn1derlengthvalue("\x81\xc2",2,&i)==2 && i==0xc2); + assert(scan_asn1derlengthvalue("\x82\x01\x23",3,&i)==3 && i==0x123); + assert(scan_asn1derlengthvalue("\x83\x01\x23\x45",4,&i)==4 && i==0x12345); + assert(scan_asn1derlengthvalue("\x83\x01\x23\x45",5,&i)==4 && i==0x12345); + assert(scan_asn1derlengthvalue("\x80",1,&i)==0); // reject indefinite length encoding + assert(scan_asn1derlengthvalue("\x81\x05",2,&i)==0); // reject non-minimal encoding + assert(scan_asn1derlengthvalue("\x81\xc2",1,&i)==0); // reject truncated message + assert(scan_asn1derlengthvalue("\x82\xc2",2,&i)==0); // reject truncated message + assert(scan_asn1derlengthvalue("\x82\x00\xc2",3,&i)==0); // reject non-minimal encoding + assert(scan_asn1derlengthvalue("\x89\x01\x02\x03\x04\x05\x06\x07\x08\x09",10,&i)==0); // value does not fit in target integer + + /* now check buffer length checking in scan_asn1derlength */ + assert(scan_asn1derlength("\x01",2,&i)==1 && i==1); // ok + assert(scan_asn1derlength("\x02",2,&i)==0); // buffer too small + assert(scan_asn1derlength("\x88\xff\xff\xff\xff\xff\xff\xff\xff",9,&i)==0); // buffer too small, and integer overflow in naive check + return 0; +} +#endif diff --git a/scan/scan_asn1dertag.c b/scan/scan_asn1dertag.c index 769482d..9ab7fd7 100644 --- a/scan/scan_asn1dertag.c +++ b/scan/scan_asn1dertag.c @@ -25,3 +25,18 @@ size_t scan_asn1dertag(const char* src,size_t len,unsigned long long* length) { return 0; } +#ifdef UNITTEST +#include + +int main() { + unsigned long long i; + assert(scan_asn1dertag("\x00",1,&i)==1 && i==0); + assert(scan_asn1dertag("\x05",1,&i)==1 && i==5); + assert(scan_asn1dertag("\x81\x42",2,&i)==2 && i==0xc2); + assert(scan_asn1dertag("\x05",0,&i)==0); // truncated + assert(scan_asn1dertag("\x81\x42",1,&i)==0); // truncated + assert(scan_asn1dertag("\x80\x05",2,&i)==0); // non-minimal encoding + assert(scan_asn1dertag("\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff",10,&i)==0); // value does not fit + return 0; +} +#endif diff --git a/scan/scan_charsetnskip.c b/scan/scan_charsetnskip.c index 5d71aa5..9bc67b0 100644 --- a/scan/scan_charsetnskip.c +++ b/scan/scan_charsetnskip.c @@ -15,3 +15,15 @@ size_t scan_charsetnskip(const char *s,const char *charset,size_t limit) { } return (size_t)(t-s); } + +#ifdef UNITTEST +#include + +int main() { + assert(scan_charsetnskip("fnord","fo",4)==1); + assert(scan_charsetnskip("fnord","nf",4)==2); + assert(scan_charsetnskip("gaord","nf",4)==0); + assert(scan_charsetnskip("fnord","fnord",4)==4); + return 0; +} +#endif diff --git a/scan/scan_fromhex.c b/scan/scan_fromhex.c index 1365b39..8b52b28 100644 --- a/scan/scan_fromhex.c +++ b/scan/scan_fromhex.c @@ -18,3 +18,20 @@ int scan_fromhex(unsigned char c) { */ } +#ifdef UNITTEST +#include +int main() { + int i; + for (i=0; i<256; ++i) { + if (i>='0' && i<='9') + assert(scan_fromhex(i)==i-'0'); + else if (i>='a' && i<='f') + assert(scan_fromhex(i)==i-'a'+10); + else if (i>='A' && i<='F') + assert(scan_fromhex(i)==i-'A'+10); + else + assert(scan_fromhex(i)==-1); + } + return 0; +} +#endif diff --git a/scan/scan_int.c b/scan/scan_int.c index 7b03548..93f809f 100644 --- a/scan/scan_int.c +++ b/scan/scan_int.c @@ -15,6 +15,10 @@ size_t scan_int(const char* src,int* dest) { } while ((c=(unsigned char)(*tmp-'0'))<10) { unsigned int n; +#if defined(__GNUC__) && (__GNUC__ >= 5) + if (__builtin_mul_overflow(l,10,&n) || __builtin_add_overflow(n,c,&n)) + break; +#else /* we want to do: l=l*10+c * but we need to check for integer overflow. * to check whether l*10 overflows, we could do @@ -27,6 +31,7 @@ size_t scan_int(const char* src,int* dest) { n+=(unsigned int)l<<1; if (n+c < n) break; n+=c; +#endif if (n > maxint+neg) break; l=(int)n; ++tmp; @@ -36,3 +41,20 @@ size_t scan_int(const char* src,int* dest) { *dest=(neg?-l:l); return (size_t)(tmp-src); } + +#ifdef UNITTEST +#include + +int main() { + int i; + assert(scan_int("1234",&i)==4 && i==1234); + assert(scan_int("-1234",&i)==5 && i==-1234); + assert(scan_int("+1234",&i)==5 && i==1234); + assert(scan_int("4294967295",&i)==9 && i==429496729); // overflow + assert(scan_int("2147483647",&i)==10 && i==2147483647); // MAX_INT + assert(scan_int("2147483648",&i)==9 && i==214748364); // overflow + assert(scan_int("-2147483648",&i)==11 && i==-2147483648); // MIN_INT + assert(scan_int("-2147483649",&i)==10 && i==-214748364); // underflow + return 0; +} +#endif diff --git a/scan/scan_long.c b/scan/scan_long.c index ae610d9..308721d 100644 --- a/scan/scan_long.c +++ b/scan/scan_long.c @@ -1,5 +1,33 @@ #include "scan.h" +#ifdef UNITTEST +#undef UNITTEST +#include "scan_longn.c" +#define UNITTEST +#endif + size_t scan_long(const char *src,long *dest) { return scan_longn(src,(size_t)(-1),dest); } + +#ifdef UNITTEST +#include + +int main() { + long i; + assert(scan_long("1234",&i)==4 && i==1234); + assert(scan_long("-1234",&i)==5 && i==-1234); + assert(scan_long("+1234",&i)==5 && i==1234); + assert(scan_long("2147483647",&i)==10 && i==2147483647); // INT_MAX + assert(scan_long("-2147483648",&i)==11 && i==-2147483648); // INT_MIN + if (sizeof(int) == sizeof(long)) { + assert(scan_long("4294967295",&i)==9 && i==429496729); // overflow + assert(scan_long("2147483648",&i)==9 && i==214748364); // overflow + assert(scan_long("-2147483649",&i)==10 && i==-214748364); // underflow + } else { + assert(scan_long("9223372036854775807",&i)==19 && i==0x7fffffffffffffffull); // LONG_MAX + assert(scan_long("-9223372036854775808",&i)==20 && i==(long long)0x8000000000000000ull); // LONG_MIN + } + return 0; +} +#endif diff --git a/scan/scan_longlong.c b/scan/scan_longlong.c index 751733c..3d0934c 100644 --- a/scan/scan_longlong.c +++ b/scan/scan_longlong.c @@ -2,6 +2,12 @@ static const unsigned long maxlong = ((unsigned long)-1)>>1; +#ifdef UNITTEST +#undef UNITTEST +#include "scan_ulonglong.c" +#define UNITTEST +#endif + size_t scan_longlong(const char* src,signed long long* dest) { size_t i,o; unsigned long long l; @@ -17,3 +23,19 @@ size_t scan_longlong(const char* src,signed long long* dest) { return i+o; } return 0; } + +#ifdef UNITTEST +#include + +int main() { + long long i; + assert(scan_longlong("1234",&i)==4 && i==1234); + assert(scan_longlong("-1234",&i)==5 && i==-1234); + assert(scan_longlong("+1234",&i)==5 && i==1234); + assert(scan_longlong("2147483647",&i)==10 && i==2147483647); // INT_MAX + assert(scan_longlong("-2147483648",&i)==11 && i==-2147483648); // INT_MIN + assert(scan_longlong("9223372036854775807",&i)==19 && i==0x7fffffffffffffffull); // LONG_MAX + assert(scan_longlong("-9223372036854775808",&i)==20 && i==(long long)0x8000000000000000ull); // LONG_MIN + return 0; +} +#endif diff --git a/scan/scan_longn.c b/scan/scan_longn.c index 5543e69..f09ac40 100644 --- a/scan/scan_longn.c +++ b/scan/scan_longn.c @@ -16,6 +16,10 @@ size_t scan_longn(const char *src,size_t n,long *dest) { } while (n-->0 && (c=(unsigned char)(*tmp-'0'))<10) { unsigned long int n; +#if defined(__GNUC__) && (__GNUC__ >= 5) + if (__builtin_mul_overflow(l,10,&n) || __builtin_add_overflow(n,c,&n)) + break; +#else /* we want to do: l=l*10+c * but we need to check for integer overflow. * to check whether l*10 overflows, we could do @@ -28,6 +32,7 @@ size_t scan_longn(const char *src,size_t n,long *dest) { n+=(unsigned long)l<<1; if (n+c < n) break; n+=c; +#endif if (n > maxlong+neg) break; l=(long)n; ++tmp; diff --git a/scan/scan_netstring.c b/scan/scan_netstring.c index d7d3e26..8ef57f4 100644 --- a/scan/scan_netstring.c +++ b/scan/scan_netstring.c @@ -23,3 +23,20 @@ size_t scan_netstring(const char* in,size_t len,char** dest,size_t* slen) { *slen=l; return n+2+l; } + +#ifdef UNITTEST +#include +#include "scan_ulongn.c" + +int main() { + char buf[]="12:hello world!,"; + char* s; + size_t l; + const char* orig; + assert(scan_netstring(buf,16,&s,&l)==16 && s==buf+3 && l==12); + + orig="3:foo,"; assert(scan_netstring(orig,6,&s,&l)==6 && s==orig+2 && l==3); + orig="4294967295:foo,"; assert(scan_netstring(orig,15,&s,&l)==0); + orig="18446744073709551615:foo,"; assert(scan_netstring(orig,25,&s,&l)==0); +} +#endif diff --git a/scan/scan_short.c b/scan/scan_short.c index b35a0aa..ec9ecc4 100644 --- a/scan/scan_short.c +++ b/scan/scan_short.c @@ -15,6 +15,10 @@ size_t scan_short(const char* src,short* dest) { } while ((c=(unsigned char)(*tmp-'0'))<10) { unsigned short int n; +#if defined(__GNUC__) && (__GNUC__ >= 5) + if (__builtin_mul_overflow(l,10,&n) || __builtin_add_overflow(n,c,&n)) + break; +#else /* we want to do: l=l*10+c * but we need to check for integer overflow. * to check whether l*10 overflows, we could do @@ -27,6 +31,7 @@ size_t scan_short(const char* src,short* dest) { n=(unsigned short)(n+(l<<1)); if (n+c < n) break; n=(unsigned short)(n+c); +#endif if (n > maxshort+neg) break; l=(short)n; ++tmp; diff --git a/scan/scan_ulonglong.c b/scan/scan_ulonglong.c index 23cd8eb..3b03a80 100644 --- a/scan/scan_ulonglong.c +++ b/scan/scan_ulonglong.c @@ -6,14 +6,34 @@ size_t scan_ulonglong(const char *src,unsigned long long *dest) { register unsigned char c; while ((c=(unsigned char)(*tmp-'0'))<10) { unsigned long long n; +#if defined(__GNUC__) && (__GNUC__ >= 5) + if (__builtin_mul_overflow(l,10,&n) || __builtin_add_overflow(n,c,&n)) + break; + l=n; +#else /* division is very slow on most architectures */ n=l<<3; if ((n>>3)!=l) break; if (n+(l<<1) < n) break; n+=l<<1; if (n+c < n) break; l=n+c; +#endif ++tmp; } if (tmp-src) *dest=l; return (size_t)(tmp-src); } + +#ifdef UNITTEST +#include + +int main() { + unsigned long long l; + assert(scan_ulonglong("0",&l)==1 && l==0); + assert(scan_ulonglong("1",&l)==1 && l==1); + assert(scan_ulonglong("4294967295",&l) == 10 && l==4294967295ull); + assert(scan_ulonglong("18446744073709551615",&l) == 20 && l==18446744073709551615ull); + assert(scan_ulonglong("18446744073709551616",&l) == 19 && l==1844674407370955161ull); + return 0; +} +#endif diff --git a/scan/scan_ulongn.c b/scan/scan_ulongn.c index 1255b9a..ad45f65 100644 --- a/scan/scan_ulongn.c +++ b/scan/scan_ulongn.c @@ -8,6 +8,21 @@ size_t scan_ulongn(const char* src,size_t n,unsigned long int* dest) { /* Since the conditions can be computed at compile time, the compiler * should only emit code for one of the implementations, depending on * which architecture the code is compiled for. */ +#if defined(__GNUC__) && (__GNUC__ >= 5) + /* implementation for recent gcc or clang */ + int ok=0; + for (; n-->0 && (c=(unsigned char)(*tmp-'0'))<10; ++tmp) { + unsigned long v; + if (__builtin_mul_overflow(l,10,&v) || __builtin_add_overflow(v,c,&v)) + break; + l=v; + ok=1; + } + if (!ok) return 0; + + *dest=l; + return (size_t)(tmp-src); +#else #ifdef HAVE_UINT128 if (sizeof(unsigned long)==sizeof(unsigned long long) && sizeof(unsigned long) + +int main() { + unsigned long l; + assert(scan_ulongn("4294967295",10,&l) == 10 && l==4294967295ul); + if (sizeof(unsigned long)==4) { + assert(scan_ulongn("4294967296",10,&l) == 9 && l==429496729); + } else { + assert(scan_ulongn("18446744073709551615",20,&l) == 20 && l==18446744073709551615ull); + assert(scan_ulongn("18446744073709551616",20,&l) == 19 && l==1844674407370955161ull); + } + assert(scan_ulongn("1234",3,&l)==3 && l==123); + return 0; +} +#endif diff --git a/scan/scan_ushort.c b/scan/scan_ushort.c index fa8aaf8..eb78e6c 100644 --- a/scan/scan_ushort.c +++ b/scan/scan_ushort.c @@ -29,3 +29,17 @@ size_t scan_ushort(const char* src,unsigned short* dest) { (void)compileerror; } } + +#ifdef UNITTEST +#include + +int main() { + unsigned short i; + assert(scan_ushort("1234",&i)==4 && i==1234); + assert(scan_ushort("-1",&i)==0); + if (sizeof(short)==2) { + assert(scan_ushort("123456",&i)==5 && i==12345); + } + return 0; +} +#endif diff --git a/scan/scan_utf8.c b/scan/scan_utf8.c index f668e0b..3942841 100644 --- a/scan/scan_utf8.c +++ b/scan/scan_utf8.c @@ -50,3 +50,74 @@ size_t scan_utf8(const char* in,size_t len,uint32_t* num) { return (size_t)(in-orig); } +#ifdef UNITTEST +#include +#include "fmt/fmt_utf8.c" + +int main() { + char buf[100]; + uint32_t l; + unsigned int i; + /* first positive testing for the various lengths */ + l=fmt_utf8(buf,0); assert(l == 1 && scan_utf8(buf,l+1,&l)==1 && l==0); + l=fmt_utf8(buf,0x80); assert(l == 2 && scan_utf8(buf,l+1,&l)==2 && l==0x80); + l=fmt_utf8(buf,0x800); assert(l == 3 && scan_utf8(buf,l+1,&l)==3 && l==0x800); + l=fmt_utf8(buf,0x10000); assert(l == 4 && scan_utf8(buf,l+1,&l)==4 && l==0x10000); + l=fmt_utf8(buf,0x200000); assert(l == 5 && scan_utf8(buf,l+1,&l)==5 && l==0x200000); + l=fmt_utf8(buf,0x4000000); assert(l == 6 && scan_utf8(buf,l+1,&l)==6 && l==0x4000000); + /* corner cases */ + l=fmt_utf8(buf,0x7f); assert(l == 1 && scan_utf8(buf,l+1,&l)==1 && l==0x7f); + l=fmt_utf8(buf,0x7ff); assert(l == 2 && scan_utf8(buf,l+1,&l)==2 && l==0x7ff); + l=fmt_utf8(buf,0xffff); assert(l == 3 && scan_utf8(buf,l+1,&l)==3 && l==0xffff); + l=fmt_utf8(buf,0x1fffff); assert(l == 4 && scan_utf8(buf,l+1,&l)==4 && l==0x1fffff); + l=fmt_utf8(buf,0x3ffffff); assert(l == 5 && scan_utf8(buf,l+1,&l)==5 && l==0x3ffffff); + l=fmt_utf8(buf,0x7fffffff); assert(l == 6 && scan_utf8(buf,l+1,&l)==6 && l==0x7fffffff); + /* more corner cases */ + l=fmt_utf8(buf,0xd7ff); assert(l == 3 && scan_utf8(buf,l+1,&l)==3 && l==0xd7ff); + l=fmt_utf8(buf,0xe000); assert(l == 3 && scan_utf8(buf,l+1,&l)==3 && l==0xe000); + l=fmt_utf8(buf,0xfffd); assert(l == 3 && scan_utf8(buf,l+1,&l)==3 && l==0xfffd); + l=fmt_utf8(buf,0x10ffff); assert(l == 4 && scan_utf8(buf,l+1,&l)==4 && l==0x10ffff); + l=fmt_utf8(buf,0x110000); assert(l == 4 && scan_utf8(buf,l+1,&l)==4 && l==0x110000); + + /* now negative testing */ + /* start off with some continuation bytes outside a sequence */ + for (i=0x80; i<=0xbf; ++i) { + buf[0]=i; + assert(scan_utf8(buf,2,&l)==0); + } + + /* now check lonely sequence start characters */ + buf[1]=' '; + for (i=0xc0; i<=0xfd; ++i) { + buf[0]=i; + assert(scan_utf8(buf,2,&l)==0); + } + + /* FE and FF are reserved for UTF-16 endianness detection*/ + assert(scan_utf8("\xfe\xff",3,&l)==0); + assert(scan_utf8("\xff\xfe",3,&l)==0); + + /* now check some truncated sequences */ + l=fmt_utf8(buf,0); assert(l == 1 && scan_utf8(buf,l-1,&l)==0); + l=fmt_utf8(buf,0x80); assert(l == 2 && scan_utf8(buf,l-1,&l)==0); + l=fmt_utf8(buf,0x800); assert(l == 3 && scan_utf8(buf,l-1,&l)==0); + l=fmt_utf8(buf,0x10000); assert(l == 4 && scan_utf8(buf,l-1,&l)==0); + l=fmt_utf8(buf,0x200000); assert(l == 5 && scan_utf8(buf,l-1,&l)==0); + l=fmt_utf8(buf,0x4000000); assert(l == 6 && scan_utf8(buf,l-1,&l)==0); + + /* now truncate in another way */ + l=fmt_utf8(buf,0x80); buf[l-1]=' '; assert(l == 2 && scan_utf8(buf,l+1,&l)==0); + l=fmt_utf8(buf,0x800); buf[l-1]=' '; assert(l == 3 && scan_utf8(buf,l+1,&l)==0); + l=fmt_utf8(buf,0x10000); buf[l-1]=' '; assert(l == 4 && scan_utf8(buf,l+1,&l)==0); + l=fmt_utf8(buf,0x200000); buf[l-1]=' '; assert(l == 5 && scan_utf8(buf,l+1,&l)==0); + l=fmt_utf8(buf,0x4000000); buf[l-1]=' '; assert(l == 6 && scan_utf8(buf,l+1,&l)==0); + + /* now some not minimally encoded utf-8 sequences */ + assert(scan_utf8("\xc0\x80",3,&l)==0); + assert(scan_utf8("\xe0\x80\x80",4,&l)==0); + assert(scan_utf8("\xf0\x80\x80\x80",5,&l)==0); + assert(scan_utf8("\xf8\x80\x80\x80\x80",6,&l)==0); + assert(scan_utf8("\xfc\x80\x80\x80\x80\x80",7,&l)==0); + return 0; +} +#endif diff --git a/scan/scan_utf8_sem.c b/scan/scan_utf8_sem.c index 451166e..36fc368 100644 --- a/scan/scan_utf8_sem.c +++ b/scan/scan_utf8_sem.c @@ -9,3 +9,13 @@ size_t scan_utf8_sem(const char* in,size_t len,uint32_t* num) { } return r; } + +#ifdef UNITTEST +#include + +int main() { + /* rest of scan_utf8 tested in scan_utf8.c unit tests */ + assert(scan_utf8_sem("\xed\xa0\x80",4,&l)==0); + return 0; +} +#endif diff --git a/scan/scan_varint.c b/scan/scan_varint.c index fd15576..ca78bd7 100644 --- a/scan/scan_varint.c +++ b/scan/scan_varint.c @@ -5,8 +5,11 @@ size_t scan_varint(const char* in,size_t len, unsigned long long* n) { unsigned long long l; if (len==0) return 0; for (l=0, i=0; isizeof(l)*8) // value too large to fit in destination integer! + return 0; l+=(unsigned long long)(in[i]&0x7f) << (i*7); if (!(in[i]&0x80)) { + if (in[i]==0) return 0; // libowfat extension: reject non-minimal encoding *n=l; return i+1; } @@ -21,3 +24,16 @@ size_t scan_pb_type0_int(const char* dest,size_t len,unsigned long long* l) { return scan_varint(dest,len,l); } #endif + +#ifdef UNITTEST +#include + +int main() { + unsigned long long n; + assert(scan_varint("\x05",1,&n)==1 && n==5); + assert(scan_varint("\x80\x00",2,&n)==0); // non-minimal encoding + assert(scan_varint("\x80\x80",2,&n)==0); // truncated (last byte in sequence has high bit 0) + assert(scan_varint("\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01",10,&n)==0); // value too large + return 0; +} +#endif diff --git a/scan/scan_whitenskip.c b/scan/scan_whitenskip.c index 3297f0a..634ae85 100644 --- a/scan/scan_whitenskip.c +++ b/scan/scan_whitenskip.c @@ -7,3 +7,15 @@ size_t scan_whitenskip(const char *s,size_t limit) { while (t + +int main() { + assert(scan_whitenskip("foo",3)==0); + assert(scan_whitenskip(" foo",4)==1); + assert(scan_whitenskip(" foo",1)==1); + assert(scan_whitenskip(" foo",2)==2); + return 0; +} +#endif diff --git a/scan/scan_xint.c b/scan/scan_xint.c index 316c059..5a11f7e 100644 --- a/scan/scan_xint.c +++ b/scan/scan_xint.c @@ -1,5 +1,11 @@ #include "scan.h" +#ifdef UNITTEST +#undef UNITTEST +#include "scan_fromhex.c" +#define UNITTEST +#endif + size_t scan_xint(const char* src,unsigned int* dest) { register const char *tmp=src; register unsigned int l=0; @@ -11,3 +17,17 @@ size_t scan_xint(const char* src,unsigned int* dest) { *dest=l; return (size_t)(tmp-src); } + +#ifdef UNITTEST +#include + +int main() { + unsigned int i; + assert(scan_xint("fefe",&i)==4 && i==0xfefe); + assert(scan_xint("0xfefe",&i)==1); // 0x not supported, will scan the 0 + assert(scan_xint("+fefe",&i)==0); + assert(scan_xint("fefec0de",&i)==8 && i==0xfefec0de); + assert(scan_xint("fefec0debaad",&i)==8 && i==0xfefec0de); // test truncation + return 0; +} +#endif diff --git a/scan/scan_xlong.c b/scan/scan_xlong.c index b0e9e05..8cda884 100644 --- a/scan/scan_xlong.c +++ b/scan/scan_xlong.c @@ -3,3 +3,5 @@ size_t scan_xlong(const char *src,unsigned long *dest) { return scan_xlongn(src,(size_t)-1,dest); } + +/* unit tested via scan_xlongn */ diff --git a/scan/scan_xlongn.c b/scan/scan_xlongn.c index e29f574..181196e 100644 --- a/scan/scan_xlongn.c +++ b/scan/scan_xlongn.c @@ -1,5 +1,11 @@ #include "scan.h" +#ifdef UNITTEST +#undef UNITTEST +#include "scan_fromhex.c" +#define UNITTEST +#endif + size_t scan_xlongn(const char *src,size_t n,unsigned long *dest) { register const char *tmp=src; register unsigned long l=0; @@ -11,3 +17,22 @@ size_t scan_xlongn(const char *src,size_t n,unsigned long *dest) { *dest=l; return (size_t)(tmp-src); } + +#ifdef UNITTEST +#include + +int main() { + unsigned long i; + assert(scan_xlongn("fefe",4,&i)==4 && i==0xfefe); + assert(scan_xlongn("0xfefe",6,&i)==1); // 0x not supported, will scan the 0 + assert(scan_xlongn("+fefe",5,&i)==0); + assert(scan_xlongn("fefec0de",7,&i)==7 && i==0xfefec0d); + assert(scan_xlongn("fefec0de",8,&i)==8 && i==0xfefec0de); + assert(scan_xlongn("fefec0de",9,&i)==8 && i==0xfefec0de); + if (sizeof(i)==4) + assert(scan_xlongn("fefec0debaad",14,&i)==8 && i==0xfefec0de); // test truncation + else if (sizeof(i)==8) + assert(scan_xlongn("fefec0debaadc0debl06",40,&i)==16 && i==0xfefec0debaadc0de); // test truncation + return 0; +} +#endif diff --git a/scan/scan_xshort.c b/scan/scan_xshort.c index 53b8f95..f7659da 100644 --- a/scan/scan_xshort.c +++ b/scan/scan_xshort.c @@ -1,5 +1,11 @@ #include "scan.h" +#ifdef UNITTEST +#undef UNITTEST +#include "scan_fromhex.c" +#define UNITTEST +#endif + size_t scan_xshort(const char* src,unsigned short* dest) { register const char *tmp=src; register unsigned short l=0; @@ -11,3 +17,16 @@ size_t scan_xshort(const char* src,unsigned short* dest) { *dest=l; return (size_t)(tmp-src); } + +#ifdef UNITTEST +#include + +int main() { + unsigned short i; + assert(scan_xshort("fefe",&i)==4 && i==0xfefe); + assert(scan_xshort("0xfefe",&i)==1); // 0x not supported, will scan the 0 + assert(scan_xshort("+fefe",&i)==0); + assert(scan_xshort("fefec0de",&i)==4 && i==0xfefe); // test truncation + return 0; +} +#endif diff --git a/textcode/scan_base64.c b/textcode/scan_base64.c index 8affddd..6fdc5c7 100644 --- a/textcode/scan_base64.c +++ b/textcode/scan_base64.c @@ -50,5 +50,6 @@ int main() { memset(buf,0,10); assert(scan_base64("Zm5vcmQ=",buf,&l)==8 && l==5 && !memcmp(buf,"fnord",6)); memset(buf,0,10); assert(scan_base64("Zm5vcmQ",buf,&l)==7 && l==5 && !memcmp(buf,"fnord",6)); memset(buf,0,10); assert(scan_base64("//8=",buf,&l)==4 && l==2 && !memcmp(buf,"\xff\xff",3)); + return 0; } #endif diff --git a/textcode/scan_jsonescape.c b/textcode/scan_jsonescape.c index ae3bc61..786f2c0 100644 --- a/textcode/scan_jsonescape.c +++ b/textcode/scan_jsonescape.c @@ -101,5 +101,6 @@ int main() { assert(scan_jsonescape("a\\ud834\\udd1eb",buf,&l)==14 && l==6 && !memcmp(buf,"a\xf0\x9d\x84\x9e""b",6)); /* how about some incorrect UTF-8? */ assert(scan_jsonescape("a\xc0\xaf",buf,&l)==1 && l==1 && !memcmp(buf,"a",1)); + return 0; } #endif diff --git a/textcode/scan_uuencoded.c b/textcode/scan_uuencoded.c index af6f2f0..703ec93 100644 --- a/textcode/scan_uuencoded.c +++ b/textcode/scan_uuencoded.c @@ -36,6 +36,7 @@ int main() { assert(scan_uuencoded("&9FYO