diff --git a/textcode/scan_base64url.c b/textcode/scan_base64url.c index 93a1ea7..8989d68 100644 --- a/textcode/scan_base64url.c +++ b/textcode/scan_base64url.c @@ -15,7 +15,7 @@ static inline int dec(unsigned char x) { size_t scan_base64url(const char *src,char *dest,size_t *destlen) { unsigned short tmp=0,bits=0; register const unsigned char* s=(const unsigned char*) src; - size_t i,j=0; + size_t i; for (i=0;;) { int a=dec(*s); if (a<0) break; /* base64url does not have padding */ diff --git a/textcode/scan_hexdump.3 b/textcode/scan_hexdump.3 index 98559fc..73708ac 100644 --- a/textcode/scan_hexdump.3 +++ b/textcode/scan_hexdump.3 @@ -8,7 +8,7 @@ size_t \fBscan_hexdump\fP(const char *\fIsrc\fR,char *\fIdest\fR,size_t* \fIdest .SH DESCRIPTION scan_hexdump decodes hexdump data from src into dest. -It will stop when it encounters any non-valid input characters. +It will stop when it encounters any invalid input characters. It will then write the number of decoded bytes in dest into *destlen, and return the number of bytes decoded from src. diff --git a/textcode/scan_urlencoded.3 b/textcode/scan_urlencoded.3 new file mode 100644 index 0000000..9af95a3 --- /dev/null +++ b/textcode/scan_urlencoded.3 @@ -0,0 +1,35 @@ +.TH scan_urlencoded 3 +.SH NAME +scan_urlencoded \- decode urlencoded data +.SH SYNTAX +.B #include + +size_t \fBscan_urlencoded\fP(const char *\fIsrc\fR,char *\fIdest\fR,size_t* \fIdestlen\fR); + +.SH DESCRIPTION +scan_urlencoded decodes urlencoded data from src into dest. +Decoding stops when it encounters any invalid input characters (including ' ', '?' and '&'). + +It will then write the number of decoded bytes in dest into *destlen, +and return the number of bytes decoded from src. + +Note that the plus character is handled differently depending on whether +we are decoding the query part (application/x-www-form-urlencoded) or the path. +In "http://example.com/a+b?c+d", only the second plus can be decoded +as a space character. 
If you do not want + decoded to space, use +scan_urlencoded2. + +dest can be NULL. destlen can be NULL. + +.SH "RETURN VALUE" +scan_urlencoded returns the number of bytes successfully scanned and +processed from src. +.SH EXAMPLES +scan_urlencoded("foo%2ebar",buf,&i) -> return 9, i=7, buf="foo.bar" + +scan_urlencoded("foo+bar",buf,&i) -> return 7, i=7, buf="foo bar" + +scan_urlencoded2("foo+bar",buf,&i) -> return 7, i=7, buf="foo+bar" + +.SH "SEE ALSO" +scan_urlencoded2(3), scan_xlong(3), scan_8long(3), fmt_ulong(3) diff --git a/textcode/scan_urlencoded.c b/textcode/scan_urlencoded.c index 4b953e9..10432bf 100644 --- a/textcode/scan_urlencoded.c +++ b/textcode/scan_urlencoded.c @@ -8,14 +8,17 @@ static size_t inner_scan_urlencoded(const char *src,char *dest,size_t *destlen,i for (i=0; s[i]; ++i) { if (s[i]=='%') { int j=scan_fromhex(s[i+1]); + unsigned char c; if (j<0) break; - dest[written]=j<<4; + c=j<<4; j=scan_fromhex(s[i+2]); if (j<0) break; - dest[written]|=j; + dest[written]=c|j; i+=2; } else if (s[i]=='+' && plus) dest[written]=' '; + else if (s[i]<=' ' || s[i]=='?' 
|| s[i]=='&') + break; /* invalid input */ else dest[written]=s[i]; ++written; @@ -31,3 +34,27 @@ size_t scan_urlencoded(const char *src,char *dest,size_t *destlen) { size_t scan_urlencoded2(const char *src,char *dest,size_t *destlen) { return inner_scan_urlencoded(src,dest,destlen,0); } + +#ifdef UNITTEST +#include +#include +#undef UNITTEST +#include + +int main() { + char buf[100]; + size_t l; + /* check base operation */ + memset(buf,'?',sizeof(buf)); + assert(scan_urlencoded("foo%2ebar",buf,&l)==9 && l==7 && !memcmp(buf,"foo.bar?",8)); + /* check + handling */ + memset(buf,'?',sizeof(buf)); + assert(scan_urlencoded("foo+bar",buf,&l)==7 && l==7 && !memcmp(buf,"foo bar?",8)); + assert(scan_urlencoded2("foo+bar",buf,&l)==7 && l==7 && !memcmp(buf,"foo+bar?",8)); + /* check that we abort on invalid sequences */ + memset(buf,'?',sizeof(buf)); + assert(scan_urlencoded("foo%2xbar",buf,&l)==3 && l==3 && !memcmp(buf,"foo?",4)); + assert(scan_urlencoded("foo\nbar",buf,&l)==3 && l==3 && !memcmp(buf,"foo?",4)); + assert(scan_urlencoded("foo bar",buf,&l)==3 && l==3 && !memcmp(buf,"foo?",4)); +} +#endif diff --git a/textcode/scan_urlencoded2.3 b/textcode/scan_urlencoded2.3 new file mode 100644 index 0000000..c50685b --- /dev/null +++ b/textcode/scan_urlencoded2.3 @@ -0,0 +1,27 @@ +.TH scan_urlencoded2 3 +.SH NAME +scan_urlencoded2 \- decode urlencoded data +.SH SYNTAX +.B #include + +size_t \fBscan_urlencoded2\fP(const char *\fIsrc\fR,char *\fIdest\fR,size_t* \fIdestlen\fR); + +.SH DESCRIPTION +scan_urlencoded2 behaves like scan_urlencoded, but does not decode '+' to ' '. + +See scan_urlencoded(3) for details. + +dest can be NULL. destlen can be NULL. + +.SH "RETURN VALUE" +scan_urlencoded2 returns the number of bytes successfully scanned and +processed from src. 
+.SH EXAMPLES +scan_urlencoded2("foo%20bar",buf,&i) -> return 9, i=7, buf="foo bar" + +scan_urlencoded2("foo+bar",buf,&i) -> return 7, i=7, buf="foo+bar" + +scan_urlencoded("foo+bar",buf,&i) -> return 7, i=7, buf="foo bar" + +.SH "SEE ALSO" +scan_urlencoded(3), scan_xlong(3), scan_8long(3), fmt_ulong(3)