add man page and unit tests for scan_base64url
This commit is contained in:
parent
8d449d442b
commit
c8156a9841
@ -16,10 +16,6 @@ Note that real world base64 encoded data is sometimes permitted to
|
|||||||
contain whitespace characters or new lines. This function will not allow
|
contain whitespace characters or new lines. This function will not allow
|
||||||
those and return the decoded data until then.
|
those and return the decoded data until then.
|
||||||
|
|
||||||
base64 works by taking 3 bytes of binary input and converting them into
|
|
||||||
4 bytes of printable ASCII. If the input ends in the middle of a base64
|
|
||||||
4-byte-tuple, scan_base64 will disregard the whole tuple.
|
|
||||||
|
|
||||||
Many base64 variants demand padding in the last block. Some don't. This
|
Many base64 variants demand padding in the last block. Some don't. This
|
||||||
implementation will consume padding if it is there, but will not
|
implementation will consume padding if it is there, but will not
|
||||||
complain if it is not.
|
complain if it is not.
|
||||||
@ -30,7 +26,7 @@ dest can be NULL. destlen can be NULL.
|
|||||||
scan_base64 returns the number of bytes successfully scanned and
|
scan_base64 returns the number of bytes successfully scanned and
|
||||||
processed from src.
|
processed from src.
|
||||||
.SH EXAMPLES
|
.SH EXAMPLES
|
||||||
scan_base64("%9FYO<F0`",buf,&i) -> return 8, i=5, buf="fnord"
|
scan_base64("Zm5vcmQ=",buf,&i) -> return 8, i=5, buf="fnord"
|
||||||
|
|
||||||
.SH "SEE ALSO"
|
.SH "SEE ALSO"
|
||||||
scan_xlong(3), scan_8long(3), fmt_ulong(3)
|
scan_base64url(3), scan_xlong(3), scan_8long(3), fmt_ulong(3)
|
||||||
|
@ -48,8 +48,10 @@ int main() {
|
|||||||
char buf[100];
|
char buf[100];
|
||||||
size_t i,l;
|
size_t i,l;
|
||||||
memset(buf,0,10); assert(scan_base64("Zm5vcmQ=",buf,&l)==8 && l==5 && !memcmp(buf,"fnord",6));
|
memset(buf,0,10); assert(scan_base64("Zm5vcmQ=",buf,&l)==8 && l==5 && !memcmp(buf,"fnord",6));
|
||||||
|
/* check that we don't insist on the padding */
|
||||||
memset(buf,0,10); assert(scan_base64("Zm5vcmQ",buf,&l)==7 && l==5 && !memcmp(buf,"fnord",6));
|
memset(buf,0,10); assert(scan_base64("Zm5vcmQ",buf,&l)==7 && l==5 && !memcmp(buf,"fnord",6));
|
||||||
memset(buf,0,10); assert(scan_base64("//8=",buf,&l)==4 && l==2 && !memcmp(buf,"\xff\xff",3));
|
/* check the special non-isalnum chars :) */
|
||||||
|
memset(buf,0,10); assert(scan_base64("/+8=",buf,&l)==4 && l==2 && !memcmp(buf,"\xff\xef",3));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
32
textcode/scan_base64url.3
Normal file
32
textcode/scan_base64url.3
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
.TH scan_base64url 3
|
||||||
|
.SH NAME
|
||||||
|
scan_base64url \- decode base64url encoded data
|
||||||
|
.SH SYNTAX
|
||||||
|
.B #include <libowfat/textcode.h>
|
||||||
|
|
||||||
|
size_t \fBscan_base64url\fP(const char *\fIsrc\fR,char *\fIdest\fR,size_t* \fIdestlen\fR);
|
||||||
|
|
||||||
|
.SH DESCRIPTION
|
||||||
|
base64url is a variant of base64 for use in URLs (standard base64 uses /
|
||||||
|
and +, which can cause problems in URLs, so base64url uses - and _
|
||||||
|
instead; also base64url does not use = padding at the end).
|
||||||
|
|
||||||
|
scan_base64url decodes base64url encoded data from src into dest.
|
||||||
|
It will stop when it encounters any invalid input character.
|
||||||
|
It will then write the number of decoded bytes in dest into *destlen,
|
||||||
|
and return the number of bytes decoded from src.
|
||||||
|
|
||||||
|
Many base64 variants demand padding in the last block. base64url does
|
||||||
|
not. This implementation stops at a = padding character without
|
||||||
|
consuming it, and will not complain if padding is absent.
|
||||||
|
|
||||||
|
dest can be NULL. destlen can be NULL.
|
||||||
|
|
||||||
|
.SH "RETURN VALUE"
|
||||||
|
scan_base64url returns the number of bytes successfully scanned and
|
||||||
|
processed from src.
|
||||||
|
.SH EXAMPLES
|
||||||
|
scan_base64url("Zm5vcmQ",buf,&i) -> return 7, i=5, buf="fnord"
|
||||||
|
|
||||||
|
.SH "SEE ALSO"
|
||||||
|
scan_base64(3), scan_xlong(3), scan_8long(3), fmt_ulong(3)
|
@ -15,17 +15,36 @@ static inline int dec(unsigned char x) {
|
|||||||
size_t scan_base64url(const char *src,char *dest,size_t *destlen) {
|
size_t scan_base64url(const char *src,char *dest,size_t *destlen) {
|
||||||
unsigned short tmp=0,bits=0;
|
unsigned short tmp=0,bits=0;
|
||||||
register const unsigned char* s=(const unsigned char*) src;
|
register const unsigned char* s=(const unsigned char*) src;
|
||||||
const char* orig=dest;
|
size_t i,j=0;
|
||||||
for (;;) {
|
for (i=0;;) {
|
||||||
int a=dec(*s);
|
int a=dec(*s);
|
||||||
if (a<0) break;
|
if (a<0) break; /* base64url does not have padding */
|
||||||
tmp=(tmp<<6)|a; bits+=6;
|
tmp=(tmp<<6)|a; bits+=6;
|
||||||
++s;
|
++s;
|
||||||
if (bits>=8) {
|
if (bits>=8) {
|
||||||
*dest=(tmp>>(bits-=8));
|
bits-=8;
|
||||||
++dest;
|
if (dest) dest[i]=(tmp>>bits);
|
||||||
|
++i;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*destlen=dest-orig;
|
if (destlen) *destlen=i;
|
||||||
return (const char*)s-src;
|
return (const char*)s-src;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef UNITTEST
|
||||||
|
#include <assert.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
char buf[100];
|
||||||
|
size_t i,l;
|
||||||
|
/* check that we don't consume padding */
|
||||||
|
memset(buf,0,10); assert(scan_base64url("Zm5vcmQ=",buf,&l)==7 && l==5 && !memcmp(buf,"fnord",6));
|
||||||
|
/* check that we don't insist on the padding */
|
||||||
|
memset(buf,0,10); assert(scan_base64url("Zm5vcmQ",buf,&l)==7 && l==5 && !memcmp(buf,"fnord",6));
|
||||||
|
/* check the special non-isalnum chars :) */
|
||||||
|
memset(buf,0,10); assert(scan_base64url("_-8=",buf,&l)==3 && l==2 && !memcmp(buf,"\xff\xef",3));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
@ -26,8 +26,8 @@ static const char* lookup(size_t ofs,const char* t) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
enum htmlmode { /* <a href="http://example.com/"foo">libowfat<home</a> */
|
enum htmlmode { /* <a href="http://example.com/"foo">libowfat<home</a> */
|
||||||
OUTSIDE, /* ^^^^^^^^^^^^^^^^ -> `libowfat<home` */
|
OUTSIDE, /* ^^^^^^^^^^^^^^^^ -> libowfat<home */
|
||||||
TAGARG, /* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -> `http://example.com/"foo´ */
|
TAGARG, /* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -> http://example.com/"foo */
|
||||||
};
|
};
|
||||||
|
|
||||||
static size_t scan_html_inner(const char *src,char *dest,size_t *destlen,enum htmlmode mode) {
|
static size_t scan_html_inner(const char *src,char *dest,size_t *destlen,enum htmlmode mode) {
|
||||||
@ -42,49 +42,41 @@ static size_t scan_html_inner(const char *src,char *dest,size_t *destlen,enum ht
|
|||||||
size_t j;
|
size_t j;
|
||||||
if ((s[i+2]&~32)=='X') {
|
if ((s[i+2]&~32)=='X') {
|
||||||
j=scan_xlong(src+i+3,&l);
|
j=scan_xlong(src+i+3,&l);
|
||||||
if (!j) j+=3;
|
if (j) j+=3;
|
||||||
} else {
|
} else {
|
||||||
j=scan_ulong(src+i+2,&l);
|
j=scan_ulong(src+i+2,&l);
|
||||||
if (!j) j+=3;
|
if (j) j+=2;
|
||||||
}
|
}
|
||||||
if (s[i+j]==';') {
|
if (s[i+j]==';') {
|
||||||
i+=j;
|
i+=j;
|
||||||
written+=fmt_utf8(dest+written,l);
|
written+=fmt_utf8(dest?dest+written:0,l);
|
||||||
} else {
|
} else {
|
||||||
dest[written++]='&';
|
if (dest) dest[written]='&';
|
||||||
|
++written;
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
utf8=lookup(1,src+i+1);
|
utf8=lookup(1,src+i+1);
|
||||||
if (utf8) {
|
if (utf8) {
|
||||||
size_t l=strlen(utf8);
|
size_t l=strlen(utf8);
|
||||||
memcpy(dest+written,utf8,l);
|
if (dest) memcpy(dest+written,utf8,l);
|
||||||
written+=l;
|
written+=l;
|
||||||
i+=2+str_chr(src+i+2,';');
|
i+=2+str_chr(src+i+2,';');
|
||||||
continue;
|
continue;
|
||||||
} else
|
} else
|
||||||
dest[written]='&';
|
if (dest) dest[written]='&';
|
||||||
} else if (s[i]=='<') {
|
} else if (s[i]=='<') {
|
||||||
if (mode == OUTSIDE) break;
|
break;
|
||||||
if (case_starts((const char*)s+i+1,"br>")) {
|
|
||||||
dest[written]='\n';
|
|
||||||
i+=3;
|
|
||||||
} else if (case_starts((const char*)s+i+1,"p>")) {
|
|
||||||
dest[written]='\n'; ++written;
|
|
||||||
dest[written]='\n';
|
|
||||||
i+=3;
|
|
||||||
} else
|
|
||||||
dest[written]=s[i];
|
|
||||||
} else if (s[i]=='"' && mode==TAGARG) {
|
} else if (s[i]=='"' && mode==TAGARG) {
|
||||||
if (i==0) { dq=1; continue; }
|
if (i==0) { dq=1; continue; }
|
||||||
break;
|
break;
|
||||||
} else if (mode==TAGARG && !dq && (s[i]==' ' || s[i]=='\t' || s[i]=='\n'))
|
} else if (mode==TAGARG && !dq && (s[i]==' ' || s[i]=='\t' || s[i]=='\n'))
|
||||||
break;
|
break;
|
||||||
else
|
else
|
||||||
dest[written]=s[i];
|
if (dest) dest[written]=s[i];
|
||||||
++written;
|
++written;
|
||||||
}
|
}
|
||||||
*destlen=written;
|
if (destlen) *destlen=written;
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -98,13 +90,40 @@ size_t scan_html(const char *src,char *dest,size_t *destlen) {
|
|||||||
|
|
||||||
#ifdef UNITTEST
|
#ifdef UNITTEST
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#undef UNITTEST
|
||||||
|
#include <scan/scan_fromhex.c>
|
||||||
|
#include <scan/scan_xlongn.c>
|
||||||
|
#include <scan/scan_xlong.c>
|
||||||
|
#include <scan/scan_ulongn.c>
|
||||||
|
#include <scan/scan_ulong.c>
|
||||||
|
#include <str/str_chr.c>
|
||||||
|
#include <fmt/fmt_utf8.c>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
char* html="<a href=\"http://example.com/"foo\">libowfat<home</a>";
|
char* html="<a href=\"http://example.com/"foo\">libowfat<home</a>";
|
||||||
char buf[100];
|
char buf[100];
|
||||||
size_t destlen;
|
size_t destlen;
|
||||||
|
/* check that we stop at < */
|
||||||
assert(scan_html(html,buf,&destlen)==0 && destlen==0);
|
assert(scan_html(html,buf,&destlen)==0 && destlen==0);
|
||||||
assert(scan_html(strchr(html,'>')+1,buf,&destlen)==16 && destlen==13 && !memcmp(buf,"libowfat<home",13));
|
/* check that we properly decode < */
|
||||||
assert(scan_html_tagarg(strchr(html,'"')+1,buf,&destlen)==28 && destlen==23 && !memcmp(buf,"http://example.com/\"foo",23));
|
memset(buf,'?',sizeof(buf));
|
||||||
|
assert(scan_html(strchr(html,'>')+1,buf,&destlen)==16 && destlen==13 && !memcmp(buf,"libowfat<home?",14));
|
||||||
|
/* check that we stop at " and properly decode " */
|
||||||
|
memset(buf,'?',sizeof(buf));
|
||||||
|
assert(scan_html_tagarg(strchr(html,'"')+1,buf,&destlen)==28 && destlen==23 && !memcmp(buf,"http://example.com/\"foo?",24));
|
||||||
|
/* check that we pass through invalid escapes */
|
||||||
|
memset(buf,'?',sizeof(buf));
|
||||||
|
assert(scan_html("&fnord;",buf,&destlen)==7 && destlen==7 && !memcmp(buf,"&fnord;?",8));
|
||||||
|
memset(buf,'?',sizeof(buf));
|
||||||
|
assert(scan_html("&#x;",buf,&destlen)==4 && destlen==4 && !memcmp(buf,"&#x;?",5));
|
||||||
|
memset(buf,'?',sizeof(buf));
|
||||||
|
assert(scan_html("&#;",buf,&destlen)==3 && destlen==3 && !memcmp(buf,"&#;?",4));
|
||||||
|
/* check that &#x[hex]; is decoded properly */
|
||||||
|
memset(buf,'?',sizeof(buf));
|
||||||
|
assert(scan_html("",buf,&destlen)==5 && destlen==1 && buf[0]==1 && buf[1]=='?');
|
||||||
|
/* check that &#[decimal]; is decoded properly */
|
||||||
|
memset(buf,'?',sizeof(buf));
|
||||||
|
assert(scan_html("",buf,&destlen)==4 && destlen==1 && buf[0]==1 && buf[1]=='?');
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user