Add documentation to our uri scanner

dynamic-accesslists
erdgeist 17 years ago
parent ac078bccf2
commit 0cfd1e575d

@ -14,6 +14,16 @@
relax = "+" | "," | "/" | ";" | "<" | ">" | ":" relax = "+" | "," | "/" | ";" | "<" | ">" | ":"
*/ */
/* This matrix holds, for each ASCII character, the information
whether it is a non-terminating character for one of the three
scan states we are in, that is 'path', 'param' and 'value' from
/path?param=value&param=value. This is encoded in bits 0, 1 and 2,
respectively.
The top bit of the lower nibble (bit 3) indicates whether this
character is a hard terminator, i.e. \0, \n or \s, where the whole
scanning process should terminate.
*/
static const unsigned char is_unreserved[256] = { static const unsigned char is_unreserved[256] = {
8,0,0,0,0,0,0,0,0,0,8,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 8,0,0,0,0,0,0,0,0,0,8,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,7,8,8,8,7,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,4,7,6, 0,7,8,8,8,7,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,4,7,6,
@ -25,6 +35,7 @@ static const unsigned char is_unreserved[256] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
}; };
/* Do a fast hex digit to nibble conversion; 0xff signals an invalid digit */
static unsigned char fromhex(unsigned char x) { static unsigned char fromhex(unsigned char x) {
x-='0'; if( x<=9) return x; x-='0'; if( x<=9) return x;
x&=~0x20; x-='A'-'0'; x&=~0x20; x-='A'-'0';
@ -32,12 +43,19 @@ static unsigned char fromhex(unsigned char x) {
return 0xff; return 0xff;
} }
/* Skip the value of a param=value pair */
void scan_urlencoded_skipvalue( char **string ) { void scan_urlencoded_skipvalue( char **string ) {
const unsigned char* s=*(const unsigned char**) string; const unsigned char* s=*(const unsigned char**) string;
unsigned char f; unsigned char f;
/* Since we are asked to skip the 'value', we stop at the first
character that terminates a 'value' string */
while( ( f = is_unreserved[ *s++ ] ) & SCAN_SEARCHPATH_VALUE ); while( ( f = is_unreserved[ *s++ ] ) & SCAN_SEARCHPATH_VALUE );
/* If we stopped at a hard terminator like \0 or \n, make the
next scan_urlencoded_query encounter it again */
if( f & SCAN_SEARCHPATH_TERMINATOR ) --s; if( f & SCAN_SEARCHPATH_TERMINATOR ) --s;
*string = (char*)s; *string = (char*)s;
} }
@ -46,21 +64,35 @@ ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_F
unsigned char *d = (unsigned char*)deststring; unsigned char *d = (unsigned char*)deststring;
unsigned char b, c, f; unsigned char b, c, f;
/* This is the main decoding loop.
'flags' determines which characters are non-terminating in the current context
(i.e. stop at '=' and '&' if scanning for a 'param'; stop at '?' if scanning for the path)
*/
while( ( f = is_unreserved[ c = *s++ ] ) & flags ) { while( ( f = is_unreserved[ c = *s++ ] ) & flags ) {
/* When encountering a URL-escaped character, try to decode it */
if( c=='%') { if( c=='%') {
if( ( b = fromhex(*s++) ) == 0xff ) return -1; if( ( b = fromhex(*s++) ) == 0xff ) return -1;
if( ( c = fromhex(*s++) ) == 0xff ) return -1; if( ( c = fromhex(*s++) ) == 0xff ) return -1;
c|=(b<<4); c|=(b<<4);
} }
/* Write (possibly decoded) character to output */
*d++ = c; *d++ = c;
} }
switch( c ) { switch( c ) {
case 0: case '\r': case '\n': case ' ': case 0: case '\r': case '\n': case ' ':
/* If we started scanning on a hard terminator, indicate we've finished */
if( d == (unsigned char*)deststring ) return -2; if( d == (unsigned char*)deststring ) return -2;
/* Else make the next call to scan_urlencoded_query encounter it again */
--s; --s;
break; break;
case '?': case '?':
/* XXX to help us parse path?param=value?param=value?... sent by µTorrent 1600
do not return an error but silently terminate
if( flags != SCAN_PATH ) return -1; */
break; break;
case '=': case '=':
if( flags != SCAN_SEARCHPATH_PARAM ) return -1; if( flags != SCAN_SEARCHPATH_PARAM ) return -1;

@ -11,7 +11,8 @@ typedef enum {
SCAN_SEARCHPATH_TERMINATOR = 8 SCAN_SEARCHPATH_TERMINATOR = 8
} SCAN_SEARCHPATH_FLAG; } SCAN_SEARCHPATH_FLAG;
/* string pointer to source, pointer to next scan position on return /* string in: pointer to source
out: pointer to next scan position
deststring pointer to destination deststring pointer to destination
flags determines, what to parse flags determines, what to parse
returns number of valid converted characters in deststring returns number of valid converted characters in deststring
@ -19,7 +20,8 @@ typedef enum {
*/ */
ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_FLAG flags); ssize_t scan_urlencoded_query(char **string, char *deststring, SCAN_SEARCHPATH_FLAG flags);
/* string pointer to source, pointer to next scan position on return /* string in: pointer to value of a param=value pair to skip
out: pointer to next scan position on return
*/ */
void scan_urlencoded_skipvalue( char **string ); void scan_urlencoded_skipvalue( char **string );

Loading…
Cancel
Save