bring scan_cescape up to speed
This commit is contained in:
parent
8526ae3d0d
commit
ac2df2bf20
35
textcode/scan_cescape.3
Normal file
35
textcode/scan_cescape.3
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
.TH scan_cescape 3
|
||||||
|
.SH NAME
|
||||||
|
scan_cescape \- parse C escaped string
|
||||||
|
.SH SYNTAX
|
||||||
|
.B #include <libowfat/textcode.h>
|
||||||
|
|
||||||
|
size_t \fBscan_cescape\fP(const char *\fIsrc\fR,char *\fIdest\fR,size_t* \fIdestlen\fR);
|
||||||
|
|
||||||
|
.SH DESCRIPTION
|
||||||
|
scan_cescape parses C escaped text from src into dest.
|
||||||
|
Parsing stops at the 0 terminator, invalid input characters or a double
|
||||||
|
quote that was not escaped.
|
||||||
|
|
||||||
|
C escape sequences like \\n and \\x0a and \\012 are translated into
|
||||||
|
their binary counterparts. The C99 escape sequences \\u and \\U are
|
||||||
|
supported and lead to UTF-8 sequences being output.
|
||||||
|
|
||||||
|
scan_cescape will then write the number of bytes in dest into *destlen,
|
||||||
|
and return the number of bytes decoded from src.
|
||||||
|
|
||||||
|
dest can be NULL. destlen can be NULL.
|
||||||
|
|
||||||
|
To make sure dest is large enough, either allocate strlen(src)+1 bytes
|
||||||
|
or call scan_cescape twice, the first time with dest == NULL (*destlen
|
||||||
|
will still be written).
|
||||||
|
|
||||||
|
.SH "RETURN VALUE"
|
||||||
|
scan_cescape returns the number of bytes successfully parsed
|
||||||
|
from src.
|
||||||
|
|
||||||
|
.SH EXAMPLES
|
||||||
|
scan_cescape("test\\n\\");",buf,&i) -> return 6, i=5, buf="test\\n".
|
||||||
|
|
||||||
|
.SH "SEE ALSO"
|
||||||
|
fmt_jsonescape(3), fmt_cescape(3), scan_ldapescape(3)
|
@ -18,25 +18,84 @@ size_t scan_cescape(const char *src,char *dest,size_t *destlen) {
|
|||||||
case 't': c='\t'; break;
|
case 't': c='\t'; break;
|
||||||
case 'v': c='\v';
|
case 'v': c='\v';
|
||||||
case '\\': break;
|
case '\\': break;
|
||||||
case 'x':
|
case 'x': // hex escape; \x0a -> 10
|
||||||
{
|
{
|
||||||
unsigned char a,b;
|
unsigned char a,b;
|
||||||
a=scan_fromhex(s[i+2]);
|
a=scan_fromhex(s[i+2]);
|
||||||
b=scan_fromhex(s[i+3]);
|
b=scan_fromhex(s[i+3]);
|
||||||
if (a<16 && b<16) {
|
if (a<16) {
|
||||||
c=(a<<4)+b;
|
if (b<16) {
|
||||||
i+=2;
|
c=(a<<4)+b;
|
||||||
|
i+=2;
|
||||||
|
} else {
|
||||||
|
c=a;
|
||||||
|
i+=1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case 'u': // C99 unicode escape: \u000a -> 10
|
||||||
|
case 'U': // C99 unicode escape: \U0000000a -> 10
|
||||||
|
{
|
||||||
|
unsigned int j,k=0,l=(s[i+1]=='U'?10:6);
|
||||||
|
for (j=2; j<l; ++j) {
|
||||||
|
unsigned char c=scan_fromhex(s[i+j]);
|
||||||
|
if (c>=16) // error
|
||||||
|
goto error; // don't allow short sequences
|
||||||
|
k=k*16+c;
|
||||||
|
}
|
||||||
|
written+=fmt_utf8(dest?dest+written:0,k);
|
||||||
|
i+=j-1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
|
if (s[i+1]>='0' && s[i+1]<='7') { // octal escape; \012 -> 10
|
||||||
|
unsigned int j,k;
|
||||||
|
for (k=0,j=1; j<4; ++j) {
|
||||||
|
unsigned int l=s[i+j]-'0';
|
||||||
|
if (l<8)
|
||||||
|
k=k*8+l;
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (dest) dest[written++]=k;
|
||||||
|
i+=j-1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
--i;
|
--i;
|
||||||
}
|
}
|
||||||
++i;
|
++i;
|
||||||
}
|
} else if (c=='"')
|
||||||
dest[written]=c;
|
break;
|
||||||
|
if (dest) dest[written]=c;
|
||||||
++written;
|
++written;
|
||||||
}
|
}
|
||||||
*destlen=written;
|
error:
|
||||||
|
if (destlen) *destlen=written;
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef UNITTEST
|
||||||
|
#include <assert.h>
|
||||||
|
#undef UNITTEST
|
||||||
|
#include <scan/scan_fromhex.c>
|
||||||
|
#include <fmt/fmt_utf8.c>
|
||||||
|
|
||||||
|
/* Unit-test driver for scan_cescape; only compiled when UNITTEST is defined.
 * Every assert checks three things at once: the return value (bytes consumed
 * from the source string), dl (bytes written to buf), and the decoded bytes.
 * NOTE(review): memcmp is used here, but the only standard header visibly
 * included in this UNITTEST section is <assert.h> -- confirm <string.h> is
 * pulled in elsewhere. */
int main() {
|
||||||

/* receives *destlen, i.e. the number of bytes written to buf */
size_t dl;
|
||||||

/* destination buffer for the decoded output */
char buf[100];
|
||||||

/* simple escape: \n becomes a newline; the unescaped double quote ends
 * parsing, so only the 6 bytes before it are consumed */
assert(scan_cescape("test\\n\");",buf,&dl)==6 && dl==5 && !memcmp(buf,"test\n",5));
|
||||||

/* check hex and octal escaping */
|
||||||

assert(scan_cescape("test\\x0a\");",buf,&dl)==8 && dl==5 && !memcmp(buf,"test\n",5));
|
||||||

assert(scan_cescape("test\\012\");",buf,&dl)==8 && dl==5 && !memcmp(buf,"test\n",5));
|
||||||

/* check short escape sequences */
/* (one hex digit, and one or two octal digits, are accepted) */
|
||||||

assert(scan_cescape("test\\xa\");",buf,&dl)==7 && dl==5 && !memcmp(buf,"test\n",5));
|
||||||

assert(scan_cescape("test\\12\");",buf,&dl)==7 && dl==5 && !memcmp(buf,"test\n",5));
|
||||||

assert(scan_cescape("test\\1\");",buf,&dl)==6 && dl==5 && !memcmp(buf,"test\1",5));
|
||||||

/* check unicode */
/* (\u takes 4 hex digits, \U takes 8; the code point is emitted as UTF-8) */
|
||||||

assert(scan_cescape("test\\u000a\");",buf,&dl)==10 && dl==5 && !memcmp(buf,"test\n",5));
|
||||||

assert(scan_cescape("test\\U0000000a\");",buf,&dl)==14 && dl==5 && !memcmp(buf,"test\n",5));
|
||||||

/* check that short sequences are rejected */
/* (parsing stops at the backslash: 4 bytes consumed, 4 bytes written) */
|
||||||

assert(scan_cescape("test\\Ua\");",buf,&dl)==4 && dl==4 && !memcmp(buf,"test",4));
|
||||||

}
|
||||||
|
#endif
|
||||||
|
31
textcode/scan_jsonescape.3
Normal file
31
textcode/scan_jsonescape.3
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
.TH scan_jsonescape 3
|
||||||
|
.SH NAME
|
||||||
|
scan_jsonescape \- parse JSON escaped string
|
||||||
|
.SH SYNTAX
|
||||||
|
.B #include <libowfat/textcode.h>
|
||||||
|
|
||||||
|
size_t \fBscan_jsonescape\fP(const char *\fIsrc\fR,char *\fIdest\fR,size_t* \fIdestlen\fR);
|
||||||
|
|
||||||
|
.SH DESCRIPTION
|
||||||
|
scan_jsonescape parses JSON escaped text from src into dest, leaving a
|
||||||
|
UTF-8 string in dest. Parsing stops at the 0 terminator, invalid input
|
||||||
|
characters or a double quote that was not escaped.
|
||||||
|
|
||||||
|
It will then write the number of bytes in dest into *destlen,
|
||||||
|
and return the number of bytes decoded from src.
|
||||||
|
|
||||||
|
dest can be NULL. destlen can be NULL.
|
||||||
|
|
||||||
|
To make sure dest is large enough, either allocate strlen(src)+1 bytes
|
||||||
|
or call scan_jsonescape twice, the first time with dest == NULL (*destlen
|
||||||
|
will still be written).
|
||||||
|
|
||||||
|
.SH "RETURN VALUE"
|
||||||
|
scan_jsonescape returns the number of bytes successfully parsed
|
||||||
|
from src.
|
||||||
|
|
||||||
|
.SH EXAMPLES
|
||||||
|
scan_jsonescape("test\\n\\");",buf,&i) -> return 6, i=5, buf="test\\n".
|
||||||
|
|
||||||
|
.SH "SEE ALSO"
|
||||||
|
fmt_jsonescape(3), scan_cescape(3), scan_ldapescape(3)
|
@ -73,6 +73,10 @@ abort:
|
|||||||
#ifdef UNITTEST
|
#ifdef UNITTEST
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#undef UNITTEST
|
||||||
|
#include <scan/scan_fromhex.c>
|
||||||
|
#include <scan/scan_utf8.c>
|
||||||
|
#include <fmt/fmt_utf8.c>
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
char buf[100];
|
char buf[100];
|
||||||
@ -105,3 +109,5 @@ int main() {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user