add asciiz parsing
parent
50ceb217ab
commit
9ddab2153f
@ -0,0 +1,95 @@
|
|||||||
|
#include "parse.h"
|
||||||
|
|
||||||
|
unsigned char bs_peek(struct bytestream* bs) {
|
||||||
|
unsigned char r;
|
||||||
|
char c;
|
||||||
|
if (bs->cur>=bs->max) { // EOF or already error state?
|
||||||
|
bs->max=0; // signal error
|
||||||
|
bs->cur=1;
|
||||||
|
return 0; // return 0
|
||||||
|
}
|
||||||
|
switch (bs->type) {
|
||||||
|
|
||||||
|
case MEMBUF:
|
||||||
|
r=bs->u.base[bs->cur];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case IOBUF:
|
||||||
|
{
|
||||||
|
int ret=buffer_peekc(bs->u.b, &c);
|
||||||
|
if (ret==1) {
|
||||||
|
r=c;
|
||||||
|
} else {
|
||||||
|
bs->max=0;
|
||||||
|
bs->cur=1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case BSTREAM:
|
||||||
|
r=bs_peek(bs->u.bs);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
r=0; // cannot happen
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef UNITTEST
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
struct bytestream bs = BS_FROM_MEMBUF("fx", 1);
|
||||||
|
|
||||||
|
/* first test: membuf.
|
||||||
|
* See if we get all the bytes we put in and then error is signaled */
|
||||||
|
assert(bs_peek(&bs) == 'f');
|
||||||
|
assert(bs_peek(&bs) == 'f');
|
||||||
|
assert(!bs_err(&bs));
|
||||||
|
assert(bs_get(&bs) == 'f');
|
||||||
|
assert(!bs_err(&bs));
|
||||||
|
assert(bs_peek(&bs) == 0);
|
||||||
|
assert(bs_err(&bs));
|
||||||
|
|
||||||
|
/* second test: iobuf with no limit. Otherwise the same. */
|
||||||
|
struct buffer b;
|
||||||
|
buffer_init_staticcontents(&b, "fx", 1);
|
||||||
|
bs_init_iobuf(&bs, &b);
|
||||||
|
|
||||||
|
assert(bs_peek(&bs) == 'f');
|
||||||
|
assert(bs_peek(&bs) == 'f');
|
||||||
|
assert(!bs_err(&bs));
|
||||||
|
assert(bs_get(&bs) == 'f');
|
||||||
|
assert(!bs_err(&bs));
|
||||||
|
assert(bs_peek(&bs) == 0);
|
||||||
|
assert(bs_err(&bs));
|
||||||
|
|
||||||
|
/* third test: iobuf with limit. Otherwise the same. */
|
||||||
|
buffer_init_staticcontents(&b, "fx", 2);
|
||||||
|
bs_init_iobuf_size(&bs, &b, 1);
|
||||||
|
|
||||||
|
assert(bs_peek(&bs) == 'f');
|
||||||
|
assert(bs_peek(&bs) == 'f');
|
||||||
|
assert(!bs_err(&bs));
|
||||||
|
assert(bs_get(&bs) == 'f');
|
||||||
|
assert(!bs_err(&bs));
|
||||||
|
assert(bs_peek(&bs) == 0);
|
||||||
|
assert(bs_err(&bs));
|
||||||
|
|
||||||
|
/* fourth test: iobuf with EOF */
|
||||||
|
buffer_init_staticcontents(&b, "fx", 1);
|
||||||
|
bs_init_iobuf(&bs, &b); // bytestream has no limit but will hit EOF in backing buffer
|
||||||
|
|
||||||
|
assert(bs_peek(&bs) == 'f');
|
||||||
|
assert(bs_peek(&bs) == 'f');
|
||||||
|
assert(!bs_err(&bs));
|
||||||
|
assert(bs_get(&bs) == 'f');
|
||||||
|
assert(!bs_err(&bs));
|
||||||
|
assert(bs_peek(&bs) == 0);
|
||||||
|
assert(bs_err(&bs));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,66 @@
|
|||||||
|
#include "parse.h"
|
||||||
|
|
||||||
|
static const size_t max_ssize_t = (((size_t)1) << (sizeof(size_t)*8-1))-1;
|
||||||
|
|
||||||
|
/* Read an asciiz string from the byte stream, up to len bytes (including the 0 terminator). */
|
||||||
|
/* Return number of bytes consumed (excluding the 0 terminator), i.e. strlen(dest) */
|
||||||
|
/* If there is no 0 byte in these len bytes, set error flag in stream and return -1. */
|
||||||
|
/* Calling this function with len==0 is an error. */
|
||||||
|
/* destsize will be clamped to the maximum number representable in ssize_t */
|
||||||
|
ssize_t prs_asciiz(struct bytestream* bs, char* dest, size_t len) {
|
||||||
|
size_t i;
|
||||||
|
/* The maximum value of ssize_t is half that of size_t.
|
||||||
|
* So we arbitrarily decide to limit len to it here. */
|
||||||
|
if (len>max_ssize_t) len=max_ssize_t;
|
||||||
|
if (len==0) {
|
||||||
|
bs->cur = 1; // mark bytestream state as erroneous
|
||||||
|
bs->max = 0;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i=0; i+1<len; ++i)
|
||||||
|
if ((dest[i] = bs_get(bs)) == 0) {
|
||||||
|
/* we might get here either because there actually was a 0 byte in
|
||||||
|
* the stream, or because we hit EOF. So check if we hit EOF here
|
||||||
|
* and return -1 then. */
|
||||||
|
return (bs->cur<=bs->max) ? (ssize_t)i : -1;
|
||||||
|
}
|
||||||
|
/* if we get here, we read len-1 bytes and there was no 0 byte. */
|
||||||
|
if ((dest[i] = bs_peek(bs))) { // the loop went till i+1<len and we checked that len!=0
|
||||||
|
// the next byte was not 0, so signal error
|
||||||
|
bs->cur = 1;
|
||||||
|
bs->max = 0;
|
||||||
|
// but still write 0 terminator to dest
|
||||||
|
dest[i] = 0;
|
||||||
|
return -1;
|
||||||
|
} else {
|
||||||
|
bs_get(bs); // the next byte was 0, so consume it
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef UNITTEST
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
#undef UNITTEST
|
||||||
|
#include "buffer/bs_init_membuf.c"
|
||||||
|
#include "buffer/bs_get.c"
|
||||||
|
#include "buffer/buffer_peekc.c"
|
||||||
|
#include "buffer/buffer_getc.c"
|
||||||
|
#include "buffer/bs_peek.c"
|
||||||
|
#include "buffer/buffer_feed.c"
|
||||||
|
#include "buffer/buffer_stubborn2.c"
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
struct bytestream bs = BS_FROM_MEMBUF("fnord\n\0x",8);
|
||||||
|
char buf[100];
|
||||||
|
assert(prs_asciiz(&bs, buf, sizeof buf) == 6); // return value should be strlen("fnord\n")
|
||||||
|
assert(!memcmp(buf,"fnord\n",7)); // returned string should be "fnord\n" with 0 terminator
|
||||||
|
assert(bs_get(&bs) == 'x'); // should have consumed the 0 terminator from bytestream
|
||||||
|
|
||||||
|
bs_init_membuf(&bs, "fnord\n\0x", 8);
|
||||||
|
assert(prs_asciiz(&bs, buf, 5) == -1); // no 0 terminator in first 5 bytes, expect error
|
||||||
|
assert(!memcmp(buf,"fnor",5)); // expect 4 bytes + 0 terminator in dest buf
|
||||||
|
assert(bs_err(&bs)); // bytestream should be in error state now
|
||||||
|
}
|
||||||
|
#endif
|
@ -0,0 +1,58 @@
|
|||||||
|
#include "parse.h"
|
||||||
|
|
||||||
|
static const size_t max_ssize_t = (((size_t)1) << (sizeof(size_t)*8-1))-1;
|
||||||
|
|
||||||
|
/* Some protocols have a fixed field length for a string,
|
||||||
|
* If the string is shorter than the field, the rest is filled with 0
|
||||||
|
* bytes. But it is not an error if there are no 0 bytes.
|
||||||
|
* This function is for those cases (the filename field in the tar file
|
||||||
|
* header is an example of this).
|
||||||
|
* For a field of length 8, you need to pass destsize as 9 so we can add
|
||||||
|
* a 0 terminator. This function will consume the 8 bytes and add a 0 byte.
|
||||||
|
* The return value is strlen(dest). */
|
||||||
|
ssize_t prs_asciiz_fixedlen(struct bytestream* bs, char* dest, size_t len) {
|
||||||
|
size_t i;
|
||||||
|
ssize_t r=0;
|
||||||
|
/* The maximum value of ssize_t is half that of size_t.
|
||||||
|
* So we arbitrarily decide to limit len to it here. */
|
||||||
|
if (len>max_ssize_t) len=max_ssize_t;
|
||||||
|
if (len==0) {
|
||||||
|
bs->cur = 1; // mark bytestream state as erroneous
|
||||||
|
bs->max = 0;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i=0; i+1<len; ++i)
|
||||||
|
if ((dest[i] = bs_get(bs)) == 0 && r==0)
|
||||||
|
r=i; // note the position of the first 0 byte
|
||||||
|
dest[i] = 0; // add 0 terminator
|
||||||
|
if (dest[r]) r=i;
|
||||||
|
return bs_err(bs) ? -1 : r;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef UNITTEST
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
#undef UNITTEST
|
||||||
|
#include "buffer/bs_init_membuf.c"
|
||||||
|
#include "buffer/bs_get.c"
|
||||||
|
#include "buffer/buffer_peekc.c"
|
||||||
|
#include "buffer/buffer_getc.c"
|
||||||
|
#include "buffer/bs_peek.c"
|
||||||
|
#include "buffer/buffer_feed.c"
|
||||||
|
#include "buffer/buffer_stubborn2.c"
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
struct bytestream bs = BS_FROM_MEMBUF("fnord\n\0x",8);
|
||||||
|
char buf[100];
|
||||||
|
assert(prs_asciiz_fixedlen(&bs, buf, 8) == 6); // return value should be strlen("fnord\n")
|
||||||
|
assert(!memcmp(buf,"fnord\n\0\0",8)); // returned string should be "fnord\n" and the rest filled with 0 bytes
|
||||||
|
assert(!bs_err(&bs));
|
||||||
|
|
||||||
|
bs_init_membuf(&bs, "fnord\n\0x", 8);
|
||||||
|
assert(prs_asciiz_fixedlen(&bs, buf, 5) == 4); // no 0 terminator in first 4 bytes
|
||||||
|
assert(!memcmp(buf,"fnor",5)); // expect 4 bytes + 0 terminator in dest buf
|
||||||
|
assert(bs_get(&bs) == 'd'); // 0 terminator in buf was artificial, 'd' was not consumed
|
||||||
|
assert(!bs_err(&bs)); // bytestream should be ok
|
||||||
|
}
|
||||||
|
#endif
|
@ -0,0 +1,117 @@
|
|||||||
|
/* this header file comes from libowfat, http://www.fefe.de/libowfat/ */
|
||||||
|
#ifndef PARSE_H
|
||||||
|
#define PARSE_H
|
||||||
|
|
||||||
|
/* for size_t: */
|
||||||
|
#include <stddef.h>
|
||||||
|
/* for uint32_t: */
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include <libowfat/buffer.h>
|
||||||
|
|
||||||
|
#include <libowfat/uint16.h>
|
||||||
|
#include <libowfat/uint32.h>
|
||||||
|
#include <libowfat/uint64.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* This file declares an API for decoding binary messages.
|
||||||
|
Goals:
|
||||||
|
|
||||||
|
- You say in advance if there is a message size limit
|
||||||
|
|
||||||
|
- The object can be bound to a memory buffer or an I/O buffer
|
||||||
|
|
||||||
|
- After having set up the object, you get two APIs:
|
||||||
|
|
||||||
|
1. read bytes
|
||||||
|
2. was there an error?
|
||||||
|
|
||||||
|
If you parse a memory buffer manually, you need to do range
|
||||||
|
checking for every byte. If you parse from an I/O buffer
|
||||||
|
manually, you need to check for end of file or I/O error after
|
||||||
|
every byte.
|
||||||
|
|
||||||
|
This API will return 0 bytes and set the error flag when you read
|
||||||
|
past the limit. That way you don't have to check after every
|
||||||
|
byte, but only once at the end of each sub-message.
|
||||||
|
|
||||||
|
- Many binary protocols have a message length and then sub-packets.
|
||||||
|
For example, an IPv4 packet has a header with a length and the IP
|
||||||
|
options. With this abstraction here, you would have one
|
||||||
|
bytestream for the packet (with size limit set to how many bytes
|
||||||
|
came in from the network) and then you would make a new
|
||||||
|
bytestream for the IP header and another one for the option
|
||||||
|
headers. Each would only let you read bytes from that subregion,
|
||||||
|
and would do bounds checking at instantiation time to make sure
|
||||||
|
it physically fits into the upper layer space.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* State of one byte stream. The fields are read and written directly by
 * the bs_ and prs_ functions. */
struct bytestream {
  /* Which kind of backing store this stream reads from; selects the
   * member of u that is in use. */
  enum {
    MEMBUF,	/* u.base: bytes in memory */
    IOBUF,	/* u.b: an I/O buffer */
    BSTREAM	/* u.bs: another bytestream */
  } type;
  /* Read position. For MEMBUF this is the index into u.base of the
   * next byte. */
  size_t cur;
  /* Limit for cur; cur>=max means nothing more can be read.
   * The error state is encoded as max==0, cur==1 (cur>max). */
  size_t max;
  union {
    const unsigned char* base;
    struct buffer* b;
    struct bytestream* bs;
  } u;
};
|
||||||
|
|
||||||
|
/* Set up bs to read from the len bytes at membuf
 * (run-time counterpart of BS_FROM_MEMBUF). */
void bs_init_membuf(struct bytestream* bs, const unsigned char* membuf, size_t len);

/* Set up bs to read from the I/O buffer b without a size limit
 * (run-time counterpart of BS_FROM_BUFFER). */
void bs_init_iobuf(struct bytestream* bs, struct buffer* b);

/* Set up bs to read at most maxlen bytes from the I/O buffer b
 * (run-time counterpart of BS_FROM_BUFFER_SIZE). */
void bs_init_iobuf_size(struct bytestream* bs, struct buffer* b, size_t maxlen);

/* Set up bs to read at most maxlen bytes from the bytestream parent.
 * NOTE(review): the overview comment above says fitting into the parent
 * is checked at instantiation time - confirm against the implementation. */
void bs_init_bstream_size(struct bytestream* bs, struct bytestream* parent, size_t maxlen);
|
||||||
|
|
||||||
|
/* Static initializers for struct bytestream, for use as
 *   struct bytestream bs = BS_FROM_MEMBUF(ptr, n);
 * Members not named (in particular cur) are zero-initialized. */

/* read the len bytes starting at buf */
#define BS_FROM_MEMBUF(buf,len) { .type=MEMBUF, .max=(len), .u.base=(const unsigned char*)(buf) }

/* read from an I/O buffer; the limit is the largest size_t value,
 * i.e. effectively no limit */
#define BS_FROM_BUFFER(buffer) { .type=IOBUF, .max=(size_t)-1, .u.b=(buffer) }

/* read at most len bytes from an I/O buffer */
#define BS_FROM_BUFFER_SIZE(buffer,len) { .type=IOBUF, .max=(len), .u.b=(buffer) }
|
||||||
|
|
||||||
|
/* Return the next byte from the stream, or 0 on EOF or read error. */
unsigned char bs_get(struct bytestream* bs);

/* Like bs_get, but do not advance the position in the stream. */
unsigned char bs_peek(struct bytestream* bs);

/* Was there a read error, or did we attempt to read more than maxlen
 * bytes? */
int bs_err(struct bytestream* bs);

/* Can we read this many more bytes from the bytestream? */
int bs_capacitycheck(struct bytestream* bs, size_t capacity);

/* Fixed-width unsigned integer readers.
 * NOTE(review): presumably the plain variants read little-endian and the
 * _big variants big-endian values - confirm against the implementations. */
uint16_t prs_u16(struct bytestream* bs);
uint16_t prs_u16_big(struct bytestream* bs);
uint32_t prs_u32(struct bytestream* bs);
uint32_t prs_u32_big(struct bytestream* bs);
uint64_t prs_u64(struct bytestream* bs);
uint64_t prs_u64_big(struct bytestream* bs);
|
||||||
|
|
||||||
|
/* Read an asciiz string from the byte stream into dest, taking up to
 * destsize bytes (including the 0 terminator).
 * Returns the number of bytes consumed excluding the 0 terminator,
 * i.e. strlen(dest).
 * If there is no 0 byte within these destsize bytes, the error flag of
 * the stream is set and -1 is returned.
 * Calling this function with destsize==0 is an error.
 * destsize is clamped to the maximum number representable in ssize_t. */
ssize_t prs_asciiz(struct bytestream* bs, char* dest, size_t destsize);

/* Some protocols have a fixed field length for a string. If the string
 * is shorter than the field, the rest is filled with 0 bytes, but it is
 * not an error if there are no 0 bytes. This function is for those
 * cases (the filename field in the tar file header is an example).
 * For a field of length 8 you need to pass destsize as 9 so that a 0
 * terminator can be added: the function consumes the 8 bytes and
 * appends a 0 byte.
 * The return value is strlen(dest). */
ssize_t prs_asciiz_fixedlen(struct bytestream* bs, char* dest, size_t destsize);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
Loading…
Reference in New Issue