diff --git a/buffer/bs_peek.c b/buffer/bs_peek.c new file mode 100644 index 0000000..a0c784b --- /dev/null +++ b/buffer/bs_peek.c @@ -0,0 +1,95 @@ +#include "parse.h" + +unsigned char bs_peek(struct bytestream* bs) { /* Return the next byte of bs without consuming it; on failure put bs into the error state (max=0, cur=1) and return 0. */ + unsigned char r; + char c; + if (bs->cur>=bs->max) { // limit reached, or already in the error state (cur=1 >= max=0) + bs->max=0; // enter the error state + bs->cur=1; + return 0; // in-band 0; callers tell it apart from a real 0 byte via bs_err() + } + switch (bs->type) { + + case MEMBUF: /* read straight from the memory buffer at the current position */ + r=bs->u.base[bs->cur]; + break; + + case IOBUF: + { + int ret=buffer_peekc(bs->u.b, &c); /* any result other than 1 is treated as failure */ + if (ret==1) { + r=c; + } else { + bs->max=0; + bs->cur=1; + return 0; + } + } + break; + + case BSTREAM: /* delegate to the bytestream this one is layered on */ + r=bs_peek(bs->u.bs); + break; + + default: + r=0; // cannot happen: type is one of the three cases above + } + return r; +} + +#ifdef UNITTEST +#include + +int main() { + struct bytestream bs = BS_FROM_MEMBUF("fx", 1); + + /* first test: membuf. + * See if we get all the bytes we put in and then error is signaled */ + assert(bs_peek(&bs) == 'f'); + assert(bs_peek(&bs) == 'f'); + assert(!bs_err(&bs)); + assert(bs_get(&bs) == 'f'); + assert(!bs_err(&bs)); + assert(bs_peek(&bs) == 0); + assert(bs_err(&bs)); + + /* second test: iobuf with no limit. Otherwise the same. */ + struct buffer b; + buffer_init_staticcontents(&b, "fx", 1); + bs_init_iobuf(&bs, &b); + + assert(bs_peek(&bs) == 'f'); + assert(bs_peek(&bs) == 'f'); + assert(!bs_err(&bs)); + assert(bs_get(&bs) == 'f'); + assert(!bs_err(&bs)); + assert(bs_peek(&bs) == 0); + assert(bs_err(&bs)); + + /* third test: iobuf with limit. Otherwise the same. 
*/ + buffer_init_staticcontents(&b, "fx", 2); + bs_init_iobuf_size(&bs, &b, 1); + + assert(bs_peek(&bs) == 'f'); + assert(bs_peek(&bs) == 'f'); + assert(!bs_err(&bs)); + assert(bs_get(&bs) == 'f'); + assert(!bs_err(&bs)); + assert(bs_peek(&bs) == 0); + assert(bs_err(&bs)); + + /* fourth test: iobuf with EOF */ + buffer_init_staticcontents(&b, "fx", 1); + bs_init_iobuf(&bs, &b); // bytestream has no limit but will hit EOF in backing buffer + + assert(bs_peek(&bs) == 'f'); + assert(bs_peek(&bs) == 'f'); + assert(!bs_err(&bs)); + assert(bs_get(&bs) == 'f'); + assert(!bs_err(&bs)); + assert(bs_peek(&bs) == 0); + assert(bs_err(&bs)); + return 0; +} + +#endif diff --git a/buffer/prs_asciiz.c b/buffer/prs_asciiz.c new file mode 100644 index 0000000..8011671 --- /dev/null +++ b/buffer/prs_asciiz.c @@ -0,0 +1,66 @@ +#include "parse.h" + +static const size_t max_ssize_t = (((size_t)1) << (sizeof(size_t)*8-1))-1; + +/* Read an asciiz string from the byte stream, up to len bytes (including the 0 terminator). */ +/* Return number of bytes consumed (excluding the 0 terminator), i.e. strlen(dest) */ +/* If there is no 0 byte in these len bytes, set error flag in stream and return -1. */ +/* Calling this function with len==0 is an error. */ +/* len will be clamped to the maximum number representable in ssize_t */ +ssize_t prs_asciiz(struct bytestream* bs, char* dest, size_t len) { + size_t i; + /* The maximum value of ssize_t is half that of size_t. + * So we arbitrarily decide to limit len to it here. */ + if (len>max_ssize_t) len=max_ssize_t; + if (len==0) { + bs->cur = 1; // mark bytestream state as erroneous + bs->max = 0; + return -1; + } + + for (i=0; i+1cur<=bs->max) ? (ssize_t)i : -1; + } + /* if we get here, we read len-1 bytes and there was no 0 byte. 
*/ + if ((dest[i] = bs_peek(bs))) { // the loop went till i+1cur = 1; + bs->max = 0; + // but still write 0 terminator to dest + dest[i] = 0; + return -1; + } else { + bs_get(bs); // the next byte was 0, so consume it + return i; + } +} + +#ifdef UNITTEST +#include + +#undef UNITTEST +#include "buffer/bs_init_membuf.c" +#include "buffer/bs_get.c" +#include "buffer/buffer_peekc.c" +#include "buffer/buffer_getc.c" +#include "buffer/bs_peek.c" +#include "buffer/buffer_feed.c" +#include "buffer/buffer_stubborn2.c" + +int main() { + struct bytestream bs = BS_FROM_MEMBUF("fnord\n\0x",8); + char buf[100]; + assert(prs_asciiz(&bs, buf, sizeof buf) == 6); // return value should be strlen("fnord\n") + assert(!memcmp(buf,"fnord\n",7)); // returned string should be "fnord\n" with 0 terminator + assert(bs_get(&bs) == 'x'); // should have consumed the 0 terminator from bytestream + + bs_init_membuf(&bs, "fnord\n\0x", 8); + assert(prs_asciiz(&bs, buf, 5) == -1); // no 0 terminator in first 5 bytes, expect error + assert(!memcmp(buf,"fnor",5)); // expect 4 bytes + 0 terminator in dest buf + assert(bs_err(&bs)); // bytestream should be in error state now +} +#endif diff --git a/buffer/prs_asciiz_fixedlen.c b/buffer/prs_asciiz_fixedlen.c new file mode 100644 index 0000000..4b88640 --- /dev/null +++ b/buffer/prs_asciiz_fixedlen.c @@ -0,0 +1,58 @@ +#include "parse.h" + +static const size_t max_ssize_t = (((size_t)1) << (sizeof(size_t)*8-1))-1; + +/* Some protocols have a fixed field length for a string. + * If the string is shorter than the field, the rest is filled with 0 + * bytes. But it is not an error if there are no 0 bytes. + * This function is for those cases (the filename field in the tar file + * header is an example of this). + * For a field of length 8, you need to pass len as 9 so we can add + * a 0 terminator. This function will consume the 8 bytes and add a 0 byte. + * The return value is strlen(dest). 
*/ +ssize_t prs_asciiz_fixedlen(struct bytestream* bs, char* dest, size_t len) { + size_t i; + ssize_t r=0; + /* The maximum value of ssize_t is half that of size_t. + * So we arbitrarily decide to limit len to it here. */ + if (len>max_ssize_t) len=max_ssize_t; + if (len==0) { + bs->cur = 1; // mark bytestream state as erroneous + bs->max = 0; + return -1; + } + + for (i=0; i+1 + +#undef UNITTEST +#include "buffer/bs_init_membuf.c" +#include "buffer/bs_get.c" +#include "buffer/buffer_peekc.c" +#include "buffer/buffer_getc.c" +#include "buffer/bs_peek.c" +#include "buffer/buffer_feed.c" +#include "buffer/buffer_stubborn2.c" + +int main() { + struct bytestream bs = BS_FROM_MEMBUF("fnord\n\0x",8); + char buf[100]; + assert(prs_asciiz_fixedlen(&bs, buf, 8) == 6); // return value should be strlen("fnord\n") + assert(!memcmp(buf,"fnord\n\0\0",8)); // returned string should be "fnord\n" and the rest filled with 0 bytes + assert(!bs_err(&bs)); + + bs_init_membuf(&bs, "fnord\n\0x", 8); + assert(prs_asciiz_fixedlen(&bs, buf, 5) == 4); // no 0 terminator in first 4 bytes + assert(!memcmp(buf,"fnor",5)); // expect 4 bytes + 0 terminator in dest buf + assert(bs_get(&bs) == 'd'); // 0 terminator in buf was artificial, 'd' was not consumed + assert(!bs_err(&bs)); // bytestream should be ok +} +#endif diff --git a/parse.h b/parse.h new file mode 100644 index 0000000..1ff6c94 --- /dev/null +++ b/parse.h @@ -0,0 +1,117 @@ +/* this header file comes from libowfat, http://www.fefe.de/libowfat/ */ +#ifndef PARSE_H +#define PARSE_H + +/* for size_t: */ +#include +/* for uint32_t: */ +#include + +#include + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* This file declares an API for decoding binary messages. + Goals: + + - You say in advance if there is a message size limit + + - The object can be bound to a memory buffer or an I/O buffer + + - After having set up the object, you get two APIs: + + 1. read bytes + 2. was there an error? 
+ + If you parse a memory buffer manually, you need to do range + checking for every byte. If you parse from an I/O buffer + manually, you need to check for end of file or I/O error after + every byte. + + This API will return 0 bytes and set the error flag when you read + past the limit. That way you don't have to check after every + byte, but only once at the end of each sub-message. + + - Many binary protocols have a message length and then sub-packets. + For example, an IPv4 packet has a header with a length and the IP + options. With this abstraction here, you would have one + bytestream for the packet (with size limit set to how many bytes + came in from the network) and then you would make a new + bytestream for the IP header and another one for the option + headers. Each would only let you read bytes from that subregion, + and would do bounds checking at instantiation time to make sure + it physically fits into to upper layer space. +*/ + +struct bytestream { + enum { + MEMBUF, + IOBUF, + BSTREAM + } type; + size_t cur, max; + union { + const unsigned char* base; + struct buffer* b; + struct bytestream* bs; + } u; +}; + +void bs_init_membuf(struct bytestream* bs,const unsigned char* membuf,size_t len); + +void bs_init_iobuf(struct bytestream* bs,struct buffer* b); +void bs_init_iobuf_size(struct bytestream* bs,struct buffer* b,size_t maxlen); + +void bs_init_bstream_size(struct bytestream* bs,struct bytestream* parent,size_t maxlen); + +#define BS_FROM_MEMBUF(buf,len) { .type=MEMBUF, .max=(len), .u.base=(const unsigned char*)(buf) } +#define BS_FROM_BUFFER(buffer) { .type=IOBUF, .max=(size_t)-1, .u.b=(buffer) } +#define BS_FROM_BUFFER_SIZE(buffer,len) { .type=IOBUF, .max=(len), u.b=(buffer) } + +/* return next byte from stream or 0 if EOF or read error. */ +unsigned char bs_get(struct bytestream* bs); + +/* like bs_get but do not advance position in stream. 
*/ +unsigned char bs_peek(struct bytestream* bs); + +/* Was there a read error or did we attempt to read more than maxlen bytes? */ +int bs_err(struct bytestream* bs); + +/* Can we read this many more bytes from the bytestream? */ +int bs_capacitycheck(struct bytestream* bs,size_t capacity); + +uint16_t prs_u16(struct bytestream* bs); +uint16_t prs_u16_big(struct bytestream* bs); +uint32_t prs_u32(struct bytestream* bs); +uint32_t prs_u32_big(struct bytestream* bs); +uint64_t prs_u64(struct bytestream* bs); +uint64_t prs_u64_big(struct bytestream* bs); + +/* Read an asciiz string from the byte stream, up to destsize bytes (including the 0 terminator). */ +/* Return number of bytes consumed (excluding the 0 terminator), i.e. strlen(dest) */ +/* If there is no 0 byte in these destsize bytes, set error flag in stream and return -1. */ +/* Calling this function with destsize==0 is an error. */ +/* destsize will be clamped to the maximum number representable in ssize_t */ +ssize_t prs_asciiz(struct bytestream* bs, char* dest, size_t destsize); + +/* Some protocols have a fixed field length for a string. + * If the string is shorter than the field, the rest is filled with 0 + * bytes. But it is not an error if there are no 0 bytes. + * This function is for those cases (the filename field in the tar file + * header is an example of this). + * For a field of length 8, you need to pass destsize as 9 so we can add + * a 0 terminator. This function will consume the 8 bytes and add a 0 byte. + * The return value is strlen(dest). */ +ssize_t prs_asciiz_fixedlen(struct bytestream* bs, char* dest, size_t destsize); + +#ifdef __cplusplus +} +#endif + +#endif