add asciiz parsing

2020-10-30 01:27:19 +00:00 · 2020-10-30 01:27:19 +00:00 · 9ddab2153f
commit 9ddab2153f
parent 50ceb217ab
4 changed files with 336 additions and 0 deletions
--- a/buffer/bs_peek.c
+++ b/buffer/bs_peek.c
@ -0,0 +1,95 @@
 #include "parse.h"
 unsigned char bs_peek(struct bytestream* bs) {
  unsigned char r;
  char c;
  if (bs->cur>=bs->max) {	// EOF or already error state?
    bs->max=0;	// signal error
    bs->cur=1;
    return 0;	// return 0
  }
  switch (bs->type) {
  case MEMBUF:
    r=bs->u.base[bs->cur];
    break;
  case IOBUF:
    {
      int ret=buffer_peekc(bs->u.b, &c);
      if (ret==1) {
 	r=c;
      } else {
 	bs->max=0;
 	bs->cur=1;
 	return 0;
      }
    }
    break;
  case BSTREAM:
    r=bs_peek(bs->u.bs);
    break;
  default:
    r=0;	// cannot happen
  }
  return r;
 }
 #ifdef UNITTEST
 #include <assert.h>
 /* Shared expectation for all scenarios below: the stream yields exactly
  * one byte 'f'. Peeking is repeatable and does not consume; after the
  * byte was fetched with bs_get, the next peek returns 0 and flags the
  * stream as erroneous. */
 static void expect_single_f_then_error(struct bytestream* s) {
   assert(bs_peek(s) == 'f');
   assert(bs_peek(s) == 'f');
   assert(!bs_err(s));
   assert(bs_get(s) == 'f');
   assert(!bs_err(s));
   assert(bs_peek(s) == 0);
   assert(bs_err(s));
 }
 int main() {
   struct bytestream bs = BS_FROM_MEMBUF("fx", 1);
   struct buffer b;
   /* first test: membuf.
    * See if we get all the bytes we put in and then error is signaled */
   expect_single_f_then_error(&bs);
   /* second test: iobuf with no limit. Otherwise the same. */
   buffer_init_staticcontents(&b, "fx", 1);
   bs_init_iobuf(&bs, &b);
   expect_single_f_then_error(&bs);
   /* third test: iobuf with limit. Otherwise the same. */
   buffer_init_staticcontents(&b, "fx", 2);
   bs_init_iobuf_size(&bs, &b, 1);
   expect_single_f_then_error(&bs);
   /* fourth test: iobuf with EOF.
    * The bytestream has no limit but will hit EOF in the backing buffer. */
   buffer_init_staticcontents(&b, "fx", 1);
   bs_init_iobuf(&bs, &b);
   expect_single_f_then_error(&bs);
   return 0;
 }
 #endif
--- a/buffer/prs_asciiz.c
+++ b/buffer/prs_asciiz.c
@ -0,0 +1,66 @@
 #include "parse.h"
 /* Largest value representable in ssize_t, assuming ssize_t is the signed
  * counterpart of size_t: all bits of size_t set except the top one. */
 static const size_t max_ssize_t = SIZE_MAX >> 1;
 /* Read an asciiz (0-terminated) string from the byte stream into dest.
  * len is the size of dest; at most len bytes are taken from the stream,
  * including the 0 terminator.
  * On success the 0 terminator has been consumed and the return value is
  * the number of bytes stored in front of it, i.e. strlen(dest).
  * If there is no 0 byte within these len bytes, or the stream runs into
  * EOF / a read error before a 0 byte was seen, the error flag is set in
  * the stream and -1 is returned; dest is still 0-terminated then.
  * Calling this function with len==0 is an error (nothing is written to
  * dest in that case).
  * len will be clamped to the maximum number representable in ssize_t. */
 ssize_t prs_asciiz(struct bytestream* bs, char* dest, size_t len) {
   size_t i;
   /* The maximum value of ssize_t is half that of size_t.
    * So we arbitrarily decide to limit len to it here. */
   if (len>max_ssize_t) len=max_ssize_t;
   if (len==0) {
     bs->cur = 1;	// mark bytestream state as erroneous
     bs->max = 0;
     return -1;
   }
   for (i=0; i+1<len; ++i) {
     if ((dest[i] = bs_get(bs)) == 0) {
       /* we might get here either because there actually was a 0 byte in
        * the stream, or because we hit EOF. So check if we hit EOF here
        * and return -1 then. */
       return (bs->cur<=bs->max) ? (ssize_t)i : -1;
     }
   }
   /* if we get here, we read len-1 bytes and there was no 0 byte.
    * The only byte we may still accept is the terminator itself, so
    * look at it without consuming it. i==len-1 is a valid index because
    * we checked that len!=0. */
   if ((dest[i] = bs_peek(bs))) {
     // the next byte was not 0, so signal error
     bs->cur = 1;
     bs->max = 0;
     // but still write 0 terminator to dest
     dest[i] = 0;
     return -1;
   }
   /* bs_peek also hands out 0 when it runs into EOF or a read error, and
    * it puts the stream into the error state then. That is not a
    * terminator, so do not report success for it. */
   if (bs->cur>bs->max)
     return -1;
   bs_get(bs);	// the next byte was a real 0, so consume it
   return (ssize_t)i;
 }
 #ifdef UNITTEST
 #include <assert.h>
 #include <string.h>	/* for memcmp, which the checks below call */
 /* Pull the implementations this test needs into the same translation
  * unit; UNITTEST is undefined first so their own test mains stay out. */
 #undef UNITTEST
 #include "buffer/bs_init_membuf.c"
 #include "buffer/bs_get.c"
 #include "buffer/buffer_peekc.c"
 #include "buffer/buffer_getc.c"
 #include "buffer/bs_peek.c"
 #include "buffer/buffer_feed.c"
 #include "buffer/buffer_stubborn2.c"
 int main() {
   struct bytestream bs = BS_FROM_MEMBUF("fnord\n\0x",8);
   char buf[100];
   /* terminator present within the destination size */
   assert(prs_asciiz(&bs, buf, sizeof buf) == 6);	// return value should be strlen("fnord\n")
   assert(!memcmp(buf,"fnord\n",7));	// returned string should be "fnord\n" with 0 terminator
   assert(bs_get(&bs) == 'x');		// should have consumed the 0 terminator from bytestream
   /* destination too small to reach the terminator */
   bs_init_membuf(&bs, "fnord\n\0x", 8);
   assert(prs_asciiz(&bs, buf, 5) == -1);	// no 0 terminator in first 5 bytes, expect error
   assert(!memcmp(buf,"fnor",5));		// expect 4 bytes + 0 terminator in dest buf
   assert(bs_err(&bs));			// bytestream should be in error state now
   return 0;
 }
 #endif
--- a/buffer/prs_asciiz_fixedlen.c
+++ b/buffer/prs_asciiz_fixedlen.c
@ -0,0 +1,58 @@
 #include "parse.h"
 /* Largest value representable in ssize_t, assuming ssize_t is the signed
  * counterpart of size_t: all bits of size_t set except the top one. */
 static const size_t max_ssize_t = SIZE_MAX >> 1;
 /* Some protocols have a fixed field length for a string.
  * If the string is shorter than the field, the rest is filled with 0
  * bytes. But it is not an error if there are no 0 bytes.
  * This function is for those cases (the filename field in the tar file
  * header is an example of this).
  * len is the size of dest: for a field of length 8, you need to pass 9
  * so we can add a 0 terminator. This function will consume the 8 bytes
  * and add a 0 byte.
  * The return value is strlen(dest), i.e. the position of the first 0
  * byte in the field, or len-1 if the field contains none.
  * Returns -1 if len==0 or if the stream is in the error state after
  * reading the field.
  * len will be clamped to the maximum number representable in ssize_t. */
 ssize_t prs_asciiz_fixedlen(struct bytestream* bs, char* dest, size_t len) {
   size_t i;
   size_t slen;
   int seen_zero=0;
   /* The maximum value of ssize_t is half that of size_t.
    * So we arbitrarily decide to limit len to it here. */
   if (len>max_ssize_t) len=max_ssize_t;
   if (len==0) {
     bs->cur = 1;	// mark bytestream state as erroneous
     bs->max = 0;
     return -1;
   }
   slen=len-1;	// string length if the field holds no 0 byte at all
   for (i=0; i+1<len; ++i) {
     /* Always consume the whole field, but remember only the first 0
      * byte. A separate flag is needed because index 0 is a valid
      * position for it (empty string followed by padding). */
     if ((dest[i] = bs_get(bs)) == 0 && !seen_zero) {
       seen_zero=1;
       slen=i;
     }
   }
   dest[i] = 0;	// add 0 terminator
   return bs_err(bs) ? -1 : (ssize_t)slen;
 }
 #ifdef UNITTEST
 #include <assert.h>
 #include <string.h>	/* for memcmp, which the checks below call */
 /* Pull the implementations this test needs into the same translation
  * unit; UNITTEST is undefined first so their own test mains stay out. */
 #undef UNITTEST
 #include "buffer/bs_init_membuf.c"
 #include "buffer/bs_get.c"
 #include "buffer/buffer_peekc.c"
 #include "buffer/buffer_getc.c"
 #include "buffer/bs_peek.c"
 #include "buffer/buffer_feed.c"
 #include "buffer/buffer_stubborn2.c"
 int main() {
   struct bytestream bs = BS_FROM_MEMBUF("fnord\n\0x",8);
   char buf[100];
   /* field contains a 0 byte: 7 bytes are consumed, an 8th 0 is appended */
   assert(prs_asciiz_fixedlen(&bs, buf, 8) == 6);	// return value should be strlen("fnord\n")
   assert(!memcmp(buf,"fnord\n\0\0",8));	// returned string should be "fnord\n" and the rest filled with 0 bytes
   assert(!bs_err(&bs));
   /* field without a 0 byte: that is not an error for this function */
   bs_init_membuf(&bs, "fnord\n\0x", 8);
   assert(prs_asciiz_fixedlen(&bs, buf, 5) == 4);	// no 0 terminator in first 4 bytes
   assert(!memcmp(buf,"fnor",5));	// expect 4 bytes + 0 terminator in dest buf
   assert(bs_get(&bs) == 'd');		// 0 terminator in buf was artificial, 'd' was not consumed
   assert(!bs_err(&bs));			// bytestream should be ok
   return 0;
 }
 #endif
--- a/parse.h
+++ b/parse.h
@ -0,0 +1,117 @@
 /* this header file comes from libowfat, http://www.fefe.de/libowfat/ */
 #ifndef PARSE_H
 #define PARSE_H
 /* for size_t: */
 #include <stddef.h>
 /* for uint32_t: */
 #include <stdint.h>
 #include <libowfat/buffer.h>
 #include <libowfat/uint16.h>
 #include <libowfat/uint32.h>
 #include <libowfat/uint64.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* This file declares an API for decoding binary messages.
   Goals:
     - You say in advance if there is a message size limit
     - The object can be bound to a memory buffer or an I/O buffer
     - After having set up the object, you get two APIs:
       1. read bytes
       2. was there an error?
       If you parse a memory buffer manually, you need to do range
       checking for every byte. If you parse from an I/O buffer
       manually, you need to check for end of file or I/O error after
       every byte.
       This API will return 0 bytes and set the error flag when you read
       past the limit. That way you don't have to check after every
       byte, but only once at the end of each sub-message.
     - Many binary protocols have a message length and then sub-packets.
       For example, an IPv4 packet has a header with a length and the IP
       options. With this abstraction here, you would have one
       bytestream for the packet (with size limit set to how many bytes
       came in from the network) and then you would make a new
       bytestream for the IP header and another one for the option
       headers. Each would only let you read bytes from that subregion,
       and would do bounds checking at instantiation time to make sure
       it physically fits into the upper layer space.
 */
 /* State of one byte stream.
  * The stream is readable while cur<max; cur>=max means EOF or error.
  * The error state is encoded as cur=1, max=0. */
 struct bytestream {
   /* kind of backing store; selects the active member of u */
   enum {
     MEMBUF,	/* u.base: bytes in memory */
     IOBUF,	/* u.b: an I/O buffer */
     BSTREAM	/* u.bs: another bytestream */
   } type;
   size_t cur;	/* current read position (index into u.base for MEMBUF) */
   size_t max;	/* read limit */
   union {
     const unsigned char* base;
     struct buffer* b;
     struct bytestream* bs;
   } u;
 };
 /* Initializers that bind a bytestream to its backing store.
  * NOTE(review): the implementations are not part of this header; the
  * descriptions below follow the parameter names and the unit tests --
  * confirm against the .c files. */
 /* bind bs to the len bytes of memory starting at membuf */
 void bs_init_membuf(struct bytestream* bs,const unsigned char* membuf,size_t len);
 /* bind bs to I/O buffer b without a size limit; reading ends at EOF of b */
 void bs_init_iobuf(struct bytestream* bs,struct buffer* b);
 /* like bs_init_iobuf, but allow reading at most maxlen bytes */
 void bs_init_iobuf_size(struct bytestream* bs,struct buffer* b,size_t maxlen);
 /* bind bs to a sub-region of at most maxlen bytes of bytestream parent */
 void bs_init_bstream_size(struct bytestream* bs,struct bytestream* parent,size_t maxlen);
 /* Static initializers for struct bytestream. Members that are not named
  * (in particular cur) are zero-initialized, so reading starts at 0.
  * BS_FROM_MEMBUF: read the len bytes of memory at buf.
  * BS_FROM_BUFFER: read from an I/O buffer; the limit is (size_t)-1.
  * BS_FROM_BUFFER_SIZE: read from an I/O buffer with limit len. */
 #define BS_FROM_MEMBUF(buf,len) { .type=MEMBUF, .max=(len), .u.base=(const unsigned char*)(buf) }
 #define BS_FROM_BUFFER(buffer) { .type=IOBUF, .max=(size_t)-1, .u.b=(buffer) }
 #define BS_FROM_BUFFER_SIZE(buffer,len) { .type=IOBUF, .max=(len), .u.b=(buffer) }
 /* return next byte from stream or 0 if EOF or read error. */
 unsigned char bs_get(struct bytestream* bs);
 /* like bs_get but do not advance position in stream.
  * Peeking at EOF or in the error state returns 0 and puts the stream
  * into the error state. */
 unsigned char bs_peek(struct bytestream* bs);
 /* was there a read error or did we attempt to read more than maxlen bytes? */
 int bs_err(struct bytestream* bs);
 /* Can we read this many more bytes from the bytestream? */
 int bs_capacitycheck(struct bytestream* bs,size_t capacity);
 /* Fixed-width integer parsers.
  * NOTE(review): presumably the plain variants decode little-endian and
  * the _big variants big-endian values; the implementations are not part
  * of this header -- confirm. */
 uint16_t prs_u16(struct bytestream* bs);
 uint16_t prs_u16_big(struct bytestream* bs);
 uint32_t prs_u32(struct bytestream* bs);
 uint32_t prs_u32_big(struct bytestream* bs);
 uint64_t prs_u64(struct bytestream* bs);
 uint64_t prs_u64_big(struct bytestream* bs);
 /* Read an asciiz string from the byte stream, up to destsize bytes (including the 0 terminator). */
 /* dest must have room for destsize bytes. */
 /* Return number of bytes consumed (excluding the 0 terminator), i.e. strlen(dest) */
 /* If there is no 0 byte in these destsize bytes, set error flag in stream and return -1. */
 /* In that case dest still receives the first destsize-1 bytes plus a 0 terminator. */
 /* Calling this function with destsize==0 is an error. */
 /* destsize will be clamped to the maximum number representable in ssize_t */
 ssize_t prs_asciiz(struct bytestream* bs, char* dest, size_t destsize);
 /* Some protocols have a fixed field length for a string.
 * If the string is shorter than the field, the rest is filled with 0
 * bytes. But it is not an error if there are no 0 bytes.
 * This function is for those cases (the filename field in the tar file
 * header is an example of this).
 * For a field of length 8, you need to pass destsize as 9 so we can add
 * a 0 terminator. This function will consume the 8 bytes and add a 0 byte.
 * The return value is strlen(dest).
 * Returns -1 if destsize==0 (which also sets the error flag in the
 * stream) or if the stream is in the error state after reading the field.
 * destsize will be clamped to the maximum number representable in ssize_t. */
 ssize_t prs_asciiz_fixedlen(struct bytestream* bs, char* dest, size_t destsize);
 #ifdef __cplusplus
 }
 #endif
 #endif