From 558c6128fe68cfa1d69822c5294d161121fd6897 Mon Sep 17 00:00:00 2001 From: leitner Date: Sun, 8 Jun 2003 21:20:36 +0000 Subject: [PATCH] better and more predication, improved documentation --- CHANGES | 1 + buffer.h | 24 ++++++++++++++++++++++ buffer/buffer_get_token.3 | 21 +++++-------------- buffer/buffer_get_token_pred.3 | 23 +++++++++++++++++++++ buffer/buffer_get_token_pred.c | 16 +++++++++++++++ buffer/buffer_get_token_sa.3 | 15 +++++++++----- buffer/buffer_get_token_sa.c | 1 - buffer/buffer_get_token_sa_pred.3 | 34 +++++++++++++++++++++++++++++++ buffer/buffer_get_token_sa_pred.c | 21 +++++++++++++++++++ buffer/buffer_getline_sa.3 | 9 ++++++-- stralloc.h | 16 +++++++++++++++ test/stralloc_buffer.c | 2 ++ 12 files changed, 159 insertions(+), 24 deletions(-) create mode 100644 buffer/buffer_get_token_pred.3 create mode 100644 buffer/buffer_get_token_pred.c create mode 100644 buffer/buffer_get_token_sa_pred.3 create mode 100644 buffer/buffer_get_token_sa_pred.c diff --git a/CHANGES b/CHANGES index 551c9f8..1d1e97f 100644 --- a/CHANGES +++ b/CHANGES @@ -20,6 +20,7 @@ scan_ushort extended socket API; you can now pass NULL for results you don't want rename Makefile to GNUmakefile; create Makefile target + add buffer_get_token_pred and buffer_get_token_sa_pred 0.14: avoid bus errors in byte_copy diff --git a/buffer.h b/buffer.h index dca8f91..3b9c5c2 100644 --- a/buffer.h +++ b/buffer.h @@ -46,6 +46,13 @@ extern int buffer_getn(buffer* b,char* x,unsigned int len); extern int buffer_get_token(buffer* b,char* x,unsigned int len,const char* charset,unsigned int setlen); #define buffer_getline(b,x,len) buffer_get_token((b),(x),(len),"\n",1) +/* this predicate is given the string as currently read from the buffer + * and is supposed to return 1 if the token is complete, 0 if not. 
*/ +typedef int (*string_predicate)(const char* x,unsigned int len); + +/* like buffer_get_token but the token ends when your predicate says so */ +extern int buffer_get_token_pred(buffer* b,char* x,unsigned int len,string_predicate p); + extern char *buffer_peek(buffer* b); extern void buffer_seek(buffer* b,unsigned int len); @@ -74,10 +81,27 @@ extern buffer *buffer_2; #ifdef STRALLOC_H /* write stralloc to buffer */ extern int buffer_putsa(buffer* b,stralloc* sa); + +/* these "read token" functions return 0 if the token was complete or + * EOF was hit or -1 on error. In contrast to the non-stralloc token + * functions, the separator is also put in the stralloc; use + * stralloc_chop or stralloc_chomp to get rid of it. */ + +/* WARNING! These token reading functions will not clear the stralloc! + * They _append_ the token to the contents of the stralloc. The idea is + * that this way these functions can be used on non-blocking sockets; + * when you get signalled EAGAIN, just call the functions again when new + * data is available. 
*/ + /* read token from buffer to stralloc */ extern int buffer_get_token_sa(buffer* b,stralloc* sa,const char* charset,unsigned int setlen); /* read line from buffer to stralloc */ extern int buffer_getline_sa(buffer* b,stralloc* sa); + +typedef int (*sa_predicate)(stralloc* sa); + +/* like buffer_get_token_sa but the token ends when your predicate says so */ +extern int buffer_get_token_sa_pred(buffer* b,stralloc* sa,sa_predicate p); #endif #endif diff --git a/buffer/buffer_get_token.3 b/buffer/buffer_get_token.3 index c4152f5..7905f82 100644 --- a/buffer/buffer_get_token.3 +++ b/buffer/buffer_get_token.3 @@ -7,21 +7,10 @@ buffer_get_token \- read token from buffer int \fBbuffer_get_token\fP(buffer* \fIb\fR,char* \fIx\fR,unsigned int \fIlen\fR, const char* \fIcharset\fR,unsigned int \fIsetlen\fR); .SH DESCRIPTION -Normally buffer_get_token copies data to \fIx\fR[0], \fIx\fR[1], ..., -\fIx\fR[\fIlen\fR-1] from the beginning of a string stored in -preallocated space; removes these \fIlen\fR bytes from the string; and -returns \fIlen\fR. - -If, however, the string has fewer than \fIlen\fR (but more than 0) -bytes, buffer_get_token copies only that many bytes, and returns that number. - -If the string is empty, buffer_get_token first uses a \fBread operation\fR to -feed data into the string. The \fBread operation\fR may indicate end of -input, in which case buffer_get_token returns 0; or a read error, in which -case buffer_get_token returns -1, setting \fIerrno\fR approporiately. - -The preallocated space and the \fBread operation\fR are specified by -\fIb\fR. You must initialize \fBb\fR using buffer_init before calling -buffer_get_token (or use the pre-initialized buffer_0). +buffer_get_token copies data from \fIb\fR to \fIx\fR[0], \fIx\fR[1], ..., +\fIx\fR[\fIlen\fR-1] until \fIlen\fR bytes have been copied or one of +\fIcharset\fR[0], \fIcharset\fR[1], ..., \fIcharset\fR[\fIsetlen\fR-1] +equals the last byte that was read.
buffer_get_token returns the number +of bytes read or -1 on error (setting \fIerrno\fR appropriately). .SH "SEE ALSO" buffer_init(3), buffer_feed(3), buffer_peek(3), buffer_seek(3), buffer(3) diff --git a/buffer/buffer_get_token_pred.3 b/buffer/buffer_get_token_pred.3 new file mode 100644 index 0000000..1cc5d9d --- /dev/null +++ b/buffer/buffer_get_token_pred.3 @@ -0,0 +1,23 @@ +.TH buffer_get_token_pred 3 +.SH NAME +buffer_get_token_pred \- read token from buffer +.SH SYNTAX +.B #include <buffer.h> + +int \fBbuffer_get_token_pred\fP(buffer* \fIb\fR,char* \fIx\fR,unsigned int \fIlen\fR, + int (*\fIpredicate\fR)(const char* \fIs\fR,unsigned int \fIlen\fR)); +.SH DESCRIPTION +buffer_get_token_pred copies data from \fIb\fR to \fIx\fR[0], +\fIx\fR[1], ..., \fIx\fR[\fIlen\fR-1] until \fIlen\fR bytes have been +read or \fIpredicate\fR called on the destination string returns +nonzero. + +\fIpredicate\fR can also return 0 (indicating further input is required +to complete the token) or -1 (abort and return -1; use this if +\fIpredicate\fR wants to enforce a maximum message size or does timeout +handling or detects a malformed message). + +buffer_get_token_pred returns the number of bytes copied or -1 on +error (setting \fIerrno\fR appropriately).
+.SH "SEE ALSO" +buffer_init(3), buffer_feed(3), buffer_peek(3), buffer_seek(3), buffer(3) diff --git a/buffer/buffer_get_token_pred.c b/buffer/buffer_get_token_pred.c new file mode 100644 index 0000000..88c297d --- /dev/null +++ b/buffer/buffer_get_token_pred.c @@ -0,0 +1,16 @@ +#include "byte.h" +#include "buffer.h" +#include "scan.h" + +int buffer_get_token_pred(buffer* b,char* x,unsigned int len,string_predicate p) { + int blen; + + for (blen=0;blen int buffer_get_token_sa(buffer* b,stralloc* sa,const char* charset,unsigned int setlen) { - sa->len=0; for (;;) { char x; if (!stralloc_readyplus(sa,1)) goto nomem; diff --git a/buffer/buffer_get_token_sa_pred.3 b/buffer/buffer_get_token_sa_pred.3 new file mode 100644 index 0000000..77c24a8 --- /dev/null +++ b/buffer/buffer_get_token_sa_pred.3 @@ -0,0 +1,34 @@ +.TH buffer_get_token_sa_pred 3 +.SH NAME +buffer_get_token_sa_pred \- read token from buffer +.SH SYNTAX +.nf +.B #include <stralloc.h> +.B #include <buffer.h> + +int \fBbuffer_get_token_sa_pred\fP(buffer* \fIb\fR,stralloc* \fIsa\fR, + int (*\fIpredicate\fR)(stralloc* \fIsa\fR)); +.SH DESCRIPTION +buffer_get_token_sa_pred appends data from the \fIb\fR to \fIsa\fR until +\fIpredicate\fR(\fIsa\fR) returns 1 or -1. + +If \fIpredicate\fR returns 1 once a '\\n' was read, that new-line +character is still appended to \fIsa\fR -- use stralloc_chop or +stralloc_chomp to get rid of it. \fIpredicate\fR can also return 0 +(indicating further input is required to complete the token) or -1 +(abort and return -1; use this if \fIpredicate\fR wants to enforce a +maximum message size or does timeout handling or detects a malformed +message). + +If reading from the buffer or allocating memory fails, +buffer_get_token_sa_pred returns -1 and sets \fIerrno\fR appropriately. At +that point \fIsa\fR may already contain a partial token. + +On success, buffer_get_token_sa_pred returns 0.
+.SH RATIONALE +buffer_get_token_sa_pred appends instead of overwriting so it can be +used on non-blocking sockets (these signal error and set \fIerrno\fR to +EAGAIN; in this case you can simply call buffer_get_token_sa_pred again when +\fBselect\fR or \fBpoll\fR indicate more data is available). +.SH "SEE ALSO" +buffer_getline_sa(3), buffer_get_token(3), buffer(3) diff --git a/buffer/buffer_get_token_sa_pred.c b/buffer/buffer_get_token_sa_pred.c new file mode 100644 index 0000000..44ba216 --- /dev/null +++ b/buffer/buffer_get_token_sa_pred.c @@ -0,0 +1,21 @@ +#include "byte.h" +#include "stralloc.h" +#include "buffer.h" +#include + +int buffer_get_token_sa_pred(buffer* b,stralloc* sa,sa_predicate p) { + for (;;) { + char x; + if (!stralloc_readyplus(sa,1)) return -1; + switch (buffer_getc(b,&x)) { + case -1: return -1; + case 0: return 0; + } + stralloc_append(sa,&x); + switch (p(sa)) { + case -1: return -1; + case 0: break; + case 1: return 0; + } + } +} diff --git a/buffer/buffer_getline_sa.3 b/buffer/buffer_getline_sa.3 index 78ac382..6797f2d 100644 --- a/buffer/buffer_getline_sa.3 +++ b/buffer/buffer_getline_sa.3 @@ -8,8 +8,8 @@ buffer_getline_sa \- read line from buffer int \fBbuffer_getline_sa\fP(buffer* \fIb\fR,stralloc* \fIsa\fR); .SH DESCRIPTION -buffer_getline_sa copies data from the \fIb\fR to \fIsa\fR until a '\\n' -is found, overwriting the previous content of \fIsa\fR. The new-line +buffer_getline_sa appends data from the \fIb\fR to \fIsa\fR until a '\\n' +is found, NOT overwriting the previous content of \fIsa\fR. The new-line is also appended to \fIsa\fR. If reading from the buffer or allocating memory fails, @@ -18,5 +18,10 @@ that point \fIsa\fR may be empty or it may already contain a partial token. On success, buffer_getline_sa returns 0.
+.SH RATIONALE +buffer_getline_sa appends instead of overwriting so it can be used on +non-blocking sockets (these signal error and set \fIerrno\fR to EAGAIN; +in this case you can simply call buffer_getline_sa again when +\fBselect\fR or \fBpoll\fR indicate more data is available). .SH "SEE ALSO" buffer_get_token_sa(3), buffer(3) diff --git a/stralloc.h b/stralloc.h index 0a83b79..f587ade 100644 --- a/stralloc.h +++ b/stralloc.h @@ -114,10 +114,26 @@ extern int stralloc_chomp(stralloc* sa); #ifdef BUFFER_H /* write stralloc to buffer */ extern int buffer_putsa(buffer* b,stralloc* sa); +/* these "read token" functions return 0 if the token was complete or + * EOF was hit or -1 on error. In contrast to the non-stralloc token + * functions, the separator is also put in the stralloc; use + * stralloc_chop or stralloc_chomp to get rid of it. */ + +/* WARNING! These token reading functions will not clear the stralloc! + * They _append_ the token to the contents of the stralloc. The idea is + * that this way these functions can be used on non-blocking sockets; + * when you get signalled EAGAIN, just call the functions again when new + * data is available. 
*/ + /* read token from buffer to stralloc */ extern int buffer_get_token_sa(buffer* b,stralloc* sa,const char* charset,unsigned int setlen); /* read line from buffer to stralloc */ extern int buffer_getline_sa(buffer* b,stralloc* sa); + +typedef int (*sa_predicate)(stralloc* sa); + +/* like buffer_get_token_sa but the token ends when your predicate says so */ +extern int buffer_get_token_sa_pred(buffer* b,stralloc* sa,sa_predicate p); #endif #endif diff --git a/test/stralloc_buffer.c b/test/stralloc_buffer.c index 61d50d9..6b0dbb5 100644 --- a/test/stralloc_buffer.c +++ b/test/stralloc_buffer.c @@ -3,6 +3,8 @@ main() { static stralloc sa; + /* static makes sure sa is initialized and empty; + * use stralloc_init to initialize and stralloc_copys(&sa,"") to empty */ if (buffer_get_token_sa(buffer_0,&sa," \t\n",3)==0) { buffer_putsa(buffer_1,&sa); buffer_putnlflush(buffer_1);