parent
ad6c2d9ce7
commit
d17c2d1012
@ -0,0 +1,18 @@
|
|||||||
|
.TH fmt_asn1derlength 3
|
||||||
|
.SH NAME
|
||||||
|
fmt_asn1derlength \- encode unsigned integer like ASN.1 DER length
|
||||||
|
.SH SYNTAX
|
||||||
|
.B #include <fmt.h>
|
||||||
|
|
||||||
|
size_t \fBfmt_asn1derlength\fP(char *\fIdest\fR,unsigned long long \fIsource\fR);
|
||||||
|
.SH DESCRIPTION
|
||||||
|
fmt_asn1derlength encodes an unsigned integer using the ASN.1 DER length rules. This
|
||||||
|
can take from 1 byte (0-0x7f) up to sizeof(source)+1 bytes.
|
||||||
|
|
||||||
|
If \fIdest\fR equals FMT_LEN (i.e. is NULL), fmt_asn1derlength returns the
|
||||||
|
number of bytes it would have written.
|
||||||
|
|
||||||
|
For convenience, fmt.h defines the integer FMT_ASN1LENGTH to be big
|
||||||
|
enough to contain every possible fmt_asn1derlength output.
|
||||||
|
.SH "SEE ALSO"
|
||||||
|
scan_asn1derlength(3)
|
@ -0,0 +1,25 @@
|
|||||||
|
#include "fmt.h"
|
||||||
|
|
||||||
|
/*
 * Write l in the least amount of bytes, in the form used for ASN.1 DER
 * lengths, and return the number of bytes.
 *
 * Encoding:
 *   l < 128  : one byte holding l itself (short form)
 *   otherwise: one byte 0x80+n, followed by the n significant bytes of
 *              l, most significant byte first (long form)
 *
 * dest may be NULL; then nothing is written and only the number of
 * bytes that would have been written is returned.  The result is at
 * most sizeof(l)+1.
 */
size_t fmt_asn1derlength(char* dest,unsigned long long l) {
  /* write through unsigned char so that storing values >= 0x80 does not
   * depend on whether plain char is signed */
  unsigned char* out=(unsigned char*)dest;
  size_t nbytes;

  if (l<128) {
    if (out) *out=(unsigned char)l;
    return 1;
  }

  /* count the significant bytes of l; the shift amount never reaches
   * the width of l because nbytes stays below sizeof l inside the loop */
  for (nbytes=1; nbytes<sizeof l; ++nbytes)
    if (!(l>>(nbytes*8)))
      break;

  if (out) {
    size_t shift=nbytes*8;
    *out++=(unsigned char)(0x80+nbytes);
    /* emit big-endian: highest significant byte first */
    while (shift) {
      shift-=8;
      *out++=(unsigned char)(l>>shift);
    }
  }
  return nbytes+1;
}
|
@ -0,0 +1,19 @@
|
|||||||
|
.TH fmt_utf8 3
|
||||||
|
.SH NAME
|
||||||
|
fmt_utf8 \- encode 31-bit unsigned integer using UTF-8 rules
|
||||||
|
.SH SYNTAX
|
||||||
|
.B #include <fmt.h>
|
||||||
|
|
||||||
|
size_t \fBfmt_utf8\fP(char *\fIdest\fR,uint32_t \fIsource\fR);
|
||||||
|
.SH DESCRIPTION
|
||||||
|
fmt_utf8 encodes a 31-bit unsigned integer using the UTF-8 rules. This
|
||||||
|
can take from 1 byte (0-0x7f) up to 5 bytes (0x200000-0x3ffffff); for larger values fmt_utf8 writes nothing and returns 0.
|
||||||
|
Values larger than 0x7fffffff cannot be represented in this encoding.
|
||||||
|
|
||||||
|
If \fIdest\fR equals FMT_LEN (i.e. is NULL), fmt_utf8 returns the
|
||||||
|
number of bytes it would have written.
|
||||||
|
|
||||||
|
For convenience, fmt.h defines the integer FMT_UTF8 to be big enough to
|
||||||
|
contain every possible fmt_utf8 output.
|
||||||
|
.SH "SEE ALSO"
|
||||||
|
scan_utf8(3)
|
@ -0,0 +1,26 @@
|
|||||||
|
#include "fmt.h"
|
||||||
|
|
||||||
|
/*
 * Encode n using the UTF-8 rules and return the number of bytes.
 *
 * dest may be NULL; then nothing is written and only the number of
 * bytes that would have been written is returned.
 *
 * Values up to 0x7f take one byte.  Larger values take a lead byte
 * carrying as many leading 1 bits as the sequence has bytes, followed
 * by continuation bytes of the form 10xxxxxx with 6 payload bits each.
 *
 * This implementation emits at most 5 bytes, i.e. it handles values up
 * to 0x3ffffff.  For anything larger it writes nothing and returns 0.
 */
size_t fmt_utf8(char *dest,uint32_t n) {
  /* write through unsigned char so that bytes >= 0x80 are stored the
   * same way whether plain char is signed or unsigned */
  unsigned char *out=(unsigned char*)dest;
  size_t cont;      /* number of continuation bytes */
  uint32_t limit;   /* largest value that fits in cont+1 bytes */

  if (n<=0x7f) {
    if (out) *out=(unsigned char)n;
    return 1;
  }

  /* 2 bytes hold 11 bits; every additional byte gains 5 more bits */
  for (cont=1,limit=0x7ff; cont<=4; ++cont,limit=(limit<<5)|0x1f) {
    if (n<=limit) {
      if (out) {
        size_t shift=cont*6;
        /* lead byte: cont+1 leading 1 bits, then a 0 bit, then the top
         * payload bits.  The mask is built with unsigned arithmetic
         * instead of right-shifting a negative char, which only works
         * where char is signed and >> is an arithmetic shift. */
        *out++=(unsigned char)(((0xffu<<(7-cont))&0xffu) | (n>>shift));
        while (shift) {
          shift-=6;
          *out++=(unsigned char)(0x80 | ((n>>shift)&0x3f));
        }
      }
      return cont+1;
    }
  }

  /* we were asked to encode a value that cannot be encoded */
  return 0;
}
|
||||||
|
|
@ -0,0 +1,20 @@
|
|||||||
|
.TH scan_asn1derlength 3
|
||||||
|
.SH NAME
|
||||||
|
scan_asn1derlength \- decode an unsigned integer from ASN.1 DER length encoding
|
||||||
|
.SH SYNTAX
|
||||||
|
.B #include <scan.h>
|
||||||
|
|
||||||
|
size_t \fBscan_asn1derlength\fP(const char *\fIsrc\fR,size_t \fIlen\fR,unsigned long long *\fIdest\fR);
|
||||||
|
.SH DESCRIPTION
|
||||||
|
scan_asn1derlength decodes an unsigned integer in ASN.1 DER length encoding
|
||||||
|
from a memory area holding binary data. It writes the decoded value in
|
||||||
|
\fIdest\fR and returns the number of bytes it read from \fIsrc\fR.
|
||||||
|
|
||||||
|
scan_asn1derlength never reads more than \fIlen\fR bytes from \fIsrc\fR. If the
|
||||||
|
sequence is longer than that, or the memory area contains an invalid
|
||||||
|
sequence, scan_asn1derlength returns 0 and does not touch \fIdest\fR.
|
||||||
|
|
||||||
|
The length of the longest ASN.1 DER length sequence is 128 bytes. In
|
||||||
|
practice the largest sequence is sizeof(*dest)+1.
|
||||||
|
.SH "SEE ALSO"
|
||||||
|
fmt_asn1derlength(3)
|
@ -0,0 +1,25 @@
|
|||||||
|
#include "scan.h"
|
||||||
|
|
||||||
|
/*
 * Decode an ASN.1 DER length from the len bytes at src.
 *
 * If the highest bit of the first byte is clear, that byte is the
 * length.  Otherwise its lower 7 bits give the number n of following
 * bytes, which hold the length with the most significant byte first.
 * A first byte of 0x80 (n == 0) is accepted and decodes to length 0.
 *
 * Returns the number of bytes consumed by the length encoding and
 * stores the decoded value in *length.  Returns 0, leaving *length
 * untouched, if
 *   - the input is empty or ends inside the length encoding,
 *   - the value does not fit into an unsigned long long, or
 *   - the decoded length exceeds the number of bytes remaining in the
 *     input after the length encoding.
 */
size_t scan_asn1derlength(const char* src,size_t len,unsigned long long* length) {
  const unsigned char* in=(const unsigned char*)src;
  unsigned long long l;
  size_t used;

  if (len==0) return 0;

  if (in[0]&0x80) {
    size_t count=in[0]&0x7f;
    if (count>len-1) return 0;          /* input ends inside the length */
    l=0;
    for (used=1; used<=count; ++used) {
      if (l>(((unsigned long long)-1)>>8)) return 0; /* catch integer overflow */
      l=l*256+in[used];
    }
  } else {
    l=in[0];
    used=1;
  }

  /* The announced data must fit into what is left of the buffer.
   * Compare integers rather than forming src+l: pointer arithmetic
   * past the end of the buffer is undefined and may wrap around. */
  if (l>len-used) return 0;

  /* only store the result once every check has passed */
  *length=l;
  return used;
}
|
@ -0,0 +1,21 @@
|
|||||||
|
.TH scan_utf8 3
|
||||||
|
.SH NAME
|
||||||
|
scan_utf8 \- decode an unsigned integer from UTF-8 encoding
|
||||||
|
.SH SYNTAX
|
||||||
|
.B #include <scan.h>
|
||||||
|
|
||||||
|
size_t \fBscan_utf8\fP(const char *\fIsrc\fR,size_t \fIlen\fR,uint32_t *\fIdest\fR);
|
||||||
|
.SH DESCRIPTION
|
||||||
|
scan_utf8 decodes an unsigned integer in UTF-8 encoding from a memory
|
||||||
|
area holding binary data. It writes the decoded value in \fIdest\fR and
|
||||||
|
returns the number of bytes it read from \fIsrc\fR.
|
||||||
|
|
||||||
|
scan_utf8 never reads more than \fIlen\fR bytes from \fIsrc\fR. If the
|
||||||
|
sequence is longer than that, or the memory area contains an invalid
|
||||||
|
sequence, scan_utf8 returns 0 and does not touch \fIdest\fR.
|
||||||
|
|
||||||
|
The length of the longest UTF-8 sequence is 6. If the buffer is longer
|
||||||
|
than that, and scan_utf8 fails, then the data was not a valid UTF-8
|
||||||
|
encoded sequence.
|
||||||
|
.SH "SEE ALSO"
|
||||||
|
fmt_utf8(3)
|
@ -0,0 +1,51 @@
|
|||||||
|
#include "fmt.h"
|
||||||
|
|
||||||
|
/*
 * Decode one UTF-8 encoded value from the len bytes at in.
 *
 * Returns the number of bytes consumed and, if num is not NULL, stores
 * the decoded value in *num.  Returns 0 without touching *num if the
 * input is empty, is truncated, or is not a valid sequence: a bad first
 * byte, a continuation byte that is not of the form 10xxxxxx, or an
 * encoding that is longer than necessary for its value.
 */
size_t scan_utf8(const char* in,size_t len,uint32_t* num) {
  const unsigned char* p=(const unsigned char*)in;
  uint32_t first,value,min;
  size_t total,idx;

  if (len==0) return 0;
  first=p[0];                     /* grab first byte */
  if (first>=0xfe ||              /* 0xfe and 0xff are invalid encodings in utf-8 for the first byte */
      (first&0xc0)==0x80)         /* first bits being 10 marks continuation chars, invalid sequence for first byte */
    return 0;

  if (first<0x80) {               /* plain single-byte value */
    if (num) *num=first;
    return 1;
  }

  /* count leading 1 bits; that is the total length of the sequence */
  total=0;
  for (value=first; value&0x80; value<<=1)
    ++total;
  if (total>len) return 0;
  value=(value&0xff)>>total;      /* mask the leading 1 bits */

  /* The next part is a little tricky.
   * UTF-8 says that the encoder has to choose the most efficient
   * encoding, and the decoder has to reject other encodings.  The
   * background is that attackers encoded '/' not as 0x2f but as 0xc0
   * 0xaf, and that evaded bad security checks that just scan for the
   * '/' byte in pathnames.
   * At this point total contains the number of bytes, so total-1 is the
   * number of continuation bytes.  For each additional continuation
   * byte, we gain 6 bits of storage space, but we lose one in the
   * signalling in the initial byte.  So we have 6 + (total-1) * 5 bits
   * total storage space for this encoding.  The minimum value for
   * total bytes is the maximum number for total-1 bytes plus 1.  If the
   * previous encoding has 11 bits, its maximum value is 11 1-bits or
   * 0x7ff, and the minimum value we are looking for is 0x800 or 1<<11.
   * For 2 bytes, UTF-8 can encode 11 bits, after that each additional
   * byte gains 5 more bits.  So for total>2, we want
   *     1 << (11+(total-3)*5)
   * or optimized to get rid of the -3
   *     1 << (total*5-4)
   * but for total==2 the delta is 4 bits (not 5), so we want
   *     1 << 7
   * abusing the fact that a boolean expression evaluates to 0 or 1, the
   * expression can be written as
   *     1 << (total*5-4+(total==2))
   */
  min=(uint32_t)1<<(total*5-4+(total==2));

  for (idx=1; idx<total; ++idx) {
    /* every byte after the first must be a continuation byte */
    if ((p[idx]&0xc0)!=0x80) return 0;
    value=(value<<6) | (p[idx]&0x3f);
  }

  if (value<min) return 0;        /* if the encoded value was less than min, reject */
  if (num) *num=value;
  return total;
}
|
||||||
|
|
Loading…
Reference in New Issue