Skip to content

Commit

Permalink
Parser: modify the zero-copy string API
Browse files Browse the repository at this point in the history
Instead of just one function (_cbor_value_get_string_chunk), we now have
_cbor_value_begin_string_iteration, _cbor_value_finish_string_iteration,
_cbor_value_get_string_chunk_size, and _cbor_value_get_string_chunk.

The "begin" function positions the pointer at the first chunk. That's
what makes "get_size" possible, since it doesn't need to check for any
state. The "finish" function allows the caller to distinguish an error
parsing the string from an error parsing the next value.

Signed-off-by: Thiago Macieira <[email protected]>
  • Loading branch information
thiagomacieira committed Sep 3, 2021
1 parent 5159ec3 commit 8adc3cf
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 93 deletions.
41 changes: 41 additions & 0 deletions src/cbor.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ typedef enum CborError {
CborErrorIllegalType, /* type not allowed here */
CborErrorIllegalNumber,
CborErrorIllegalSimpleType, /* types of value less than 32 encoded in two bytes */
CborErrorNoMoreStringChunks,

/* parser errors in strict mode parsing only */
CborErrorUnknownSimpleType = 512,
Expand Down Expand Up @@ -292,11 +293,23 @@ enum CborParserGlobalFlags

/* Bit flags describing the state of a parser iterator; they are tested
 * against the `flags` member of a CborValue. Some bit values are reused, so
 * which meaning applies depends on the kind of value and on whether string
 * chunk iteration is in progress (see the per-group notes below). */
enum CborParserIteratorFlags
{
/* used for all types, but not during string chunk iteration
 * (values are static-asserted, don't change) */
CborIteratorFlag_IntegerValueIs64Bit = 0x01,
CborIteratorFlag_IntegerValueTooLarge = 0x02,

/* used only for CborIntegerType */
CborIteratorFlag_NegativeInteger = 0x04,

/* used only during string iteration
 * (BeforeFirstStringChunk has the same value, 0x04, as NegativeInteger above;
 * the two are documented as applying to different kinds of values) */
CborIteratorFlag_BeforeFirstStringChunk = 0x04,
CborIteratorFlag_IteratingStringChunks = 0x08,

/* used for arrays, maps and strings, including during chunk iteration */
CborIteratorFlag_UnknownLength = 0x10,

/* used for maps, but must be kept for all types
 * (ContainerIsMap value must be CborMapType - CborArrayType) */
CborIteratorFlag_ContainerIsMap = 0x20,
CborIteratorFlag_NextIsMapKey = 0x40
};
Expand Down Expand Up @@ -499,6 +512,34 @@ CBOR_INLINE_API CborError cbor_value_dup_byte_string(const CborValue *value, uin
return _cbor_value_dup_string(value, (void **)buffer, buflen, next);
}

/* Out-of-line worker for cbor_value_get_string_chunk_size(). */
CBOR_PRIVATE_API CborError _cbor_value_get_string_chunk_size(const CborValue *value, size_t *len);

/*
 * Queries the size of the string chunk the iterator is currently positioned
 * at and reports it through `len`.
 *
 * `value` must already be in string chunk iteration (checked with assert).
 * The result of the private worker is returned unchanged;
 * CborErrorNoMoreStringChunks is the code that
 * cbor_value_string_iteration_at_end() treats as "no chunk left".
 */
CBOR_INLINE_API CborError cbor_value_get_string_chunk_size(const CborValue *value, size_t *len)
{
    CborError err;

    assert(value->flags & CborIteratorFlag_IteratingStringChunks);
    err = _cbor_value_get_string_chunk_size(value, len);
    return err;
}

CBOR_INLINE_API bool cbor_value_string_iteration_at_end(const CborValue *value)
{
size_t dummy;
return cbor_value_get_string_chunk_size(value, &dummy) == CborErrorNoMoreStringChunks;
}

/* Out-of-line worker for cbor_value_begin_string_iteration(). */
CBOR_PRIVATE_API CborError _cbor_value_begin_string_iteration(CborValue *value);

/*
 * Starts string chunk iteration on `value`.
 *
 * Preconditions (checked with assert): `value` is a text string or a byte
 * string, and it is not already being iterated chunk by chunk.
 * Returns whatever the private worker returns.
 */
CBOR_INLINE_API CborError cbor_value_begin_string_iteration(CborValue *value)
{
    CborError err;

    assert(cbor_value_is_text_string(value) || cbor_value_is_byte_string(value));
    assert(!(value->flags & CborIteratorFlag_IteratingStringChunks));

    err = _cbor_value_begin_string_iteration(value);
    return err;
}

/* Out-of-line worker for cbor_value_finish_string_iteration(). */
CBOR_PRIVATE_API CborError _cbor_value_finish_string_iteration(CborValue *value);

/*
 * Ends string chunk iteration on `value`.
 *
 * Precondition (checked with assert): the iteration has reached its end,
 * i.e. cbor_value_string_iteration_at_end() is true for `value`.
 * Returns whatever the private worker returns.
 */
CBOR_INLINE_API CborError cbor_value_finish_string_iteration(CborValue *value)
{
    CborError err;

    assert(cbor_value_string_iteration_at_end(value));
    err = _cbor_value_finish_string_iteration(value);
    return err;
}

CBOR_PRIVATE_API CborError _cbor_value_get_string_chunk(const CborValue *value, const void **bufferptr,
size_t *len, CborValue *next);
CBOR_INLINE_API CborError cbor_value_get_text_string_chunk(const CborValue *value, const char **bufferptr,
Expand Down
3 changes: 3 additions & 0 deletions src/cborerrorstrings.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,9 @@ const char *cbor_error_string(CborError error)
case CborErrorIllegalSimpleType:
return _("illegal encoding of simple type smaller than 32");

case CborErrorNoMoreStringChunks:
return _("no more byte or text strings available");

case CborErrorUnknownSimpleType:
return _("unknown simple type");

Expand Down
2 changes: 0 additions & 2 deletions src/cborinternal_p.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,6 @@ enum {
BreakByte = (unsigned)Break | (SimpleTypesType << MajorTypeShift)
};

CBOR_INTERNAL_API CborError CBOR_INTERNAL_API_CC _cbor_value_prepare_string_iteration(CborValue *it);

static inline void copy_current_position(CborValue *dst, const CborValue *src)
{
/* This "if" is here for pedantry only: the two branches should perform
Expand Down
148 changes: 74 additions & 74 deletions src/cborparser.c
Original file line number Diff line number Diff line change
Expand Up @@ -966,103 +966,99 @@ CborError cbor_value_calculate_string_length(const CborValue *value, size_t *len
return _cbor_value_copy_string(value, NULL, len, NULL);
}

static inline void prepare_string_iteration(CborValue *it)
CborError _cbor_value_begin_string_iteration(CborValue *it)
{
it->flags |= CborIteratorFlag_IteratingStringChunks |
CborIteratorFlag_BeforeFirstStringChunk;
if (!cbor_value_is_length_known(it)) {
/* chunked string: we're before the first chunk;
* advance to the first chunk */
advance_bytes(it, 1);
it->flags |= CborIteratorFlag_IteratingStringChunks;
}

return CborNoError;
}

CborError CBOR_INTERNAL_API_CC _cbor_value_prepare_string_iteration(CborValue *it)
CborError _cbor_value_finish_string_iteration(CborValue *it)
{
cbor_assert((it->flags & CborIteratorFlag_IteratingStringChunks) == 0);
prepare_string_iteration(it);
if (!cbor_value_is_length_known(it))
advance_bytes(it, 1); /* skip the Break */

/* are we at the end? */
if (!can_read_bytes(it, 1))
return CborErrorUnexpectedEOF;
return CborNoError;
return preparse_next_value(it);
}

static CborError get_string_chunk(CborValue *it, const void **bufferptr, size_t *len)
static CborError get_string_chunk_size(const CborValue *it, size_t *offset, size_t *len)
{
/* Possible states:
* length known | iterating | meaning
* no | no | before the first chunk of a chunked string
* yes | no | at a non-chunked string
* no | yes | second or later chunk
* yes | yes | after a non-chunked string
*/
if (it->flags & CborIteratorFlag_IteratingStringChunks) {
/* already iterating */
if (cbor_value_is_length_known(it)) {
/* if the length was known, it wasn't chunked, so finish iteration */
goto last_chunk;
}
} else {
prepare_string_iteration(it);
}
uint8_t descriptor;
size_t bytesNeeded = 1;

if (cbor_value_is_length_known(it) && (it->flags & CborIteratorFlag_BeforeFirstStringChunk) == 0)
return CborErrorNoMoreStringChunks;

/* are we at the end? */
uint8_t descriptor;
if (!read_bytes(it, &descriptor, 0, 1))
return CborErrorUnexpectedEOF;

if (descriptor == BreakByte) {
/* last chunk */
advance_bytes(it, 1);
last_chunk:
*bufferptr = NULL;
*len = 0;
return preparse_next_value(it);
} else if ((descriptor & MajorTypeMask) == it->type) {
/* find the string length */
size_t bytesNeeded = 1;

descriptor &= SmallValueMask;
if (descriptor < Value8Bit) {
*len = descriptor;
} else if (unlikely(descriptor > Value64Bit)) {
return CborErrorIllegalNumber;
} else {
uint64_t val;
bytesNeeded = (size_t)(1 << (descriptor - Value8Bit));
if (!can_read_bytes(it, 1 + bytesNeeded))
return CborErrorUnexpectedEOF;

if (descriptor <= Value16Bit) {
if (descriptor == Value16Bit)
val = read_uint16(it, 1);
else
val = read_uint8(it, 1);
} else {
if (descriptor == Value32Bit)
val = read_uint32(it, 1);
else
val = read_uint64(it, 1);
}
if (descriptor == BreakByte)
return CborErrorNoMoreStringChunks;
if ((descriptor & MajorTypeMask) != it->type)
return CborErrorIllegalType;

*len = val;
if (*len != val)
return CborErrorDataTooLarge;
/* find the string length */
descriptor &= SmallValueMask;
if (descriptor < Value8Bit) {
*len = descriptor;
} else if (unlikely(descriptor > Value64Bit)) {
return CborErrorIllegalNumber;
} else {
uint64_t val;
bytesNeeded = (size_t)(1 << (descriptor - Value8Bit));
if (!can_read_bytes(it, 1 + bytesNeeded))
return CborErrorUnexpectedEOF;

++bytesNeeded;
if (descriptor <= Value16Bit) {
if (descriptor == Value16Bit)
val = read_uint16(it, 1);
else
val = read_uint8(it, 1);
} else {
if (descriptor == Value32Bit)
val = read_uint32(it, 1);
else
val = read_uint64(it, 1);
}

if (*len != (size_t)*len)
*len = val;
if (*len != val)
return CborErrorDataTooLarge;

CborError err = transfer_string(it, bufferptr, bytesNeeded, *len);
if (err)
return err;
} else {
return CborErrorIllegalType;
++bytesNeeded;
}

it->flags |= CborIteratorFlag_IteratingStringChunks;
*offset = bytesNeeded;
return CborNoError;
}

/*
 * Reports the length of the current string chunk through `len`.
 *
 * Thin wrapper over get_string_chunk_size(): the offset that helper also
 * produces is not needed by callers of this function and is dropped.
 * The helper's error code is returned as is.
 */
CborError _cbor_value_get_string_chunk_size(const CborValue *value, size_t *len)
{
    size_t unusedOffset;
    CborError err = get_string_chunk_size(value, &unusedOffset, len);

    return err;
}

/*
 * Fetches the current string chunk of `it`.
 *
 * First obtains the chunk length (stored in `*len`) and the offset passed on
 * to transfer_string(), then hands both to transfer_string() together with
 * `bufferptr`. Any error from either step is returned immediately and leaves
 * the flags untouched. On success the BeforeFirstStringChunk flag is cleared
 * to record that at least one chunk has been consumed.
 *
 * NOTE(review): transfer_string() presumably sets *bufferptr and moves the
 * iterator past the chunk - confirm against its definition.
 */
static CborError get_string_chunk(CborValue *it, const void **bufferptr, size_t *len)
{
    size_t payloadOffset;
    CborError status = get_string_chunk_size(it, &payloadOffset, len);

    /* only transfer the string when the size lookup succeeded */
    if (!status)
        status = transfer_string(it, bufferptr, payloadOffset, *len);

    /* only a fully successful fetch counts as having iterated once */
    if (!status)
        it->flags &= ~CborIteratorFlag_BeforeFirstStringChunk;

    return status;
}

Expand Down Expand Up @@ -1195,14 +1191,18 @@ static CborError iterate_string_chunks(const CborValue *value, char *buffer, siz
*next = *value;
*result = true;

err = _cbor_value_begin_string_iteration(next);
if (err)
return err;

while (1) {
size_t newTotal;
size_t chunkLen;
err = get_string_chunk(next, &ptr, &chunkLen);
if (err == CborErrorNoMoreStringChunks)
break;
if (err)
return err;
if (!ptr)
break;

if (unlikely(add_check_overflow(total, chunkLen, &newTotal)))
return CborErrorDataTooLarge;
Expand All @@ -1221,7 +1221,7 @@ static CborError iterate_string_chunks(const CborValue *value, char *buffer, siz
*result = !!func(buffer + total, nul, 1);
}
*buflen = total;
return CborNoError;
return _cbor_value_finish_string_iteration(next);
}

/**
Expand Down
15 changes: 7 additions & 8 deletions src/cborpretty.c
Original file line number Diff line number Diff line change
Expand Up @@ -407,25 +407,24 @@ static CborError value_to_pretty(CborStreamFunction stream, void *out, CborValue
open[1] = '\0';
}

if (showingFragments) {
if (showingFragments)
err = stream(out, "(_ ");
if (!err)
err = _cbor_value_prepare_string_iteration(it);
} else {
else
err = stream(out, "%s", open);
}

if (!err)
err = cbor_value_begin_string_iteration(it);
while (!err) {
if (showingFragments || indicator == NULL) {
/* any iteration, except the second for a non-chunked string */
indicator = resolve_indicator(it, flags);
}

err = _cbor_value_get_string_chunk(it, &ptr, &n, it);
if (err)
return err;
if (!ptr)
if (err == CborErrorNoMoreStringChunks) {
err = cbor_value_finish_string_iteration(it);
break;
}

if (!err && showingFragments)
err = stream(out, "%s%s", separator, open);
Expand Down
12 changes: 6 additions & 6 deletions src/cborvalidation.c
Original file line number Diff line number Diff line change
Expand Up @@ -543,24 +543,24 @@ static CborError validate_value(CborValue *it, uint32_t flags, int recursionLeft
size_t n = 0;
const void *ptr;

err = _cbor_value_prepare_string_iteration(it);
err = cbor_value_begin_string_iteration(it);
if (err)
return err;

while (1) {
CborValue next;
err = _cbor_value_get_string_chunk(it, &ptr, &n, &next);
if (err)
return err;
if (ptr) {
if (!err) {
err = validate_number(it, type, flags);
if (err)
return err;
}

*it = next;
if (!ptr)
break;
if (err == CborErrorNoMoreStringChunks)
return cbor_value_finish_string_iteration(it);
if (err)
return err;

if (type == CborTextStringType && flags & CborValidateUtf8) {
err = validate_utf8_string(ptr, n);
Expand Down
11 changes: 8 additions & 3 deletions tests/parser/tst_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -869,18 +869,23 @@ static void chunkedStringTest(const QByteArray &data, const QString &concatenate

CborValue copy = value;

err = cbor_value_begin_string_iteration(&value);
QVERIFY2(!err, QByteArray("Got error \"") + cbor_error_string(err) + "\"");
forever {
QString decoded;
err = parseOneChunk(&value, &decoded);
QVERIFY2(!err, QByteArray("Got error \"") + cbor_error_string(err) + "\"");

if (decoded.isEmpty())
if (err == CborErrorNoMoreStringChunks)
break; // last chunk

QVERIFY2(!err, QByteArray("Got error \"") + cbor_error_string(err) + "\"");

QVERIFY2(!chunks.isEmpty(), "Too many chunks");
QString expected = chunks.takeFirst();
QCOMPARE(decoded, expected);
}

err = cbor_value_finish_string_iteration(&value);
QVERIFY2(!err, QByteArray("Got error \"") + cbor_error_string(err) + "\"");
QVERIFY2(chunks.isEmpty(), "Too few chunks");

// compare to the concatenated data
Expand Down

0 comments on commit 8adc3cf

Please sign in to comment.