Skip to content

Commit

Permalink
xml: Initial xml reader / writer (#1265)
Browse files Browse the repository at this point in the history
* xml: Library skeleton

* xml: Add create / destroy utils

* xml: Add attribute storage

* xml: Sketch out api

* xml: Add node storage

* xml: Add string allocator

* xml: Added basic nodes to doc

* xml: Add attribute tests

* xml: Fix bug in child link

* xml: Test child elements

* xml: Add error enum

* xml: Dummy lexer

* xml: Lex start / end tags

* xml: Lex close tag

* xml: Lex TagEndClose

* xml: Lex equal token

* xml: Minor renaming

* xml: Lex strings

* xml: Lex name tokens

* xml: Skip whitespace

* xml: Lex comments

* xml: Lex decl start / close

* xml: Support lexing content

* xml: Add compat comment

* xml: Support singly quoted strings

* xml: Support references

* xml: Fix consuming reference content

* xml: Support character references

* xml: Add result struct

* xml: Define the read api

* xml: Parse decl

* xml: Make decl optional

* xml: Initial parser

* xml: Refactor parsing to be single pass

* xml: Add missing token error

* xml: Add xml_error_str util

* xml: Add unlikely annotation

* xml: Add equal util

* xml: Minor cleanup

* xml: Add equate tests

* wip

* xml: Fix bug in lexer

* xml: Fix read bugs

* lex: Trim whitespace on comments

* xml: Limit max element depth

* json: Refactor json depth check

* xml: Rename truncated error

* xml: Fix invalid tag start lexing

* xml: Fix name lexing

* xml: Fix incorrect content error

* xml: Fix invalid decl error reporting

* xml: Custom error for invalid attribute value

* xml: Initial read tests

* xml: Fix incorrect peeks in xml_process_content

* xml: Test escapes

* xml: Add complex elements test

* xml: Add write api

* xml: Basic xml writer

* xml: Fix write bugs

* xml: Add basic write test

* xml: Add attribute tests

* xml: Support single-line node content

* xml: Fix single child writing

* xml: Test nested nodes

* xml: Add escaping test

* xml: Implement content escaping

* xml: Fix not handling newlines

* xml: Don't escape quotes by default

* core: Improve scratch format limit handling

* xml: Support newlines in content

* xml: Increase xml string chunk size

* xml: Basic lexer optimizations

* core: Add stringtable_reset util

* xml: Deduplicate keys

* xml: Fix gcc compat
  • Loading branch information
BastianBlokland authored Feb 9, 2025
1 parent c0ba8e3 commit e3af444
Show file tree
Hide file tree
Showing 22 changed files with 2,057 additions and 6 deletions.
1 change: 1 addition & 0 deletions libs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ add_subdirectory(snd)
add_subdirectory(trace)
add_subdirectory(ui)
add_subdirectory(vfx)
add_subdirectory(xml)
1 change: 1 addition & 0 deletions libs/core/include/core_stringtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ StringTable* stringtable_create(Allocator*);
* Destroy a StringTable instance.
*/
void stringtable_destroy(StringTable*);
/**
* Reset a StringTable instance.
* Zeroes all slots, clears the used-slot count and resets the allocator backing the string data.
* NOTE(review): strings previously stored in the table are presumably invalidated - confirm.
*/
void stringtable_reset(StringTable*);

/**
* Lookup the amount of strings in the given StringTable.
Expand Down
5 changes: 3 additions & 2 deletions libs/core/src/format.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
#include "core_path.h"
#include "core_time.h"

#define fmt_txt_len_max (8 * usize_kibibyte)
#define fmt_txt_len_max (4 * usize_kibibyte)
#define fmt_txt_scratch_alloc (16 * usize_kibibyte)

typedef enum {
FormatReplOptKind_None = 0,
Expand Down Expand Up @@ -156,7 +157,7 @@ void format_write_formatted(DynString* str, String format, const FormatArg* argH
}

String format_write_formatted_scratch(String format, const FormatArg* args) {
Mem scratchMem = alloc_alloc(g_allocScratch, fmt_txt_len_max, 1);
Mem scratchMem = alloc_alloc(g_allocScratch, fmt_txt_scratch_alloc, 1);
DynString str = dynstring_create_over(scratchMem);

format_write_formatted(&str, format, args);
Expand Down
7 changes: 7 additions & 0 deletions libs/core/src/stringtable.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,13 @@ void stringtable_destroy(StringTable* table) {
alloc_free_t(table->alloc, table);
}

/**
 * Return the given StringTable to an empty state.
 * Clears the used-slot counter, zeroes the whole slot array and resets the data allocator.
 */
void stringtable_reset(StringTable* table) {
  // Mark the table as empty, then wipe the storage of every slot (used or not).
  table->slotCountUsed = 0;
  mem_set(mem_create(table->slots, sizeof(StringTableSlot) * table->slotCount), 0);

  // NOTE(review): presumably releases the stored string data in one go - confirm.
  alloc_reset(table->dataAlloc);
}

u32 stringtable_count(const StringTable* table) {
StringTable* tableMutable = (StringTable*)table;
u32 res;
Expand Down
8 changes: 4 additions & 4 deletions libs/json/src/read.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
typedef struct {
JsonDoc* doc;
JsonReadFlags flags;
u32 depth;
} JsonReadState;

#define json_err(_ERR_) \
Expand Down Expand Up @@ -163,10 +164,9 @@ static String json_read_object(JsonReadState* state, String input, JsonResult* r
static String json_read_with_start_token(
JsonReadState* state, String input, JsonToken startToken, JsonResult* res) {

static THREAD_LOCAL u32 depth;
if (++depth > json_depth_max) {
if (++state->depth > json_depth_max) {
*res = json_err(JsonError_MaximumDepthExceeded);
--depth;
--state->depth;
return input;
}

Expand Down Expand Up @@ -206,7 +206,7 @@ static String json_read_with_start_token(
break;
}

--depth;
--state->depth;
return input;
}

Expand Down
27 changes: 27 additions & 0 deletions libs/xml/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# --------------------------------------------------------------------------------------------------
# Xml library cmake file.
# --------------------------------------------------------------------------------------------------

message(STATUS "> library: xml")

# Static library with the xml document, equality, lexer, reader and writer sources.
add_library(lib_xml STATIC
src/doc.c
src/eq.c
src/lex.c
src/read.c
src/write.c
)
# The 'include' directory and the lib_core dependency are PUBLIC, so they propagate to targets that
# link against lib_xml.
target_include_directories(lib_xml PUBLIC include)
target_link_libraries(lib_xml PUBLIC lib_core)

# Test executable for the xml library.
add_executable(test_lib_xml
test/config.c
test/test_doc.c
test/test_eq.c
test/test_read.c
test/test_write.c
)
target_link_libraries(test_lib_xml PRIVATE lib_app_check lib_xml)

# NOTE(review): 'configure_test' / 'configure_debuggable' are project helpers defined elsewhere;
# presumably they register the test under the short name 'xml' and set up debugging support.
configure_test(test_lib_xml SHORT_NAME xml)
configure_debuggable(test_lib_xml)
13 changes: 13 additions & 0 deletions libs/xml/include/xml.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#pragma once
#include "core.h"

/**
* Forward header for the xml library.
* Only introduces the type names; the enum / struct definitions are provided by the other xml
* headers (for example 'xml_read.h' defines XmlError, XmlResultType and XmlResult).
*/

typedef enum eXmlError XmlError;
typedef enum eXmlResultType XmlResultType;
typedef struct sXmlAttribute XmlAttribute;
typedef struct sXmlDoc XmlDoc;
typedef struct sXmlResult XmlResult;
// Handle to a Xml node, 'sentinel_u32' is used as a sentinel.
typedef u32 XmlNode;
83 changes: 83 additions & 0 deletions libs/xml/include/xml_doc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#pragma once
#include "core_string.h"
#include "xml.h"

/**
* Definition for a Xml Document.
* Supports a subset of Xml 1.0 (https://www.w3.org/TR/2008/REC-xml-20081126/).
*/
typedef struct sXmlDoc XmlDoc;

/**
* Kind of a Xml node.
* 'XmlType_Count' is not a node kind; as the last enumerator it equals the number of kinds.
*/
typedef enum eXmlType {
XmlType_Element,   // Added with 'xml_add_elem()'.
XmlType_Attribute, // Added with 'xml_add_attr()'.
XmlType_Text,      // Added with 'xml_add_text()'.
XmlType_Comment,   // Added with 'xml_add_comment()'.

XmlType_Count,
} XmlType;

/**
* Handle to a Xml node.
* 'sentinel_u32' used as a sentinel.
*/
typedef u32 XmlNode;

/**
* Create a new Xml document.
* NOTE: 'nodeCapacity' is only the initial capacity, more space is automatically allocated when
* required. Capacity of 0 is legal and will allocate memory when the first node is added.
*
* Should be destroyed using 'xml_destroy()'.
*/
XmlDoc* xml_create(Allocator*, usize nodeCapacity);

/**
* Destroy a Xml document.
*/
void xml_destroy(XmlDoc*);

/**
* Clear a Xml document.
* NOTE: After clearing, all previously added Xml nodes are invalidated.
*/
void xml_clear(XmlDoc*);

/**
* Add a new element node to the document.
* Optionally provide a parent element node, provide 'sentinel_u32' to make a root element.
*/
XmlNode xml_add_elem(XmlDoc*, XmlNode parent, String name);

/**
* Add a new attribute node to an element node.
* Returns 'sentinel_u32' when the parent element already had an attribute with the same name.
*/
XmlNode xml_add_attr(XmlDoc*, XmlNode parent, String name, String value);

/**
* Add a new text node to an element node.
*/
XmlNode xml_add_text(XmlDoc*, XmlNode parent, String value);

/**
* Add a new comment node to an element node.
*/
XmlNode xml_add_comment(XmlDoc*, XmlNode parent, String value);

/**
* Query node data.
* None of the queries modify the document (it is passed as 'const').
* The 'xml_attr_*' queries look up an attribute of the given node by name.
* NOTE(review): presumably 'xml_first_child' / 'xml_first_attr' return 'sentinel_u32' when the node
* has no children / attributes and 'xml_attr_get' returns an empty string for a missing attribute;
* confirm against the implementation.
*/
XmlType xml_type(const XmlDoc*, XmlNode);
String xml_name(const XmlDoc*, XmlNode);
String xml_value(const XmlDoc*, XmlNode);
bool xml_attr_has(const XmlDoc*, XmlNode node, String name);
String xml_attr_get(const XmlDoc*, XmlNode node, String name);
XmlNode xml_first_child(const XmlDoc*, XmlNode);
XmlNode xml_first_attr(const XmlDoc*, XmlNode);

/**
* Retrieve the next (sibling) node.
* Returns 'sentinel_u32' when there are no more sibling nodes.
*/
XmlNode xml_next(const XmlDoc*, XmlNode node);
7 changes: 7 additions & 0 deletions libs/xml/include/xml_eq.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#pragma once
#include "xml.h"

/**
* Check if two nodes are equal.
* Both node handles refer to nodes in the given document.
* NOTE(review): takes a non-const 'XmlDoc*' while the query api in 'xml_doc.h' takes
* 'const XmlDoc*'; confirm whether the comparison needs to mutate the document.
*/
bool xml_eq(XmlDoc*, XmlNode x, XmlNode y);
59 changes: 59 additions & 0 deletions libs/xml/include/xml_read.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#pragma once
#include "xml.h"

/**
* Outcome of reading a Xml node.
* Determines which member of the union in 'XmlResult' is meaningful: 'node' on success, 'error' on
* failure.
*/
typedef enum eXmlResultType {
XmlResultType_Success,
XmlResultType_Fail,
} XmlResultType;

/**
* Reason why reading a Xml node failed, reported through 'XmlResult.error'.
* Use 'xml_error_str()' to retrieve a textual representation.
* 'XmlError_Count' is not an error; as the last enumerator it equals the number of error values.
*/
typedef enum eXmlError {
XmlError_InvalidDeclStart,
XmlError_InvalidTagStart,
XmlError_InvalidTagEnd,
XmlError_InvalidChar,
XmlError_InvalidCharInContent,
XmlError_InvalidUtf8,
XmlError_InvalidCommentTerminator,
XmlError_InvalidReference,
XmlError_InvalidDecl,
XmlError_InvalidAttribute,
XmlError_InvalidAttributeValue,
XmlError_UnterminatedString,
XmlError_UnterminatedComment,
XmlError_ContentTooLong,
XmlError_Truncated,
XmlError_UnexpectedToken,
XmlError_MismatchedEndTag,
XmlError_MaximumDepthExceeded,

XmlError_Count,
} XmlError;

/**
* Result of parsing a Xml node.
* If 'type == XmlResultType_Success' then 'node' contains a node in the provided XmlDoc.
* else 'error' contains the reason why parsing failed.
*/
typedef struct sXmlResult {
XmlResultType type;
union {
XmlNode node;
XmlError error;
};
} XmlResult;

/**
* Return a textual representation of the given XmlError.
*/
String xml_error_str(XmlError);

/**
* Read a Xml node.
* Supports a subset of Xml 1.0 (https://www.w3.org/TR/2008/REC-xml-20081126/).
*
* Returns the remaining input.
* The result is written to the output pointer.
*
* Pre-condition: res != null.
*/
String xml_read(XmlDoc*, String, XmlResult* res);
33 changes: 33 additions & 0 deletions libs/xml/include/xml_write.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#pragma once
#include "core_string.h"
#include "xml.h"

/**
* Bit flags that influence how a Xml node is written; values can be combined with bitwise or.
*/
typedef enum {
XmlWriteFlags_None = 0,
// NOTE(review): presumably omits the leading xml declaration from the output - confirm.
XmlWriteFlags_SkipDeclaration = 1 << 0,
} XmlWriteFlags;

/**
* Formatting options for writing a Xml node.
* Use the 'xml_write_opts()' macro to create an instance with default values.
*/
typedef struct {
XmlWriteFlags flags;
String indent;  // NOTE(review): presumably written once per nesting level - confirm.
String newline; // NOTE(review): presumably written as the line separator - confirm.
} XmlWriteOpts;

/**
* Create formatting options ('XmlWriteOpts') with default values: no flags, a whitespace indent and
* a "\n" newline.
* Designated initializers passed as arguments are placed after the defaults in the compound
* literal, so they take precedence over the corresponding default.
* Example: xml_write_opts(.flags = XmlWriteFlags_SkipDeclaration)
*/
#define xml_write_opts(...) \
((XmlWriteOpts){ \
.flags = XmlWriteFlags_None, \
.indent = string_lit(" "), \
.newline = string_lit("\n"), \
__VA_ARGS__})

/**
* Write a Xml node.
* Supports a subset of Xml 1.0 (https://www.w3.org/TR/2008/REC-xml-20081126/).
*/
void xml_write(DynString*, const XmlDoc*, XmlNode, const XmlWriteOpts*);
Loading

0 comments on commit e3af444

Please sign in to comment.