Skip to content

Commit

Permalink
Add support for 'xml' keyword in liblognorm.
Browse files Browse the repository at this point in the history
  • Loading branch information
Jérémie Jourdin authored and frikilax committed Nov 15, 2021
1 parent 1e18f60 commit 3d0260d
Show file tree
Hide file tree
Showing 9 changed files with 218 additions and 3 deletions.
23 changes: 23 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,28 @@ else
fi
AC_SUBST(FEATURE_REGEXP)

# XML parsing (optional feature, disabled unless requested)
# The --enable-xml switch accepts only "yes" or "no"; any other value
# aborts configure with an error. Without the switch, enable_xml is "no".
AC_ARG_ENABLE(xml,
[AS_HELP_STRING([--enable-xml],[Enable XML parsing @<:@default=no@:>@])],
[case "${enableval}" in
yes) enable_xml="yes" ;;
no) enable_xml="no" ;;
*) AC_MSG_ERROR(bad value ${enableval} for --enable-xml) ;;
esac],
[enable_xml="no"]
)
# ENABLE_XML is the automake conditional for this feature.
AM_CONDITIONAL(ENABLE_XML, test x$enable_xml = xyes)
# When enabled, look up libxml2 through pkg-config: the module name
# "libxml2" is tried first and "libxml-2.0" is the fallback. FEATURE_XML
# is then defined for the C sources; it is also substituted as 1 or 0.
if test "$enable_xml" = "yes"; then
PKG_CHECK_MODULES(LIBXML2, libxml2,,
[PKG_CHECK_MODULES(LIBXML2, libxml-2.0,,)]
)
AC_DEFINE(FEATURE_XML, 1, [XML parsing support enabled.])
FEATURE_XML=1
else
FEATURE_XML=0
fi
AC_SUBST(FEATURE_XML)

# debug mode settings
AC_ARG_ENABLE(debug,
[AS_HELP_STRING([--enable-debug],[Enable debug mode @<:@default=no@:>@])],
Expand Down Expand Up @@ -189,6 +211,7 @@ echo "*****************************************************"
echo "liblognorm will be compiled with the following settings:"
echo
echo "Regex enabled: $enable_regexp"
echo "XML enabled: $enable_xml"
echo "Advanced Statistics enabled: $enable_advstats"
echo "Testbench enabled: $enable_testbench"
echo "Valgrind enabled: $enable_valgrind"
Expand Down
4 changes: 2 additions & 2 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ liblognorm_la_SOURCES += \
v1_ptree.c \
v1_samp.c

liblognorm_la_CPPFLAGS = $(JSON_C_CFLAGS) $(WARN_CFLAGS) $(LIBESTR_CFLAGS) $(PCRE_CFLAGS)
liblognorm_la_LIBADD = $(rt_libs) $(JSON_C_LIBS) $(LIBESTR_LIBS) $(PCRE_LIBS) -lestr
liblognorm_la_CPPFLAGS = $(JSON_C_CFLAGS) $(WARN_CFLAGS) $(LIBESTR_CFLAGS) $(PCRE_CFLAGS) $(LIBXML2_CFLAGS)
liblognorm_la_LIBADD = $(rt_libs) $(JSON_C_LIBS) $(LIBESTR_LIBS) $(PCRE_LIBS) $(LIBXML2_LIBS) -lestr
# info on version-info:
# http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html
# Note: v2 now starts at version 5, as v1 previously also had 4
Expand Down
105 changes: 105 additions & 0 deletions src/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@
#include <errno.h>
#endif

#ifdef FEATURE_XML
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
#endif


/* how should output values be formatted? */
enum FMT_MODE {
Expand Down Expand Up @@ -75,6 +80,41 @@ hParseInt(const unsigned char **buf, size_t *lenBuf)
return i;
}


#ifdef FEATURE_XML
/* Credits to https://github.com/katie-snow/xml2json-c
This code is under GPL-3.0 License
*/
/**
 * Convert a chain of sibling XML nodes into members of a JSON object.
 *
 * Walks anode and all of its following siblings. Only element nodes
 * produce output; text, comment and all other node types are skipped.
 *  - An element without child elements is added to jobj as a JSON string
 *    holding the element's text content (the empty string if libxml2
 *    reports no content).
 *  - An element with child elements is added to jobj as a nested JSON
 *    object, which is filled by recursing into the element's children.
 *
 * XML attributes are not converted. json_object_object_add() is called
 * once per element with the element name as key, so siblings that share
 * a name are not collected into an array.
 *
 * Ownership: every JSON value created here is handed over to jobj; no
 * extra reference is kept, so releasing jobj releases the whole tree.
 *
 * @param anode first node of the sibling chain, may be NULL (no-op)
 * @param jobj  JSON object receiving the members, must not be NULL
 */
static inline void
xml2jsonc_convert_elements(xmlNode *anode, json_object *jobj)
{
	xmlNode *cur_node;

	for (cur_node = anode; cur_node != NULL; cur_node = cur_node->next) {
		const char *name;

		if (cur_node->type != XML_ELEMENT_NODE)
			continue;
		name = (const char *)cur_node->name;

		if (xmlChildElementCount(cur_node) == 0) {
			/* leaf element -> JSON string */
			json_object *jstr;
			/* xmlNodeGetContent() returns a copy owned by the caller */
			xmlChar *content = xmlNodeGetContent(cur_node);

			jstr = json_object_new_string(
				content != NULL ? (const char *)content : "");
			if (content != NULL)
				xmlFree(content);
			if (jstr != NULL)
				json_object_object_add(jobj, name, jstr);
		} else {
			/* element with child elements -> nested JSON object */
			json_object *child = json_object_new_object();

			if (child == NULL)
				continue;
			/* jobj takes over the only reference to child */
			json_object_object_add(jobj, name, child);
			xml2jsonc_convert_elements(cur_node->children, child);
		}
	}
}
#endif /* #ifdef FEATURE_XML */

/* parser _parse interface
*
* All parsers receive
Expand Down Expand Up @@ -2325,6 +2365,71 @@ PARSER_Parse(v2IPTables)
return r;
}

#ifdef FEATURE_XML
/**
* Parse XML. This parser tries to find XML data inside a message.
* If it finds valid XML, it will extract it.
*
* Note: The XML Parser expects a string that begins with '<' and
* ends with '>'. whitespace or any other character at the
* beginning or at the end of the string will cause a parse failure
*
* Note: Is there is extra content after the XML content
* the parser will fail. A hack consist of finding the
* last '>' in the string and ignore the rest.
*
* added 2021-02-01 by [email protected]
*/
PARSER_Parse(XML)
xmlDocPtr doc = NULL;
xmlNodePtr root_element = NULL;

/* Find the last occurence of '>' in the string */
char * pch;
pch=strrchr((const char *) npb->str + *offs, '>');

/* Truncate the string after the last occurence of '>' */
int newLen = pch - (npb->str + *offs) + 1;
char *cstr = strndup(npb->str + *offs, newLen);
CHKN(cstr);

doc=xmlParseDoc((xmlChar*) cstr);
free(cstr);

/* Invalid XML string */
if (doc == NULL) {
goto done;
}

/* Now convert XML document into JSON document */
root_element = xmlDocGetRootElement(doc);
json_object *json = NULL;
json = json_object_new_object();
xml2jsonc_convert_elements(root_element, json);

if(json == NULL)
goto done;

/* parsing OK */
*parsed = newLen ;
r = 0;

if(value == NULL) {
json_object_put(json);
} else {
*value = json;
}

done:
if(doc != NULL)
xmlFreeDoc(doc);
xmlCleanupParser();
return r;
}
#endif /* #ifdef FEATURE_XML */



/**
* Parse JSON. This parser tries to find JSON data inside a message.
* If it finds valid JSON, it will extract it. Extra data after the
Expand Down
3 changes: 3 additions & 0 deletions src/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ PARSERDEF_NO_DATA(MAC48);
PARSERDEF_NO_DATA(CEF);
PARSERDEF(CheckpointLEA);
PARSERDEF(NameValue);
#ifdef FEATURE_XML
PARSERDEF_NO_DATA(XML);
#endif

#undef PARSERDEF_NO_DATA

Expand Down
5 changes: 4 additions & 1 deletion src/pdag.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,10 @@ static struct ln_parser_info parser_lookup_table[] = {
PARSER_ENTRY("string-to", StringTo, 32),
PARSER_ENTRY("char-to", CharTo, 32),
PARSER_ENTRY("char-sep", CharSeparated, 32),
PARSER_ENTRY("string", String, 32)
PARSER_ENTRY("string", String, 32),
#ifdef FEATURE_XML
PARSER_ENTRY_NO_DATA("xml", XML, 4),
#endif
};
#define NPARSERS (sizeof(parser_lookup_table)/sizeof(struct ln_parser_info))
#define DFLT_USR_PARSER_PRIO 30000 /**< default priority if user has not specified it */
Expand Down
1 change: 1 addition & 0 deletions src/v1_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ hParseInt(const unsigned char **buf, size_t *lenBuf)
return i;
}


/* parsers for the primitive types
*
* All parsers receive
Expand Down
8 changes: 8 additions & 0 deletions tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@ REGEXP_TESTS = \
field_tokenized_with_regex.sh \
field_regex_while_regex_support_is_disabled.sh

XML_TESTS = \
field_xml.sh \
field_xml_jsoncnf.sh

EXTRA_DIST = exec.sh \
$(TESTS_SHELLSCRIPTS) \
$(REGEXP_TESTS) \
Expand All @@ -167,3 +171,7 @@ EXTRA_DIST = exec.sh \
if ENABLE_REGEXP
TESTS += $(REGEXP_TESTS)
endif

if ENABLE_XML
TESTS += $(XML_TESTS)
endif
36 changes: 36 additions & 0 deletions tests/field_xml.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash
# added 2021-11-14 by Theo Bertin
# This file is part of the liblognorm project, released under ASL 2.0
#
# Test for the "xml" field type, using the plain %name:type% rule syntax.
# The rulebase consists of a single rule whose only element is an xml
# field called "field".
. $srcdir/exec.sh

test_def $0 "XML field"
add_rule 'version=2'
add_rule 'rule=:%field:xml%'

# Root element with text only: expected as a string keyed by the element name.
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note</note>'
assert_output_json_eq '{ "field": { "note": "This is a simple note"} }'

# Root element with child elements: expected as a nested object.
execute '<?xml version="1.0" encoding="UTF-8"?><note><one>first note</one><two>second note</two></note>'
assert_output_json_eq '{ "field": { "note": { "one": "first note", "two": "second note" } } }'

#
# Things that MUST NOT work
#
# Data after the document: the expected output reports the single trailing
# space as unparsed-data, so the message as a whole is not normalized.
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note</note> ' # note the trailing space
assert_output_json_eq '{ "originalmsg": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note<\/note> ", "unparsed-data": " " }'

# Missing closing tag: the whole message is expected back as unparsed-data.
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note'
assert_output_json_eq '{ "originalmsg": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note", "unparsed-data": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note" }'

# Closing tag does not match the opening tag: the whole message is
# expected back as unparsed-data.
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note</note2>'
assert_output_json_eq '{ "originalmsg": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note</note2>", "unparsed-data": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note</note2>" }'


cleanup_tmp_files

36 changes: 36 additions & 0 deletions tests/field_xml_jsoncnf.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash
# added 2021-11-14 by Theo Bertin
# This file is part of the liblognorm project, released under ASL 2.0
#
# Test for the "xml" field type, with the field declared as a JSON object
# ({"name":..., "type":...}) inside the rule. The rulebase consists of a
# single rule whose only element is an xml field called "field".
. $srcdir/exec.sh

test_def $0 "XML field"
add_rule 'version=2'
add_rule 'rule=:%{"name":"field", "type":"xml"}%'

# Root element with text only: expected as a string keyed by the element name.
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note</note>'
assert_output_json_eq '{ "field": { "note": "This is a simple note"} }'

# Root element with child elements: expected as a nested object.
execute '<?xml version="1.0" encoding="UTF-8"?><note><one>first note</one><two>second note</two></note>'
assert_output_json_eq '{ "field": { "note": { "one": "first note", "two": "second note" } } }'

#
# Things that MUST NOT work
#
# Data after the document: the expected output reports the single trailing
# space as unparsed-data, so the message as a whole is not normalized.
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note</note> ' # note the trailing space
assert_output_json_eq '{ "originalmsg": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note<\/note> ", "unparsed-data": " " }'

# Missing closing tag: the whole message is expected back as unparsed-data.
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note'
assert_output_json_eq '{ "originalmsg": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note", "unparsed-data": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note" }'

# Closing tag does not match the opening tag: the whole message is
# expected back as unparsed-data.
execute '<?xml version="1.0" encoding="UTF-8"?><note>This is a simple note</note2>'
assert_output_json_eq '{ "originalmsg": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note</note2>", "unparsed-data": "<?xml version=\"1.0\" encoding=\"UTF-8\"?><note>This is a simple note</note2>" }'


cleanup_tmp_files

0 comments on commit 3d0260d

Please sign in to comment.