diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9909120e..74d5e1c1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,7 +6,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ windows-2019, macos-10.15, ubuntu-20.04 ] + os: [ windows-2019, macos-latest, ubuntu-20.04 ] cmake: [ 3.15, 3.x ] include: - os: windows-2019 @@ -17,7 +17,7 @@ jobs: - os: ubuntu-20.04 tree: tree - - os: macos-10.15 + - os: macos-latest tree: find - cmake: 3.15 diff --git a/readme.md b/readme.md index 5245ac4e..81fbe68c 100644 --- a/readme.md +++ b/readme.md @@ -1,9 +1,7 @@ TinyXML-2 ========= -![Build](https://github.com/leethomason/tinyxml2/actions/workflows/test.yml/badge.svg) - -![TinyXML-2 Logo](http://www.grinninglizard.com/tinyxml2/TinyXML2_small.png) +[![Test](https://github.com/leethomason/tinyxml2/actions/workflows/test.yml/badge.svg)](https://github.com/leethomason/tinyxml2/actions/workflows/test.yml) TinyXML-2 is a simple, small, efficient, C++ XML parser that can be easily integrated into other programs. @@ -93,7 +91,7 @@ by the Document. When the Document is deleted, so are all the nodes it contains. ### White Space -#### Whitespace Preservation (default) +#### Whitespace Preservation (default, PRESERVE_WHITESPACE) Microsoft has an excellent article on white space: http://msdn.microsoft.com/en-us/library/ms256097.aspx @@ -125,7 +123,7 @@ valuable. TinyXML-2 sees these as the same XML: 123 -#### Whitespace Collapse +#### Whitespace Collapse (COLLAPSE_WHITESPACE) For some applications, it is preferable to collapse whitespace. Collapsing whitespace gives you "HTML-like" behavior, which is sometimes more suitable @@ -143,7 +141,15 @@ However, you may also use COLLAPSE_WHITESPACE, which will: Note that (currently) there is a performance impact for using COLLAPSE_WHITESPACE. It essentially causes the XML to be parsed twice. 
-#### Error Reporting +#### Pedantic Whitespace (PEDANTIC_WHITESPACE) + +For applications that need to know about text nodes that are composed entirely of +whitespace, PEDANTIC_WHITESPACE is available. PEDANTIC_WHITESPACE maintains all the +whitespace between elements. + +PEDANTIC_WHITESPACE is a new mode and not as tested as the other whitespace modes. + +### Error Reporting TinyXML-2 reports the line number of any errors in an XML document that cannot be parsed correctly. In addition, all nodes (elements, declarations, diff --git a/tinyxml2.cpp b/tinyxml2.cpp index 4e95fb8c..a71445ea 100755 --- a/tinyxml2.cpp +++ b/tinyxml2.cpp @@ -715,7 +715,7 @@ bool XMLUtil::ToUnsigned64(const char* str, uint64_t* value) { } -char* XMLDocument::Identify( char* p, XMLNode** node ) +char* XMLDocument::Identify( char* p, XMLNode** node, bool first ) { TIXMLASSERT( node ); TIXMLASSERT( p ); @@ -767,9 +767,19 @@ char* XMLDocument::Identify( char* p, XMLNode** node ) p += dtdHeaderLen; } else if ( XMLUtil::StringEqual( p, elementHeader, elementHeaderLen ) ) { - returnNode = CreateUnlinkedNode( _elementPool ); - returnNode->_parseLineNum = _parseCurLineNum; - p += elementHeaderLen; + + // Preserve whitespace pedantically before closing tag, when it's immediately after opening tag + if (WhitespaceMode() == PEDANTIC_WHITESPACE && first && p != start && *(p + elementHeaderLen) == '/') { + returnNode = CreateUnlinkedNode(_textPool); + returnNode->_parseLineNum = startLine; + p = start; // Back it up, all the text counts. 
+ _parseCurLineNum = startLine; + } + else { + returnNode = CreateUnlinkedNode(_elementPool); + returnNode->_parseLineNum = _parseCurLineNum; + p += elementHeaderLen; + } } else { returnNode = CreateUnlinkedNode( _textPool ); @@ -1098,14 +1108,16 @@ char* XMLNode::ParseDeep( char* p, StrPair* parentEndTag, int* curLineNumPtr ) if (_document->Error()) return 0; + bool first = true; while( p && *p ) { XMLNode* node = 0; - p = _document->Identify( p, &node ); + p = _document->Identify( p, &node, first ); TIXMLASSERT( p ); if ( node == 0 ) { break; } + first = false; const int initialLineNum = node->_parseLineNum; diff --git a/tinyxml2.h b/tinyxml2.h index da9a5a77..bab582c3 100755 --- a/tinyxml2.h +++ b/tinyxml2.h @@ -1710,7 +1710,8 @@ class TINYXML2_LIB XMLElement : public XMLNode enum Whitespace { PRESERVE_WHITESPACE, - COLLAPSE_WHITESPACE + COLLAPSE_WHITESPACE, + PEDANTIC_WHITESPACE }; @@ -1921,7 +1922,7 @@ class TINYXML2_LIB XMLDocument : public XMLNode void DeepCopy(XMLDocument* target) const; // internal - char* Identify( char* p, XMLNode** node ); + char* Identify( char* p, XMLNode** node, bool first ); // internal void MarkInUse(const XMLNode* const); diff --git a/xmltest.cpp b/xmltest.cpp index c3ce079e..ae976042 100755 --- a/xmltest.cpp +++ b/xmltest.cpp @@ -1869,6 +1869,178 @@ int main( int argc, const char ** argv ) XMLTest( "Whitespace all space", true, 0 == doc.FirstChildElement()->FirstChild() ); } + // ----------- Preserve Whitespace ------------ + { + const char* xml = "This is ' \n\n text '"; + XMLDocument doc(true, PRESERVE_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with whitespace preserved", false, doc.Error()); + XMLTest("Whitespace preserved", "This is ' \n\n text '", doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " This \nis ' text ' "; + XMLDocument doc(true, PRESERVE_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with whitespace preserved", false, doc.Error()); + XMLTest("Whitespace preserved", " This \nis ' text ' 
", doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " \n This is ' text ' \n"; + XMLDocument doc(true, PRESERVE_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with whitespace preserved", false, doc.Error()); + XMLTest("Whitespace preserved", " \n This is ' text ' \n", doc.FirstChildElement()->GetText()); + } + + // Following cases are for text that is all whitespace which are not preserved intentionally + { + const char* xml = " "; + XMLDocument doc(true, PRESERVE_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with whitespace preserved", false, doc.Error()); + XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " "; + XMLDocument doc(true, PRESERVE_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with whitespace preserved", false, doc.Error()); + XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText()); + } + + { + const char* xml = "\n\n"; + XMLDocument doc(true, PRESERVE_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with whitespace preserved", false, doc.Error()); + XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " \n"; + XMLDocument doc(true, PRESERVE_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with whitespace preserved", false, doc.Error()); + XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " \n \n "; + XMLDocument doc(true, PRESERVE_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with whitespace preserved", false, doc.Error()); + XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText()); + } + + // ----------- Pedantic Whitespace ------------ + { + const char* xml = "This is ' \n\n text '"; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", "This is ' \n\n text '", doc.FirstChildElement()->GetText()); + } 
+ + { + const char* xml = " This \nis ' text ' "; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", " This \nis ' text ' ", doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " \n This is ' text ' \n"; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", " \n This is ' text ' \n", doc.FirstChildElement()->GetText()); + } + + // Following cases are for text that is all whitespace which is preserved with pedantic mode + { + const char* xml = " "; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", " ", doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " "; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", " ", doc.FirstChildElement()->GetText()); + } + + { + const char* xml = "\n\n\n"; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", "\n\n", doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " \n \n "; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", " \n", doc.FirstChildElement()->GetText()); + } + + { + const char* xml = " \n \n "; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", " \n \n ", doc.FirstChildElement()->GetText()); + } + + // Following cases are for checking nested elements are still parsed with pedantic whitespace + { + const char* xml = 
"\n\t This is nested text \n "; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", " This is nested text ", doc.RootElement()->FirstChildElement()->GetText()); + } + + { + const char* xml = " \n"; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", " ", doc.RootElement()->FirstChildElement()->GetText()); + } + + { + const char* xml = " \n "; + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.Parse(xml); + XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error()); + XMLTest("Pedantic whitespace", true, 0 == doc.RootElement()->FirstChildElement()->GetText()); + } + + // Check sample xml can be parsed with pedantic mode + { + XMLDocument doc(true, PEDANTIC_WHITESPACE); + doc.LoadFile("resources/dream.xml"); + XMLTest("Load dream.xml with pedantic whitespace mode", false, doc.Error()); + + XMLTest("Dream", "xml version=\"1.0\"", + doc.FirstChild()->ToDeclaration()->Value()); + XMLTest("Dream", true, doc.FirstChild()->NextSibling()->ToUnknown() != 0); + XMLTest("Dream", "DOCTYPE PLAY SYSTEM \"play.dtd\"", + doc.FirstChild()->NextSibling()->ToUnknown()->Value()); + XMLTest("Dream", "And Robin shall restore amends.", + doc.LastChild()->LastChild()->LastChild()->LastChild()->LastChildElement()->GetText()); + } + { // An assert should not fire. const char* xml = "";