From 98303bbda342433ef3735b13aa902cef4499c5e7 Mon Sep 17 00:00:00 2001
From: Kevin Saul <kevinsaul@gmail.com>
Date: Sun, 28 May 2023 21:36:28 +1200
Subject: [PATCH 1/4] add pedantic whitespace mode

---
 tinyxml2.cpp |  22 +++++--
 tinyxml2.h   |   5 +-
 xmltest.cpp  | 172 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 192 insertions(+), 7 deletions(-)
diff --git a/tinyxml2.cpp b/tinyxml2.cpp
index 4b561b3d..901e3957 100755
--- a/tinyxml2.cpp
+++ b/tinyxml2.cpp
@@ -715,7 +715,7 @@ bool XMLUtil::ToUnsigned64(const char* str, uint64_t* value) {
 }
 
 
-char* XMLDocument::Identify( char* p, XMLNode** node )
+char* XMLDocument::Identify( char* p, XMLNode** node, bool first )
 {
     TIXMLASSERT( node );
     TIXMLASSERT( p );
@@ -767,9 +767,19 @@ char* XMLDocument::Identify( char* p, XMLNode** node )
         p += dtdHeaderLen;
     }
     else if ( XMLUtil::StringEqual( p, elementHeader, elementHeaderLen ) ) {
-        returnNode =  CreateUnlinkedNode<XMLElement>( _elementPool );
-        returnNode->_parseLineNum = _parseCurLineNum;
-        p += elementHeaderLen;
+
+        // Preserve whitespace pedantically before closing tag, when it's immediately after opening tag
+        if (WhitespaceMode() == PEDANTIC_WHITESPACE && first && p != start && *(p + elementHeaderLen) == '/') {
+            returnNode = CreateUnlinkedNode<XMLText>(_textPool);
+            returnNode->_parseLineNum = startLine;
+            p = start;	// Back it up, all the text counts.
+            _parseCurLineNum = startLine;
+        }
+        else {
+            returnNode = CreateUnlinkedNode<XMLElement>(_elementPool);
+            returnNode->_parseLineNum = _parseCurLineNum;
+            p += elementHeaderLen;
+        }
     }
     else {
         returnNode = CreateUnlinkedNode<XMLText>( _textPool );
@@ -1070,14 +1080,16 @@ char* XMLNode::ParseDeep( char* p, StrPair* parentEndTag, int* curLineNumPtr )
 	if (_document->Error())
 		return 0;
 
+	bool first = true;
 	while( p && *p ) {
         XMLNode* node = 0;
 
-        p = _document->Identify( p, &node );
+        p = _document->Identify( p, &node, first );
         TIXMLASSERT( p );
         if ( node == 0 ) {
             break;
         }
+        first = false;
 
        const int initialLineNum = node->_parseLineNum;
 
diff --git a/tinyxml2.h b/tinyxml2.h
index b0c8b6c0..45d6980c 100755
--- a/tinyxml2.h
+++ b/tinyxml2.h
@@ -1704,7 +1704,8 @@ class TINYXML2_LIB XMLElement : public XMLNode
 
 enum Whitespace {
     PRESERVE_WHITESPACE,
-    COLLAPSE_WHITESPACE
+    COLLAPSE_WHITESPACE,
+    PEDANTIC_WHITESPACE
 };
 
 
@@ -1915,7 +1916,7 @@ class TINYXML2_LIB XMLDocument : public XMLNode
 	void DeepCopy(XMLDocument* target) const;
 
 	// internal
-    char* Identify( char* p, XMLNode** node );
+    char* Identify( char* p, XMLNode** node, bool first );
 
 	// internal
 	void MarkInUse(const XMLNode* const);
diff --git a/xmltest.cpp b/xmltest.cpp
index c3ce079e..ae976042 100755
--- a/xmltest.cpp
+++ b/xmltest.cpp
@@ -1869,6 +1869,178 @@ int main( int argc, const char ** argv )
 		XMLTest( "Whitespace  all space", true, 0 == doc.FirstChildElement()->FirstChild() );
 	}
 
+	// ----------- Preserve Whitespace ------------
+	{
+		const char* xml = "<element>This  is  &apos;  \n\n text &apos;</element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", "This  is  '  \n\n text '", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element> This \nis &apos;  text  &apos;  </element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", " This \nis '  text  '  ", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>  \n This is &apos; text &apos;  \n</element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", "  \n This is ' text '  \n", doc.FirstChildElement()->GetText());
+	}
+
+	// The following cases cover text that is entirely whitespace, which is intentionally not preserved
+	{
+		const char* xml = "<element> </element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>   </element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>\n\n</element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>  \n</element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element> \n \n </element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+	}
+
+	// ----------- Pedantic Whitespace ------------
+	{
+		const char* xml = "<element>This  is  &apos;  \n\n text &apos;</element>";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", "This  is  '  \n\n text '", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element> This \nis &apos;  text  &apos;  </element>";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", " This \nis '  text  '  ", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>  \n This is &apos; text &apos;  \n</element>";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", "  \n This is ' text '  \n", doc.FirstChildElement()->GetText());
+	}
+
+	// Following cases are for text that is all whitespace which is preserved with pedantic mode
+	{
+		const char* xml = "<element> </element>";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", " ", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>   </element>";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", "   ", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>\n\n</element>\n";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", "\n\n", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>  \n</element> \n ";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", "  \n", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element> \n  \n </element>  ";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", " \n  \n ", doc.FirstChildElement()->GetText());
+	}
+
+	// Following cases are for checking nested elements are still parsed with pedantic whitespace
+	{
+		const char* xml = "<element>\n\t<a> This is nested text </a>\n</element>  ";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", " This is nested text ", doc.RootElement()->FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>  <b> </b>  </element>\n";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", " ", doc.RootElement()->FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>  <c attribute=\"test\"/>  </element>\n ";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", true, 0 == doc.RootElement()->FirstChildElement()->GetText());
+	}
+
+	// Check sample xml can be parsed with pedantic mode
+	{
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.LoadFile("resources/dream.xml");
+		XMLTest("Load dream.xml with pedantic whitespace mode", false, doc.Error());
+
+		XMLTest("Dream", "xml version=\"1.0\"",
+			doc.FirstChild()->ToDeclaration()->Value());
+		XMLTest("Dream", true, doc.FirstChild()->NextSibling()->ToUnknown() != 0);
+		XMLTest("Dream", "DOCTYPE PLAY SYSTEM \"play.dtd\"",
+			doc.FirstChild()->NextSibling()->ToUnknown()->Value());
+		XMLTest("Dream", "And Robin shall restore amends.",
+			doc.LastChild()->LastChild()->LastChild()->LastChild()->LastChildElement()->GetText());
+	}
+
 	{
 		// An assert should not fire.
 		const char* xml = "<element/>";

From 428cd1be420f99f8af5552bf1e75a1eb2474ae49 Mon Sep 17 00:00:00 2001
From: Lee Thomason <leethomason@gmail.com>
Date: Tue, 21 Nov 2023 11:52:44 -0800
Subject: [PATCH 2/4] update readme

---
 readme.md | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/readme.md b/readme.md
index 5245ac4e..5cc158c4 100644
--- a/readme.md
+++ b/readme.md
@@ -93,7 +93,7 @@ by the Document. When the Document is deleted, so are all the nodes it contains.
 
 ### White Space
 
-#### Whitespace Preservation (default)
+#### Whitespace Preservation (default, PRESERVE_WHITESPACE)
 
 Microsoft has an excellent article on white space: http://msdn.microsoft.com/en-us/library/ms256097.aspx
 
@@ -125,7 +125,7 @@ valuable. TinyXML-2 sees these as the same XML:
 
 	<document><data>1</data><data>2</data><data>3</data></document>
 
-#### Whitespace Collapse
+#### Whitespace Collapse (COLLAPSE_WHITESPACE)
 
 For some applications, it is preferable to collapse whitespace. Collapsing
 whitespace gives you "HTML-like" behavior, which is sometimes more suitable
@@ -143,7 +143,15 @@ However, you may also use COLLAPSE_WHITESPACE, which will:
 Note that (currently) there is a performance impact for using COLLAPSE_WHITESPACE.
 It essentially causes the XML to be parsed twice.
 
-#### Error Reporting
+#### Pedantic Whitespace (PEDANTIC_WHITESPACE)
+
+For applications that need to know about text nodes that are composed entirely of 
+whitespace, PEDANTIC_WHITESPACE is available. PEDANTIC_WHITESPACE maintains all the
+whitespace between elements.
+
+PEDANTIC_WHITESPACE is a new mode and not as tested as the other whitespace modes.
+
+### Error Reporting
 
 TinyXML-2 reports the line number of any errors in an XML document that
 cannot be parsed correctly. In addition, all nodes (elements, declarations,

From 941e2d9018af2d689e3c79a4e5310be617b23e19 Mon Sep 17 00:00:00 2001
From: Lee Thomason <leethomason@gmail.com>
Date: Tue, 21 Nov 2023 11:58:20 -0800
Subject: [PATCH 3/4] fix the tests?

---
 .github/workflows/test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 9909120e..74d5e1c1 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -6,7 +6,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ windows-2019, macos-10.15, ubuntu-20.04 ]
+        os: [ windows-2019, macos-latest, ubuntu-20.04 ]
         cmake: [ 3.15, 3.x ]
         include:
           - os: windows-2019
@@ -17,7 +17,7 @@ jobs:
           - os: ubuntu-20.04
             tree: tree
 
-          - os: macos-10.15
+          - os: macos-latest
             tree: find
 
           - cmake: 3.15

From 8d3cdf50dbfe388aadc0fdacfd026c2a9f22d9d4 Mon Sep 17 00:00:00 2001
From: Lee Thomason <leethomason@gmail.com>
Date: Tue, 21 Nov 2023 12:02:26 -0800
Subject: [PATCH 4/4] now fix the badge

---
 readme.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/readme.md b/readme.md
index 5cc158c4..81fbe68c 100644
--- a/readme.md
+++ b/readme.md
@@ -1,9 +1,7 @@
 TinyXML-2
 =========
 
-![Build](https://github.com/leethomason/tinyxml2/actions/workflows/test.yml/badge.svg)
-
-![TinyXML-2 Logo](http://www.grinninglizard.com/tinyxml2/TinyXML2_small.png)
+[![Test](https://github.com/leethomason/tinyxml2/actions/workflows/test.yml/badge.svg)](https://github.com/leethomason/tinyxml2/actions/workflows/test.yml)
 
 TinyXML-2 is a simple, small, efficient, C++ XML parser that can be
 easily integrated into other programs.