diff --git a/lib/wgit/document_extractors.rb b/lib/wgit/document_extractors.rb
index 8e4b6f6..ed09a57 100644
--- a/lib/wgit/document_extractors.rb
+++ b/lib/wgit/document_extractors.rb
@@ -70,7 +70,7 @@
   text_content_only: true
 ) do |text, doc, type|
   if type == :document
-    html_to_text = Wgit::HtmlToText.new(doc.parser)
+    html_to_text = Wgit::HTMLToText.new(doc.parser)
     text = html_to_text.extract
   end
 
diff --git a/lib/wgit/html_to_text.rb b/lib/wgit/html_to_text.rb
index e2ac014..1622333 100644
--- a/lib/wgit/html_to_text.rb
+++ b/lib/wgit/html_to_text.rb
@@ -4,12 +4,14 @@
 
 module Wgit
   # Class used to extract the visible page text from a HTML string.
-  # This is used to set the output of a Wgit::Document#text method.
-  class HtmlToText
+  # This is in turn used to set the output of a Wgit::Document#text method.
+  class HTMLToText
     include Assertable
 
     # Set of text elements used to extract the visible text.
-    # The element's display (:inline or :block) is used to delimit sentences.
+    # The element's display (:inline or :block) is used to delimit sentences e.g.
+    # <div>foo</div><div>bar</div> will be extracted as ['foo', 'bar'] whereas
+    # <span>foo</span><span>bar</span> will be extracted as ['foobar'].
     @text_elements = {
       a:          :inline,
       abbr:       :inline,
@@ -21,7 +23,7 @@ class HtmlToText
       bdo:        :inline,
       blockquote: :block,
       br:         :block,
-      button:     :inline,
+      button:     :block, # Normally inline but Wgit treats as block.
       caption:    :block,
       cite:       :inline,
       code:       :inline,
@@ -106,7 +108,7 @@ def initialize(parser)
 
     # Extracts and returns the text sentences from the @parser HTML.
     #
-    # @return [Array<String>] An array of text sentences.
+    # @return [Array<String>] An array of unique text sentences.
     def extract_arr
       Wgit::Utils.pprint('START_TEXT_ARR', display: @display_logs)
 
@@ -128,17 +130,18 @@ def extract_arr
       text
     end
 
+    # Extracts and returns a text string from the @parser HTML.
+    #
+    # @return [String] A string of text with \n delimiting sentences.
     def extract_str
       text_str = ''
 
       iterate_child_nodes(@parser) do |node, display|
-
         Wgit::Utils.pprint('NODE', display: @display_logs, node: node.name, text: node.text)
 
-        # byebug if node_name(node) == :a && node.text.downcase == 'contact'
+        # byebug if node_name(node) == :span && node.text.downcase == 'post'
 
         # Handle any special cases e.g. skip nodes we don't care about...
-
         # <pre> nodes should have their contents displayed exactly as is.
         if node_name(node) == :pre
           Wgit::Utils.pprint('ADDING_PRE_CONTENT_AS_IS', display: @display_logs, content: "\n#{node.text}")
@@ -152,25 +155,22 @@ def extract_str
         next if child_of?(:pre, node)
 
         if node.text?
-          # Skip any text element containing a new line as semantic HTML will
-          # use <br> and block elements for this.
-          next if contains_new_line(node.text)
+          # Skip any text element that is purely whitespace.
+          next unless valid_text_content?(node.text)
         else
           # Skip a concrete node if it has other concrete child nodes as these
           # will be iterated onto later.
-          # Process if node has no children or one child which is a text node.
+          # Process if node has no children or one child which is a valid text node.
           unless node.children.empty? || (node.children.size == 1 && parent_of_text_node?(node))
             next
           end
         end
 
+        # Apply display rules deciding if a new line is needed before node.text.
         add_new_line = false
         node_text    = format_text(node.text)
         prev         = prev_sibling_or_parent(node)
-        sibling      = prev_sibling(node)
-        parent       = node.parent
 
-        # Apply display rules deciding if a new line is needed before node.text.
         if node.text?
           unless prev && inline?(prev)
             Wgit::Utils.pprint('ADDING_NEW_LINE_FOR_TEXT_1', display: @display_logs)
@@ -186,11 +186,6 @@ def extract_str
             Wgit::Utils.pprint('ADDING_NEW_LINE_FOR_NODE_2', display: @display_logs)
             add_new_line = true
           end
-
-          if prev && block?(prev) && !parent_of_text_node?(prev)
-            Wgit::Utils.pprint('ADDING_NEW_LINE_FOR_NODE_3', display: @display_logs)
-            add_new_line = true
-          end
         end
 
         text_str << "\n" if add_new_line
@@ -205,6 +200,7 @@ def extract_str
         .strip
         .squeeze("\n")
         .squeeze("\t")
+        .squeeze(' ')
     end
 
     private
@@ -215,7 +211,7 @@ def node_name(node)
 
     def display(node)
       name = node_name(node)
-      HtmlToText.text_elements[name]
+      Wgit::HTMLToText.text_elements[name]
     end
 
     def inline?(node)
@@ -226,16 +222,21 @@ def block?(node)
       display(node) == :block
     end
 
+    # Returns the previous sibling of node or nil. Only valid text elements are
+    # returned i.e. non duplicates with valid text content.
     def prev_sibling(node)
       prev = node.previous
 
       return nil unless prev
       return prev unless prev.text?
       return prev if valid_text_node?(prev) && !contains_new_line(prev.text)
+      return prev if valid_text_node?(prev) && !format_text(prev.text).strip.empty?
 
       prev.previous
     end
 
+    # Returns node's previous sibling, parent or nil; in that order. Only valid
+    # text elements are returned i.e. non duplicates with valid text content.
     def prev_sibling_or_parent(node)
       prev = prev_sibling(node)
       return prev if prev
@@ -267,7 +268,8 @@ def contains_new_line(text)
       ["\n", '\\n'].any? { |new_line| text.include?(new_line) }
     end
 
-    # Remove any new lines as semantic HTML will use <br> or block elements.
+    # Remove special characters, including any new lines, as semantic HTML will
+    # use <br> and/or block elements to denote a line break.
     def format_text(text)
       text
         .gsub("\n",  '')
@@ -277,7 +279,7 @@ def format_text(text)
     end
 
-    # Iterate over node and it's child nodes, yielding each to &block.
-    # Only HtmlToText.text_elements or valid :text nodes will be yielded.
+    # Iterate over node and its child nodes, yielding each to &block.
+    # Only HTMLToText.text_elements or valid :text nodes will be yielded.
     # Duplicate text nodes (that follow a concrete node) are omitted.
     def iterate_child_nodes(node, &block)
       display = display(node)
diff --git a/test/mock/fixtures/test_doc.html b/test/mock/fixtures/test_doc.html
index 4aa7184..2b8d598 100644
--- a/test/mock/fixtures/test_doc.html
+++ b/test/mock/fixtures/test_doc.html
@@ -15,21 +15,21 @@
 
 <body id="main-body" onload="">
   <script type="text/javascript">var msg = "Hello from html body";</script>
-  <h1>Howdy!</h1>
-  <a href="#welcome">Welcome</a>
-  <a href="?foo=bar">Foo Bar</a>
-  <a href="http://www.google.co.uk">Google</a>
-  <a href="//fonts.googleapis.com">Scheme-relative URL</a>
-  <a href="http://www.mytestsite.com/security.html">Security</a>
+  <h1>Howdy!</h1><br>
+  <div><a href="#welcome">Welcome</a></div>
+  <div><a href="?foo=bar">Foo Bar</a></div>
+  <div><a href="http://www.google.co.uk">Google</a></div>
+  <div><a href="//fonts.googleapis.com">Scheme-relative URL</a></div>
+  <div><a href="http://www.mytestsite.com/security.html">Security</a></div>
   <h2 id="welcome">Welcome to my site, I hope you like what you see and enjoy browsing the various randomness.</h2>
-  <a href="/about.html">About</a>
-  <a href="about.html/">About 2</a><!-- This duplicate URL is deliberate -->
-  <a href="/">Index</a>
+  <div><a href="/about.html">About</a></div>
+  <div><a href="about.html/">About 2</a><!-- This duplicate URL is deliberate --></div>
+  <div><a href="/">Index</a></div>
   <br>
   <br>
   <img src="https://www.w3schools.com/html/pic_trulli.jpg" alt="Image alt text" height="20" width="20">
   <p>This page is primarily for testing the Ruby code used in Wgit with the Minitest framework.</p>
-  <span>
+  <div>
     Here is a table:
     <table>
       <tr>
@@ -45,7 +45,7 @@ <h2 id="welcome">Welcome to my site, I hope you like what you see and enjoy brow
         <td>Dublin</td>
       </tr>
     </table>
-  </span>
+  </div>
   <br />
   <div id="minitest">
     Minitest rocks!! It's simplicity and power matches the Ruby language in which it's developed.
@@ -68,19 +68,19 @@ <h2 id="welcome">Welcome to my site, I hope you like what you see and enjoy brow
     </form>
   </div>
   <br />
-  <a href="http://www.yahoo.com">Yahoo</a>
-  <a href="/contact.html">Contact</a>
-  <a href="http://www.bing.com/">Bing</a>
-  <a href="http://www.mytestsite.com">Index 2</a><!-- Duplicate of / -->
-  <a href="http://www.mytestsite.com/">Index 3</a><!-- Duplicate of / -->
-  <a href="http://www.mytestsite.com/tests.html">Tests</a>
-  <a href="https://search.yahoo.com/search?q=hello&page=2">Yahoo Search</a>
-  <a href="/blog#about-us">Blog</a>
-  <a href="https://example.com/blog#about-us">Example.com Blog</a>
-  <a href="/contents/">Contents</a>
-  <a href="http://ftp.mytestsite.com">Same Domain FTP Server</a>
-  <a href="http://ftp.mytestsite.com/">Same Domain FTP Server 2</a><!-- Duplicate of ftp.mytestsite.com -->
-  <a href="http://ftp.mytestsite.com/files">Same Domain FTP Server Files</a>
+  <a href="http://www.yahoo.com">Yahoo</a><br>
+  <a href="/contact.html">Contact</a><br>
+  <a href="http://www.bing.com/">Bing</a><br>
+  <a href="http://www.mytestsite.com">Index 2</a><br><!-- Duplicate of / -->
+  <a href="http://www.mytestsite.com/">Index 3</a><br><!-- Duplicate of / -->
+  <a href="http://www.mytestsite.com/tests.html">Tests</a><br>
+  <a href="https://search.yahoo.com/search?q=hello&page=2">Yahoo Search</a><br>
+  <a href="/blog#about-us">Blog</a><br>
+  <a href="https://example.com/blog#about-us">Example.com Blog</a><br>
+  <a href="/contents/">Contents</a><br>
+  <a href="http://ftp.mytestsite.com">Same Domain FTP Server</a><br>
+  <a href="http://ftp.mytestsite.com/">Same Domain FTP Server 2</a><br><!-- Duplicate of ftp.mytestsite.com -->
+  <a href="http://ftp.mytestsite.com/files">Same Domain FTP Server Files</a><br>
 </body>
 
 </html>
diff --git a/test/test_document.rb b/test/test_document.rb
index bbd9219..f3e24c9 100644
--- a/test/test_document.rb
+++ b/test/test_document.rb
@@ -92,7 +92,7 @@ def setup
     }
     @stats = {
       url: 30,
-      html: 3180,
+      html: 3322,
       title: 15,
       description: 32,
       author: 15,
diff --git a/test/test_document_extractors.rb b/test/test_document_extractors.rb
index 2db96de..6397117 100644
--- a/test/test_document_extractors.rb
+++ b/test/test_document_extractors.rb
@@ -23,12 +23,12 @@ def setup
   # Runs after every test and should remove all defined extractors
   # to avoid affecting other tests.
   def teardown
-    if Wgit::HtmlToText.text_elements.include?(:table)
-      Wgit::HtmlToText.text_elements.delete(:table)
+    if Wgit::HTMLToText.text_elements.include?(:table)
+      Wgit::HTMLToText.text_elements.delete(:table)
     end
 
-    unless Wgit::HtmlToText.text_elements.include?(:p)
-      Wgit::HtmlToText.text_elements[:p] = :block
+    unless Wgit::HTMLToText.text_elements.include?(:p)
+      Wgit::HTMLToText.text_elements[:p] = :block
     end
 
     if Wgit::Document.to_h_ignore_vars.include?('@data')
@@ -85,7 +85,7 @@ def teardown
   end
 
   def test_text_elements__addition
-    Wgit::HtmlToText.text_elements[:table] = :block
+    Wgit::HTMLToText.text_elements[:table] = :block
 
     doc = Wgit::Document.new(
       'http://some_url.com',
@@ -98,11 +98,11 @@ def test_text_elements__addition
     )
 
     assert_equal ['Hello world!', 'My table'], doc.text
-    assert Wgit::HtmlToText.text_elements.keys.include?(:table)
+    assert Wgit::HTMLToText.text_elements.keys.include?(:table)
   end
 
   def test_text_elements__deletion
-    Wgit::HtmlToText.text_elements.delete(:p)
+    Wgit::HTMLToText.text_elements.delete(:p)
 
     doc = Wgit::Document.new(
       'http://some_url.com',
@@ -115,7 +115,7 @@ def test_text_elements__deletion
     )
 
     assert_equal ['obj.method()'], doc.text
-    refute Wgit::HtmlToText.text_elements.keys.include?(:p)
+    refute Wgit::HTMLToText.text_elements.keys.include?(:p)
   end
 
   def test_to_h_ignore_vars__addition
@@ -400,7 +400,7 @@ def test_document_extractor__init_from_database
     empty_db
 
     # Define a text extractor.
-    Wgit::HtmlToText.text_elements[:table] = :block
+    Wgit::HTMLToText.text_elements[:table] = :block
 
     # Define a Document extractor.
     name = Wgit::Document.define_extractor(
@@ -469,7 +469,7 @@ def test_document_extractor__init_from_database
     assert db_doc.respond_to? :table_text
     assert_instance_of String, db_doc.table_text
     assert_equal "Boomsk\n    Header Text\n    Another Header", db_doc.table_text
-    assert Wgit::HtmlToText.text_elements.keys.include?(:table)
+    assert Wgit::HTMLToText.text_elements.keys.include?(:table)
   end
 
   def test_document_extractor__init_from_mongo_doc
diff --git a/test/test_html_to_text.rb b/test/test_html_to_text.rb
index 68e6633..4328540 100644
--- a/test/test_html_to_text.rb
+++ b/test/test_html_to_text.rb
@@ -1,7 +1,7 @@
 require_relative 'helpers/test_helper'
 
-# Test class for utility module functions.
-class TestUtils < TestHelper
+# Test class for Wgit::HTMLToText text extraction.
+class TestHTMLToText < TestHelper
   # Run non DB tests in parallel for speed.
   parallelize_me!
 
@@ -30,6 +30,7 @@ def setup
       '    ',
       "\n",
       "  \n ",
+      " \n foo bar \n ",
       '<br>',
       '<hr>'
     ]
@@ -37,14 +38,15 @@ def setup
     # For each use_case * text_variation combo above, what do we expect.
     @expected = [
       # inline parent - inline inline
-      "prepost",
-      "prefoobarpost",
-      "prefoo barpost",
-      "pre foo bar  post",
-      "pre post",
-      "pre    post",
-      "prepost",
-      "prepost",
+      'prepost',
+      'prefoobarpost',
+      'prefoo barpost',
+      'pre foo bar post',
+      'pre post',
+      'pre post',
+      'prepost',
+      'pre post',
+      'pre foo bar post',
       "pre\npost",
       "pre\npost",
 
@@ -52,11 +54,12 @@ def setup
       "pre\npost",
       "prefoobar\npost",
       "prefoo bar\npost",
-      "pre foo bar  \npost",
+      "pre foo bar \npost",
+      "pre \npost",
       "pre \npost",
-      "pre    \npost",
-      "pre\npost",
       "pre\npost",
+      "pre \npost",
+      "pre foo bar \npost",
       "pre\npost",
       "pre\npost",
 
@@ -64,11 +67,12 @@ def setup
       "pre\npost",
       "pre\nfoobarpost",
       "pre\nfoo barpost",
-      "pre\n foo bar  post",
+      "pre\n foo bar post",
+      "pre\n post",
       "pre\n post",
-      "pre\n    post",
-      "pre\npost",
       "pre\npost",
+      "pre\n \npost",
+      "pre\n foo bar post",
       "pre\npost",
       "pre\npost",
 
@@ -76,25 +80,27 @@ def setup
       "pre\npost",
       "pre\nfoobar\npost",
       "pre\nfoo bar\npost",
-      "pre\n foo bar  \npost",
+      "pre\n foo bar \npost",
+      "pre\n \npost",
       "pre\n \npost",
-      "pre\n    \npost",
-      "pre\npost",
       "pre\npost",
+      "pre\n \npost",
+      "pre\n foo bar \npost",
       "pre\npost",
       "pre\npost",
 
       #######
 
       # block parent - inline inline
-      "prepost",
-      "prefoobarpost",
-      "prefoo barpost",
-      "pre foo bar  post",
-      "pre post",
-      "pre    post",
-      "prepost",
-      "prepost",
+      'prepost',
+      'prefoobarpost',
+      'prefoo barpost',
+      'pre foo bar post',
+      'pre post',
+      'pre post',
+      'prepost',
+      'pre post',
+      'pre foo bar post',
       "pre\npost",
       "pre\npost",
 
@@ -102,11 +108,12 @@ def setup
       "pre\npost",
       "prefoobar\npost",
       "prefoo bar\npost",
-      "pre foo bar  \npost",
+      "pre foo bar \npost",
+      "pre \npost",
       "pre \npost",
-      "pre    \npost",
-      "pre\npost",
       "pre\npost",
+      "pre \npost",
+      "pre foo bar \npost",
       "pre\npost",
       "pre\npost",
 
@@ -114,11 +121,12 @@ def setup
       "pre\npost",
       "pre\nfoobarpost",
       "pre\nfoo barpost",
-      "pre\n foo bar  post",
+      "pre\n foo bar post",
+      "pre\n post",
       "pre\n post",
-      "pre\n    post",
-      "pre\npost",
       "pre\npost",
+      "pre\n \npost",
+      "pre\n foo bar post",
       "pre\npost",
       "pre\npost",
 
@@ -126,53 +134,52 @@ def setup
       "pre\npost",
       "pre\nfoobar\npost",
       "pre\nfoo bar\npost",
-      "pre\n foo bar  \npost",
+      "pre\n foo bar \npost",
+      "pre\n \npost",
       "pre\n \npost",
-      "pre\n    \npost",
-      "pre\npost",
       "pre\npost",
+      "pre\n \npost",
+      "pre\n foo bar \npost",
       "pre\npost",
       "pre\npost"
     ]
   end
 
   def test_extract_text_str
-    unless (@use_cases.size * @content_variations.size) == @expected.size
-      raise 'invalid @expected array'
-    end
-
+    total_test_cases = @use_cases.size * @content_variations.size
     should_fail = false
+    fail_count = 0
     i = 0
 
+    raise 'invalid @expected array' unless total_test_cases == @expected.size
+
     @use_cases.each do |use_case|
       @content_variations.each do |content|
-        nodes = use_case
-                .gsub('<inline_parent>',  '<span>')
-                .gsub('</inline_parent>', '</span>')
-                .gsub('<block_parent>',   '<div>')
-                .gsub('</block_parent>',  '</div>')
-                .gsub('<inline>',         '<span>pre</span>')
-                .gsub('</inline>',        '<span>post</span>')
-                .gsub('<block>',          '<div>pre</div>')
-                .gsub('</block>',         '<div>post</div>')
-                .gsub('*',                content)
+        nodes = gsub_use_case_content(use_case, content)
         parser = Nokogiri::HTML("<html><body>#{nodes}</body></html>")
 
         expected = @expected[i]
-        actual = Wgit::HtmlToText.new(parser).extract_str
+        actual = Wgit::HTMLToText.new(parser).extract_str
 
         i += 1
+        assert true # Add our assertion to minitest's total.
         has_passed = expected == actual
         next if has_passed
 
-        Wgit::Utils.pprint(i, prefix: 'TEST_EXTRACT_TEXT_STR_CASE', new_line: true,
+        Wgit::Utils.pprint("CASE_#{i}", prefix: 'TEST_EXTRACT_TEXT_STR', new_line: true,
           use_case: use_case, content: content, nodes: nodes, expected: expected, actual: actual)
 
         should_fail = true
+        fail_count += 1
       end
     end
 
-    flunk 'test_extract_text_str failed, see logs above for info' if should_fail
+    return unless should_fail
+
+    Wgit::Utils.pprint('SUMMARY', prefix: 'TEST_EXTRACT_TEXT_STR', new_line: true,
+      total_test_cases: total_test_cases, total_failing_cases: fail_count)
+
+    flunk 'test_extract_text_str failed, see logs above for info'
   end
 
   def test_extract__anchors
@@ -180,7 +187,7 @@ def test_extract__anchors
     html = File.read './test/mock/fixtures/anchor_display.html'
     doc = Wgit::Document.new url, html
 
-    assert_equal ['About', 'Foo Location Bar', 'Contact Contact2Contact3'], doc.text
+    assert_equal ['About', 'Foo Location Bar', 'Contact Contact2 Contact3'], doc.text
   end
 
   def test_extract__spans
@@ -217,4 +224,19 @@ def test_extract__getting_started_wiki
       'Note: The text search index lists all document fields to be searched by MongoDB when calling Wgit::Database#search. Therefore, you should append this list with any other fields that you want searched. For example, if you extend the API then you might want to search your new fields in the database by adding them to the index above. This can be done programmatically with:',
     ], doc.text
   end
+
+  private
+
+  def gsub_use_case_content(use_case, content)
+    use_case
+      .gsub('<inline_parent>',  '<span>')
+      .gsub('</inline_parent>', '</span>')
+      .gsub('<block_parent>',   '<div>')
+      .gsub('</block_parent>',  '</div>')
+      .gsub('<inline>',         '<span>pre</span>')
+      .gsub('</inline>',        '<span>post</span>')
+      .gsub('<block>',          '<div>pre</div>')
+      .gsub('</block>',         '<div>post</div>')
+      .gsub('*',                content)
+  end
 end