Added unit tests. Fixes. Added function.

frnmst · Apr 2, 2021 · 88b0267 · 88b0267
1 parent 1f04d00
commit 88b0267
Show file tree

Hide file tree

Showing 2 changed files with 252 additions and 9 deletions.
diff --git a/md_toc/api.py b/md_toc/api.py
@@ -702,6 +702,9 @@ def get_generic_fdr_indices(i: int, line: str, char: str, mem: dict, type: str =
 
     if parser in ['github', 'cmark', 'gitlab', 'commonmarker']:
         is_fdr = False
+        is_fdr_b = False
+        is_fdr_c = False
+        is_fdr_d = False
         was_char = False
 
         char_start = i
@@ -711,19 +714,51 @@ def get_generic_fdr_indices(i: int, line: str, char: str, mem: dict, type: str =
 
         # Go back 1.
         char_end = i - 1
-        if char_end > char_start or was_char:
+
+        # Using definition from 0.29 which is much clearer than the one from 0.28:
+        #
+        # "
+        # A left-flanking delimiter run is a delimiter run that is
+        #       (1) not followed by Unicode whitespace,
+        #           AND
+        #               either (2a) not followed by a punctuation character,
+        #               OR
+        #               (2b) followed by a punctuation character
+        #                   AND preceded by Unicode whitespace or a punctuation character.
+        # For purposes of this definition, the beginning and the end of the line count as Unicode whitespace.
+        # "
+        if was_char:
             if char_end < len(line) - 1:
+                # (1)
                 if line[char_end + 1] not in md_parser[parser]['pseudo-re']['UWC']:
                     is_fdr = True
+            else:
+                # End of the line.
+                # For purposes of this definition, the beginning and the end of the line count as Unicode whitespace.
+                is_fdr = True
+            # AND.
             if is_fdr:
                 if char_end < len(line) - 1:
+                    # (2a)
                     if line[char_end + 1] not in md_parser[parser]['pseudo-re']['PC']:
-                        is_fdr = True
-                if char_start > 0:
-                    if line[char_start - 1] not in md_parser[parser]['pseudo-re']['UWC'] and line[char_start - 1] not in md_parser[parser]['pseudo-re']['PC']:
-                        is_fdr = True
-
-            if is_fdr:
+                        is_fdr_b = True
+                # OR
+                if not is_fdr_b:
+                    # (2b)
+                    if char_end < len(line) - 1:
+                        if line[char_end + 1] in md_parser[parser]['pseudo-re']['PC']:
+                            is_fdr_c = True
+                    # AND.
+                    if is_fdr_c:
+                        if char_start > 0:
+                            if line[char_start - 1] in md_parser[parser]['pseudo-re']['UWC'] or line[char_start - 1] in md_parser[parser]['pseudo-re']['PC']:
+                                is_fdr_d = True
+                        else:
+                            # Beginning of the line.
+                            # For purposes of this definition, the beginning and the end of the line count as Unicode whitespace.
+                            is_fdr_d = True
+
+            if is_fdr and (is_fdr_b or (is_fdr_c and is_fdr_d)):
                 # LFDR and RFDR are very similar.
                 # RFDR is just the reverse of LFDR.
                 if type == 'left':
@@ -765,6 +800,70 @@ def get_fdr_indices(line: str, type: str = 'left', parser: str = 'github') -> di
     return m
 
 
+def get_remove_emphasis_indices(line: str):
+    r"""Get the index ranges of the ``*`` delimiter runs that open and close emphasis in a line."""
+    i = 0
+    j = len(line) - 1
+    ignore_list = list()
+    no_fdr = {
+        '*': list(),
+        '_': list(),
+    }
+
+    while i < len(line) and j >= 0 and i < j:
+        Si = i
+        has_open = False
+        has_close = False
+        is_lfdr = True
+
+        while i < len(line) and line[i] == '*':
+            i += 1
+
+        if Si > 0:
+            start_inspect = Si - 1
+        else:
+            start_inspect = Si
+
+        lfdr_indices = get_fdr_indices(line=line[start_inspect:i + 1], type='left')
+        if lfdr_indices == no_fdr:
+            is_lfdr = False
+
+        iter = i - Si
+
+        if iter > 0 and is_lfdr:
+            has_open = True
+            has_close = False
+
+            while i < j and iter > 0:
+                Sj = j
+
+                while line[j] != '*' and j > i:
+                    j -= 1
+
+                # Last position of a star character.
+                last_j = j
+
+                # Check if characters are part of RFDR to determine if they might be closing emphasis.
+                while j > 0 and line[j] == '*' and last_j - j < iter and line[j - 1] != '\\':
+                    j -= 1
+
+                closing_emph = False
+                if get_fdr_indices(line=line[j:Sj + 1], type='right') != no_fdr:
+                    closing_emph = True
+
+                if j < Sj and closing_emph:
+                    total_closed = last_j - j
+                    iter -= total_closed
+                    ignore_list.append([j + 1, last_j])
+                    has_close = True
+
+        if has_open and has_close:
+            ignore_list.append([Si, i - 1])
+        i += 1
+
+    return ignore_list
+
+
 def build_anchor_link(header_text_trimmed: str,
                       header_duplicate_counter: str,
                       parser: str = 'github') -> str:

diff --git a/md_toc/tests/tests.py b/md_toc/tests/tests.py
@@ -431,14 +431,18 @@ def test_build_toc_line(self):
         """
 
     def test_get_fdr_indices(self):
-        r"""Test get left-flanking delimiter run indices."""
+        r"""Test get flanking delimiter run indices.
+
+        .. note:: some examples in the documentation appear to contradict each other.
+        """
         # Star character.
         self.assertEqual(api.get_fdr_indices('***abc', 'left', 'github'), {'*': [[0, 2]], '_': list(), })
         self.assertEqual(api.get_fdr_indices('**"abc"', 'left', 'github'), {'*': [[0, 1]], '_': list(), })
         self.assertEqual(api.get_fdr_indices(' abc***def', 'left', 'github'), {'*': [[4, 6]], '_': list(), })
-
         self.assertEqual(api.get_fdr_indices('***abc **def', 'left', 'github'), {'*': [[0, 2], [7, 8]], '_': list(), })
 
+        self.assertEqual(api.get_fdr_indices('a*"foo"*', 'left', 'github'), {'*': list(), '_': list(), })
+
         self.assertEqual(api.get_fdr_indices('abc***', 'left', 'github'), {'*': list(), '_': list(), })
         self.assertEqual(api.get_fdr_indices('"abc"***', 'left', 'github'), {'*': list(), '_': list(), })
 
@@ -466,6 +470,146 @@ def test_get_fdr_indices(self):
         self.assertEqual(api.get_fdr_indices('abc *** def', 'right', 'github'), {'*': list(), '_': list(), })
         self.assertEqual(api.get_fdr_indices('a _ b', 'right', 'github'), {'*': list(), '_': list(), })
 
+    def test_get_remove_emphasis_indices(self):
+        r"""Test get remove emphasis indices."""
+        # Example 331
+        self.assertEqual(api.get_remove_emphasis_indices('*foo bar*'), [[8, 8], [0, 0]])
+
+        # Example 332
+        self.assertEqual(api.get_remove_emphasis_indices('a * foo bar*'), list())
+
+        # Example 333
+        self.assertEqual(api.get_remove_emphasis_indices('a*"foo"*'), list())
+
+        # Example 334
+        self.assertEqual(api.get_remove_emphasis_indices('* a *'), list())
+
+        # Example 335
+        self.assertEqual(api.get_remove_emphasis_indices('foo*bar*'), [[7, 7], [3, 3]])
+
+        # Example 336
+        self.assertEqual(api.get_remove_emphasis_indices('5*6*78'), [[3, 3], [1, 1]])
+
+        # Example 337
+        # self.assertEqual(api.get_remove_emphasis_indices('_foo bar_'), [[3, 3], [1, 1]])
+        # Example 338
+        # Example 339
+        # Example 340
+        # Example 341
+        # Example 342
+        # Example 343
+        # Example 344
+        # Example 345
+
+        # Example 346
+        self.assertEqual(api.get_remove_emphasis_indices('*foo bar *'), list())
+
+        # Example 347
+        self.assertEqual(api.get_remove_emphasis_indices('*foo bar\n*'), list())
+
+        # Example 348
+        self.assertEqual(api.get_remove_emphasis_indices('*(*foo)'), list())
+
+        # Example 349
+        self.assertEqual(api.get_remove_emphasis_indices('*(*foo*)*'), [[8, 8], [0, 0], [6, 6], [2, 2]])
+
+        # Example 350
+        self.assertEqual(api.get_remove_emphasis_indices('*foo*bar'), [[4, 4], [0, 0]])
+
+        # Example 351
+        # Example 352
+        # Example 353
+        # Example 354
+        # Example 355
+        # Example 356
+        # Example 357
+
+        # Example 358
+        self.assertEqual(api.get_remove_emphasis_indices('**foo bar**'), [[9, 10], [0, 1]])
+
+        # Example 359
+        self.assertEqual(api.get_remove_emphasis_indices('** foo bar**'), list())
+
+        # Example 360
+        self.assertEqual(api.get_remove_emphasis_indices('a**"foo"**'), list())
+
+        # Example 361
+        self.assertEqual(api.get_remove_emphasis_indices('foo**bar**'), [[8, 9], [3, 4]])
+
+        # Example 362
+        # Example 363
+        # Example 364
+        # Example 365
+        # Example 366
+        # Example 367
+        # Example 368
+        # Example 369
+        # Example 370
+
+        # Example 371
+        self.assertEqual(api.get_remove_emphasis_indices('**foo bar **'), list())
+
+        # Example 372
+        self.assertEqual(api.get_remove_emphasis_indices('**(**foo)'), list())
+
+        # Example 373
+        self.assertEqual(api.get_remove_emphasis_indices('*(**foo**)*'), [[10, 10], [0, 0], [7, 8], [2, 3]])
+
+        # Example 374
+#        self.assertEqual(api.get_remove_emphasis_indices('**Gomphocarpus (*Gomphocarpus physocarpus*, syn.\n*Asclepias physocarpa*)**'), [[0, 1], [16, 16], [41, 41], [49, 49], [70, 70], [72, 73]])
+
+        # Example 375
+        self.assertEqual(api.get_remove_emphasis_indices('**foo "*bar*" foo**'), [[17, 18], [0, 1], [11, 11], [7, 7]])
+
+        # Example 376
+        self.assertEqual(api.get_remove_emphasis_indices('**foo**bar'), [[5, 6], [0, 1]])
+
+        # Example 377
+        # Example 378
+        # Example 379
+        # Example 380
+        # Example 381
+        # Example 382
+        # Example 383
+
+        # Example 384
+        self.assertEqual(api.get_remove_emphasis_indices('*foo [bar](/url)*'), [[16, 16], [0, 0]])
+
+        # Example 385
+        self.assertEqual(api.get_remove_emphasis_indices('*foo\nbar*'), [[8, 8], [0, 0]])
+
+        # Example 386
+        # Example 387
+        # Example 388
+
+        # Example 389
+#        self.assertEqual(api.get_remove_emphasis_indices('*foo *bar**'), [[9, 10], [5, 5], [0, 0]])
+
+        # Example 390
+        self.assertEqual(api.get_remove_emphasis_indices('*foo **bar** baz*'), [[16, 16], [0, 0], [10, 11], [5, 6]])
+
+        # Example 391
+
+        # Example 392
+
+        # Example 393
+
+        # Example 394
+
+        # Example 395
+
+        # Example 396
+
+        # Example 397
+
+        # Example 398
+
+        # Example 399
+
+        # Example 400
+
+        # Example 458
+
     def test_remove_html_tags(self):
         r"""Test remove html tags."""
         # Example 584