Skip to content

Commit

Permalink
Added unit tests. Fixes. Added function.
Browse files Browse the repository at this point in the history
  • Loading branch information
Franco Masotti committed Apr 2, 2021
1 parent 1f04d00 commit 88b0267
Show file tree
Hide file tree
Showing 2 changed files with 252 additions and 9 deletions.
113 changes: 106 additions & 7 deletions md_toc/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,9 @@ def get_generic_fdr_indices(i: int, line: str, char: str, mem: dict, type: str =

if parser in ['github', 'cmark', 'gitlab', 'commonmarker']:
is_fdr = False
is_fdr_b = False
is_fdr_c = False
is_fdr_d = False
was_char = False

char_start = i
Expand All @@ -711,19 +714,51 @@ def get_generic_fdr_indices(i: int, line: str, char: str, mem: dict, type: str =

# Go back 1.
char_end = i - 1
if char_end > char_start or was_char:

# Using definition from 0.29 which is much clearer than the one from 0.28:
#
# "
# A left-flanking delimiter run is a delimiter run that is
# (1) not followed by Unicode whitespace,
# AND
# either (2a) not followed by a punctuation character,
# OR
# (2b) followed by a punctuation character
# AND preceded by Unicode whitespace or a punctuation character.
# For purposes of this definition, the beginning and the end of the line count as Unicode whitespace.
# "
if was_char:
if char_end < len(line) - 1:
# (1)
if line[char_end + 1] not in md_parser[parser]['pseudo-re']['UWC']:
is_fdr = True
else:
# End of the line.
# For purposes of this definition, the beginning and the end of the line count as Unicode whitespace.
is_fdr = True
# AND.
if is_fdr:
if char_end < len(line) - 1:
# (2a)
if line[char_end + 1] not in md_parser[parser]['pseudo-re']['PC']:
is_fdr = True
if char_start > 0:
if line[char_start - 1] not in md_parser[parser]['pseudo-re']['UWC'] and line[char_start - 1] not in md_parser[parser]['pseudo-re']['PC']:
is_fdr = True

if is_fdr:
is_fdr_b = True
# OR
if not is_fdr_b:
# (2b)
if char_end < len(line) - 1:
if line[char_end + 1] in md_parser[parser]['pseudo-re']['PC']:
is_fdr_c = True
# AND.
if is_fdr_c:
if char_start > 0:
if line[char_start - 1] in md_parser[parser]['pseudo-re']['UWC'] or line[char_start - 1] in md_parser[parser]['pseudo-re']['PC']:
is_fdr_d = True
else:
# Beginning of the line.
# For purposes of this definition, the beginning and the end of the line count as Unicode whitespace.
is_fdr_d = True

if is_fdr and (is_fdr_b or (is_fdr_c and is_fdr_d)):
# LFDR and RFDR are very similar.
# RFDR is just the reverse of LFDR.
if type == 'left':
Expand Down Expand Up @@ -765,6 +800,70 @@ def get_fdr_indices(line: str, type: str = 'left', parser: str = 'github') -> di
return m


def get_remove_emphasis_indices(line: str) -> list:
    r"""Get the index ranges of the ``*`` runs that open and close emphasis.

    The line is scanned from the left for runs of ``*`` that
    ``get_fdr_indices`` reports as left-flanking (candidate openers) and,
    for each of them, from the right for runs of ``*`` that
    ``get_fdr_indices`` reports as right-flanking (candidate closers).

    :parameter line: a string.
    :type line: str
    :returns: a list of ``[start, end]`` pairs, both ends inclusive. For
         every opening run that found at least one closing run, the
         ranges of its closing runs come first, followed by the range of
         the opening run itself.
    :rtype: list

    ..note: only the ``*`` character is inspected by this function.
    """
    i = 0
    j = len(line) - 1
    ignore_list = []
    # Value get_fdr_indices is compared against to detect "no run found".
    no_fdr = {
        '*': [],
        '_': [],
    }

    while i < len(line) and j >= 0 and i < j:
        open_start = i
        has_close = False

        # Consume the run of stars starting at the current position.
        while i < len(line) and line[i] == '*':
            i += 1

        # Include one character of context on each side of the run so that
        # the flanking test can look at what precedes and follows it.
        start_inspect = open_start - 1 if open_start > 0 else open_start
        lfdr_indices = get_fdr_indices(line=line[start_inspect:i + 1], type='left')
        is_lfdr = lfdr_indices != no_fdr

        # Number of opening stars that still need a closing counterpart.
        unmatched = i - open_start

        if unmatched > 0 and is_lfdr:
            while i < j and unmatched > 0:
                scan_start = j

                # Walk left to the nearest star, never crossing the opener.
                while line[j] != '*' and j > i:
                    j -= 1

                # Last position of a star character.
                last_j = j

                # Check if characters are part of RFDR to determine if they
                # might be closing emphasis. A star preceded by a backslash
                # is not consumed.
                while (j > 0
                       and line[j] == '*'
                       and last_j - j < unmatched
                       and line[j - 1] != '\\'):
                    j -= 1

                if j == scan_start:
                    # Neither scan moved: the star under j is preceded by a
                    # backslash. Step over it, otherwise this loop would
                    # repeat forever with identical state.
                    j -= 1
                    continue

                closing_run = get_fdr_indices(line=line[j:scan_start + 1], type='right')
                if closing_run != no_fdr:
                    unmatched -= last_j - j
                    ignore_list.append([j + 1, last_j])
                    has_close = True

            # The opener is only reported once something closed it.
            if has_close:
                ignore_list.append([open_start, i - 1])
        i += 1

    return ignore_list


def build_anchor_link(header_text_trimmed: str,
header_duplicate_counter: str,
parser: str = 'github') -> str:
Expand Down
148 changes: 146 additions & 2 deletions md_toc/tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,14 +431,18 @@ def test_build_toc_line(self):
"""

def test_get_fdr_indices(self):
r"""Test get left-flanking delimiter run indices."""
r"""Test get flanking delimiter run indices.
..note: some examples in the documentation appear to be contradicting.
"""
# Star character.
self.assertEqual(api.get_fdr_indices('***abc', 'left', 'github'), {'*': [[0, 2]], '_': list(), })
self.assertEqual(api.get_fdr_indices('**"abc"', 'left', 'github'), {'*': [[0, 1]], '_': list(), })
self.assertEqual(api.get_fdr_indices(' abc***def', 'left', 'github'), {'*': [[4, 6]], '_': list(), })

self.assertEqual(api.get_fdr_indices('***abc **def', 'left', 'github'), {'*': [[0, 2], [7, 8]], '_': list(), })

self.assertEqual(api.get_fdr_indices('a*"foo"*', 'left', 'github'), {'*': list(), '_': list(), })

self.assertEqual(api.get_fdr_indices('abc***', 'left', 'github'), {'*': list(), '_': list(), })
self.assertEqual(api.get_fdr_indices('"abc"***', 'left', 'github'), {'*': list(), '_': list(), })

Expand Down Expand Up @@ -466,6 +470,146 @@ def test_get_fdr_indices(self):
self.assertEqual(api.get_fdr_indices('abc *** def', 'right', 'github'), {'*': list(), '_': list(), })
self.assertEqual(api.get_fdr_indices('a _ b', 'right', 'github'), {'*': list(), '_': list(), })

def test_get_remove_emphasis_indices(self):
    r"""Test get remove emphasis indices.

    Each entry pairs an input line with the index ranges that
    ``api.get_remove_emphasis_indices`` is expected to return for it.
    The ``Example NNN`` comments are the labels used for each case.
    """
    no_emphasis = []
    cases = [
        # Example 331
        ('*foo bar*', [[8, 8], [0, 0]]),
        # Example 332
        ('a * foo bar*', no_emphasis),
        # Example 333
        ('a*"foo"*', no_emphasis),
        # Example 334
        ('* a *', no_emphasis),
        # Example 335
        ('foo*bar*', [[7, 7], [3, 3]]),
        # Example 336
        ('5*6*78', [[3, 3], [1, 1]]),
        # Example 337 (disabled)
        # NOTE(review): the expected value below looks copied from
        # example 336 -- confirm before enabling.
        # ('_foo bar_', [[3, 3], [1, 1]]),
        # Examples 338-345: not covered yet.
        # Example 346
        ('*foo bar *', no_emphasis),
        # Example 347
        ('*foo bar\n*', no_emphasis),
        # Example 348
        ('*(*foo)', no_emphasis),
        # Example 349
        ('*(*foo*)*', [[8, 8], [0, 0], [6, 6], [2, 2]]),
        # Example 350
        ('*foo*bar', [[4, 4], [0, 0]]),
        # Examples 351-357: not covered yet.
        # Example 358
        ('**foo bar**', [[9, 10], [0, 1]]),
        # Example 359
        ('** foo bar**', no_emphasis),
        # Example 360
        ('a**"foo"**', no_emphasis),
        # Example 361
        ('foo**bar**', [[8, 9], [3, 4]]),
        # Examples 362-370: not covered yet.
        # Example 371
        ('**foo bar **', no_emphasis),
        # Example 372
        ('**(**foo)', no_emphasis),
        # Example 373
        ('*(**foo**)*', [[10, 10], [0, 0], [7, 8], [2, 3]]),
        # Example 374 (disabled)
        # ('**Gomphocarpus (*Gomphocarpus physocarpus*, syn.\n*Asclepias physocarpa*)**',
        #  [[0, 1], [16, 16], [41, 41], [49, 49], [70, 70], [72, 73]]),
        # Example 375
        ('**foo "*bar*" foo**', [[17, 18], [0, 1], [11, 11], [7, 7]]),
        # Example 376
        ('**foo**bar', [[5, 6], [0, 1]]),
        # Examples 377-383: not covered yet.
        # Example 384
        ('*foo [bar](/url)*', [[16, 16], [0, 0]]),
        # Example 385
        ('*foo\nbar*', [[8, 8], [0, 0]]),
        # Examples 386-388: not covered yet.
        # Example 389 (disabled)
        # ('*foo *bar**', [[9, 10], [5, 5], [0, 0]]),
        # Example 390
        ('*foo **bar** baz*', [[16, 16], [0, 0], [10, 11], [5, 6]]),
        # Examples 391-400 and 458: not covered yet.
    ]

    # Cases run in order and the first mismatch aborts the test.
    for text, expected in cases:
        self.assertEqual(api.get_remove_emphasis_indices(text), expected)

def test_remove_html_tags(self):
r"""Test remove html tags."""
# Example 584
Expand Down

0 comments on commit 88b0267

Please sign in to comment.