From 96a03412c4140e22de912bfaa062b7f5aba86f0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Porto?=
 <78543243+joaoandreporto@users.noreply.github.com>
Date: Tue, 21 Jun 2022 12:20:17 +0000
Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=9B=91=20fenced=20comments=20clashing?=
 =?UTF-8?q?=20with=20markdown=20headers?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This proposes an initial solution to https://github.com/vimwiki/utils/issues/11 (comment lines inside fenced code blocks being reported as markdown headers).
---
 vwtags.py | 106 +++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 73 insertions(+), 33 deletions(-)

diff --git a/vwtags.py b/vwtags.py
index 2043dcf..8ca5897 100755
--- a/vwtags.py
+++ b/vwtags.py
@@ -1,9 +1,16 @@
 #! /usr/bin/env python3
 # -*- coding: utf-8 -*-
+"""
+source: https://github.com/vimwiki/utils/blob/master/vwtags.py
+forked from the script by EinfachToll originally committed on 24 Jun 2014 to
+https://github.com/vimwiki/utils
+"""
 
 from __future__ import print_function
+import sys
+import re
 
-help_text = """
+help_text = r"""
 Extracts tags from Vimwiki files. Useful for the Tagbar plugin.
 
 Usage:
@@ -11,14 +18,14 @@
 anywhere and add the following to your .vimrc:
 
 let g:tagbar_type_vimwiki = {
-          \   'ctagstype':'vimwiki'
-          \ , 'kinds':['h:header']
-          \ , 'sro':'&&&'
-          \ , 'kind2scope':{'h':'header'}
-          \ , 'sort':0
-          \ , 'ctagsbin':'/path/to/vwtags.py'
-          \ , 'ctagsargs': 'default'
-          \ }
+                        \   'ctagstype':'vimwiki'
+                        \ , 'kinds':['h:header']
+                        \ , 'sro':'&&&'
+                        \ , 'kind2scope':{'h':'header'}
+                        \ , 'sort':0
+                        \ , 'ctagsbin':'/path/to/vwtags.py'
+                        \ , 'ctagsargs': 'default'
+                        \ }
 
 The value of ctagsargs must be one of 'default', 'markdown' or 'media',
 whatever syntax you use. However, if you use multiple wikis with different
@@ -27,8 +34,14 @@
 but there might be erroneously shown headers.
 """
 
-import sys
-import re
+
+class Error(Exception):
+    """Base class for exceptions."""
+
+
+class ReadFileIntoBufferError(Error):
+    """Exception raising for failed reading file into Buffer attempt"""
+
 
 if len(sys.argv) < 3:
     print(help_text)
@@ -38,6 +51,8 @@
 filename = sys.argv[2]
 rx_default_media = r"^\s*(={1,6})([^=].*[^=])\1\s*$"
 rx_markdown = r"^\s*(#{1,6})([^#].*)$"
+rx_fenced_code = r"^```[^\r\n]*[a-z]*(?:\n(?!```$).*)*\n```"
+rx_header = None
 
 if syntax in ("default", "media"):
     rx_header = re.compile(rx_default_media)
@@ -46,37 +61,62 @@
 else:
     rx_header = re.compile(rx_default_media + "|" + rx_markdown)
 
-file_content = []
 try:
-    with open(filename, "r") as vim_buffer:
-        file_content = vim_buffer.readlines()
-except:
+    with open(filename, 'r') as buffer:
+        file_content = buffer.readlines()
+except ReadFileIntoBufferError:
+    print("Failed to open file")
     exit()
 
+
+def fenced_code_sentinel(filename):
+    """detect fenced code zones"""
+    # fenced code toggle init
+    fct = 0
+
+    # fct gen
+    for line in file_content:
+        if syntax == "markdown":
+            fct_d = fct
+
+            if fct_d == fct and re.match(r"```", line):
+                fct = 1 - fct_d
+            if re.match(r"```", line):
+                yield 1
+            else:
+                yield fct_d
+        else:
+            yield fct
+
+
+fcd = fenced_code_sentinel(filename)
 state = [""]*6
+
 for lnum, line in enumerate(file_content):
+    fcd_toggle = next(fcd)
 
-    match_header = rx_header.match(line)
+    if fcd_toggle == 0:
+        match_header = rx_header.match(line)
 
-    if not match_header:
-        continue
+        if not match_header:
+            continue
 
-    match_lvl = match_header.group(1) or match_header.group(3)
-    match_tag = match_header.group(2) or match_header.group(4)
+        match_lvl = match_header.group(1) or match_header.group(3)
+        match_tag = match_header.group(2) or match_header.group(4)
 
-    cur_lvl = len(match_lvl)
-    cur_tag = match_tag.strip()
-    cur_searchterm = "^" + match_header.group(0).rstrip("\r\n") + "$"
-    cur_kind = "h"
+        cur_lvl = len(match_lvl)
+        cur_tag = match_tag.strip()
+        cur_searchterm = "^" + match_header.group(0).rstrip("\r\n") + "$"
+        cur_kind = "h"
 
-    state[cur_lvl-1] = cur_tag
-    for i in range(cur_lvl, 6):
-        state[i] = ""
+        state[cur_lvl-1] = cur_tag
+        for i in range(cur_lvl, 6):
+            state[i] = ""
 
-    scope = "&&&".join(
-            [state[i] for i in range(0, cur_lvl-1) if state[i] != ""])
-    if scope:
-        scope = "\theader:" + scope
+        scope = "&&&".join(
+                [state[i] for i in range(0, cur_lvl-1) if state[i] != ""])
+        if scope:
+            scope = "\theader:" + scope
 
-    print('{0}\t{1}\t/{2}/;"\t{3}\tline:{4}{5}'.format(
-        cur_tag, filename, cur_searchterm, cur_kind, str(lnum+1), scope))
+        print('{0}\t{1}\t/{2}/;"\t{3}\tline:{4}{5}'.format(
+            cur_tag, filename, cur_searchterm, cur_kind, str(lnum+1), scope))

From a346a3d064cc60674590369d35bb4e2d153cd40e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Porto?=
 <78543243+joaoandreporto@users.noreply.github.com>
Date: Thu, 23 Jun 2022 10:34:11 +0000
Subject: [PATCH 2/2] Refactoring

Replaced the sentinel generator that detected fenced code blocks with a regex that strips the fenced code blocks from the file content before header matching, when the markdown syntax is used.
---
 vwtags.py | 78 +++++++++++++++++++++----------------------------------
 1 file changed, 30 insertions(+), 48 deletions(-)

diff --git a/vwtags.py b/vwtags.py
index 8ca5897..b3e032c 100755
--- a/vwtags.py
+++ b/vwtags.py
@@ -2,8 +2,9 @@
 # -*- coding: utf-8 -*-
 """
 source: https://github.com/vimwiki/utils/blob/master/vwtags.py
-forked from the script by EinfachToll originally committed on 24 Jun 2014 to
-https://github.com/vimwiki/utils
+Forked from the script originally committed on 24 Jun 2014 by EinfachToll.
+This script generates ctags-compatible tag information for vimwiki-tagbar
+(or the like) integration.
 """
 
 from __future__ import print_function
@@ -36,7 +37,7 @@
 
 
 class Error(Exception):
-    """Base class for exceptions."""
+    """Base class for exceptions"""
 
 
 class ReadFileIntoBufferError(Error):
@@ -51,72 +52,53 @@ class ReadFileIntoBufferError(Error):
 filename = sys.argv[2]
 rx_default_media = r"^\s*(={1,6})([^=].*[^=])\1\s*$"
 rx_markdown = r"^\s*(#{1,6})([^#].*)$"
-rx_fenced_code = r"^```[^\r\n]*[a-z]*(?:\n(?!```$).*)*\n```"
+rx_fenced_code = r"^```[^\r\n]*[a-z]*$(?:\n(?!^```).*)*\n^```"
 rx_header = None
 
 if syntax in ("default", "media"):
     rx_header = re.compile(rx_default_media)
 elif syntax == "markdown":
+    comp_rx_fcode = re.compile(rx_fenced_code, flags=re.MULTILINE)
     rx_header = re.compile(rx_markdown)
 else:
     rx_header = re.compile(rx_default_media + "|" + rx_markdown)
 
 try:
     with open(filename, 'r') as buffer:
-        file_content = buffer.readlines()
+        if syntax == "markdown":
+            file_content = buffer.read()
+            sub_rx_fcode = comp_rx_fcode.sub("", file_content)
+            file_content = sub_rx_fcode.split("\n")
+        else:
+            file_content = buffer.readlines()
 except ReadFileIntoBufferError:
     print("Failed to open file")
     exit()
 
-
-def fenced_code_sentinel(filename):
-    """detect fenced code zones"""
-    # fenced code toggle init
-    fct = 0
-
-    # fct gen
-    for line in file_content:
-        if syntax == "markdown":
-            fct_d = fct
-
-            if fct_d == fct and re.match(r"```", line):
-                fct = 1 - fct_d
-            if re.match(r"```", line):
-                yield 1
-            else:
-                yield fct_d
-        else:
-            yield fct
-
-
-fcd = fenced_code_sentinel(filename)
 state = [""]*6
 
 for lnum, line in enumerate(file_content):
-    fcd_toggle = next(fcd)
-
-    if fcd_toggle == 0:
-        match_header = rx_header.match(line)
+    match_header = rx_header.match(line)
 
-        if not match_header:
-            continue
+    if not match_header:
+        continue
 
-        match_lvl = match_header.group(1) or match_header.group(3)
-        match_tag = match_header.group(2) or match_header.group(4)
+    match_lvl = match_header.group(1) or match_header.group(3)
+    match_tag = match_header.group(2) or match_header.group(4)
 
-        cur_lvl = len(match_lvl)
-        cur_tag = match_tag.strip()
-        cur_searchterm = "^" + match_header.group(0).rstrip("\r\n") + "$"
-        cur_kind = "h"
+    cur_lvl = len(match_lvl)
+    cur_tag = match_tag.strip()
+    cur_searchterm = "^" + match_header.group(0).rstrip("\r\n") + "$"
+    cur_kind = "h"
 
-        state[cur_lvl-1] = cur_tag
-        for i in range(cur_lvl, 6):
-            state[i] = ""
+    state[cur_lvl-1] = cur_tag
+    for i in range(cur_lvl, 6):
+        state[i] = ""
 
-        scope = "&&&".join(
-                [state[i] for i in range(0, cur_lvl-1) if state[i] != ""])
-        if scope:
-            scope = "\theader:" + scope
+    scope = "&&&".join(
+            [state[i] for i in range(0, cur_lvl-1) if state[i] != ""])
+    if scope:
+        scope = "\theader:" + scope
 
-        print('{0}\t{1}\t/{2}/;"\t{3}\tline:{4}{5}'.format(
-            cur_tag, filename, cur_searchterm, cur_kind, str(lnum+1), scope))
+    print('{0}\t{1}\t/{2}/;"\t{3}\tline:{4}{5}'.format(
+        cur_tag, filename, cur_searchterm, cur_kind, str(lnum+1), scope))