From 96a03412c4140e22de912bfaa062b7f5aba86f0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Porto?= <78543243+joaoandreporto@users.noreply.github.com> Date: Tue, 21 Jun 2022 12:20:17 +0000 Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=9B=91=20fenced=20comments=20clashing?= =?UTF-8?q?=20with=20markdown=20headers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This proposes an initial solution to https://github.com/vimwiki/utils/issues/11 . --- vwtags.py | 106 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 33 deletions(-) diff --git a/vwtags.py b/vwtags.py index 2043dcf..8ca5897 100755 --- a/vwtags.py +++ b/vwtags.py @@ -1,9 +1,16 @@ #! /usr/bin/env python3 # -*- coding: utf-8 -*- +""" +source: https://github.com/vimwiki/utils/blob/master/vwtags.py +forked from the script by EinfachToll originally committed on 24 Jun 2014 to +https://github.com/vimwiki/utils +""" from __future__ import print_function +import sys +import re -help_text = """ +help_text = r""" Extracts tags from Vimwiki files. Useful for the Tagbar plugin. Usage: @@ -11,14 +18,14 @@ anywhere and add the following to your .vimrc: let g:tagbar_type_vimwiki = { - \ 'ctagstype':'vimwiki' - \ , 'kinds':['h:header'] - \ , 'sro':'&&&' - \ , 'kind2scope':{'h':'header'} - \ , 'sort':0 - \ , 'ctagsbin':'/path/to/vwtags.py' - \ , 'ctagsargs': 'default' - \ } + \ 'ctagstype':'vimwiki' + \ , 'kinds':['h:header'] + \ , 'sro':'&&&' + \ , 'kind2scope':{'h':'header'} + \ , 'sort':0 + \ , 'ctagsbin':'/path/to/vwtags.py' + \ , 'ctagsargs': 'default' + \ } The value of ctagsargs must be one of 'default', 'markdown' or 'media', whatever syntax you use. However, if you use multiple wikis with different @@ -27,8 +34,14 @@ but there might be erroneously shown headers. """ -import sys -import re + +class Error(Exception): + """Base class for exceptions.""" + + +class ReadFileIntoBufferError(Error): + """Exception raising for failed reading file into Buffer attempt""" + if len(sys.argv) < 3: print(help_text) @@ -38,6 +51,8 @@ filename = sys.argv[2] rx_default_media = r"^\s*(={1,6})([^=].*[^=])\1\s*$" rx_markdown = r"^\s*(#{1,6})([^#].*)$" +rx_fenced_code = r"^```[^\r\n]*[a-z]*(?:\n(?!```$).*)*\n```" +rx_header = None if syntax in ("default", "media"): rx_header = re.compile(rx_default_media) @@ -46,37 +61,62 @@ else: rx_header = re.compile(rx_default_media + "|" + rx_markdown) -file_content = [] try: - with open(filename, "r") as vim_buffer: - file_content = vim_buffer.readlines() -except: + with open(filename, 'r') as buffer: + file_content = buffer.readlines() +except ReadFileIntoBufferError: + print("Failed to open file") exit() + +def fenced_code_sentinel(filename): + """detect fenced code zones""" + # fenced code toggle init + fct = 0 + + # fct gen + for line in file_content: + if syntax == "markdown": + fct_d = fct + + if fct_d == fct and re.match(r"```", line): + fct = 1 - fct_d + if re.match(r"```", line): + yield 1 + else: + yield fct_d + else: + yield fct + + +fcd = fenced_code_sentinel(filename) state = [""]*6 + for lnum, line in enumerate(file_content): + fcd_toggle = next(fcd) - match_header = rx_header.match(line) + if fcd_toggle == 0: + match_header = rx_header.match(line) - if not match_header: - continue + if not match_header: + continue - match_lvl = match_header.group(1) or match_header.group(3) - match_tag = match_header.group(2) or match_header.group(4) + match_lvl = match_header.group(1) or match_header.group(3) + match_tag = match_header.group(2) or match_header.group(4) - cur_lvl = len(match_lvl) - cur_tag = match_tag.strip() - cur_searchterm = "^" + match_header.group(0).rstrip("\r\n") + "$" - cur_kind = "h" + cur_lvl = len(match_lvl) + cur_tag = match_tag.strip() + cur_searchterm = "^" + match_header.group(0).rstrip("\r\n") + "$" + cur_kind = "h" - state[cur_lvl-1] = cur_tag - for i in range(cur_lvl, 6): - state[i] = "" + state[cur_lvl-1] = cur_tag + for i in range(cur_lvl, 6): + state[i] = "" - scope = "&&&".join( - [state[i] for i in range(0, cur_lvl-1) if state[i] != ""]) - if scope: - scope = "\theader:" + scope + scope = "&&&".join( + [state[i] for i in range(0, cur_lvl-1) if state[i] != ""]) + if scope: + scope = "\theader:" + scope - print('{0}\t{1}\t/{2}/;"\t{3}\tline:{4}{5}'.format( - cur_tag, filename, cur_searchterm, cur_kind, str(lnum+1), scope)) + print('{0}\t{1}\t/{2}/;"\t{3}\tline:{4}{5}'.format( + cur_tag, filename, cur_searchterm, cur_kind, str(lnum+1), scope)) From a346a3d064cc60674590369d35bb4e2d153cd40e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Porto?= <78543243+joaoandreporto@users.noreply.github.com> Date: Thu, 23 Jun 2022 10:34:11 +0000 Subject: [PATCH 2/2] Refactoring Replaced the sentinel generator, which detected fenced code blocks, by a regex which subtracts the fenced code blocks, when using markdown syntax. --- vwtags.py | 78 +++++++++++++++++++++---------------------------------- 1 file changed, 30 insertions(+), 48 deletions(-) diff --git a/vwtags.py b/vwtags.py index 8ca5897..b3e032c 100755 --- a/vwtags.py +++ b/vwtags.py @@ -2,8 +2,9 @@ # -*- coding: utf-8 -*- """ source: https://github.com/vimwiki/utils/blob/master/vwtags.py -forked from the script by EinfachToll originally committed on 24 Jun 2014 to -https://github.com/vimwiki/utils +Forked from the script originally committed on 24 Jun 2014 by EinfachToll. +This script generates ctags-compatible tag information for vimwiki-tagbar +(or the like) integration. """ from __future__ import print_function @@ -36,7 +37,7 @@ class Error(Exception): - """Base class for exceptions.""" + """Base class for exceptions""" class ReadFileIntoBufferError(Error): @@ -51,72 +52,53 @@ class ReadFileIntoBufferError(Error): filename = sys.argv[2] rx_default_media = r"^\s*(={1,6})([^=].*[^=])\1\s*$" rx_markdown = r"^\s*(#{1,6})([^#].*)$" -rx_fenced_code = r"^```[^\r\n]*[a-z]*(?:\n(?!```$).*)*\n```" +rx_fenced_code = r"^```[^\r\n]*[a-z]*$(?:\n(?!^```).*)*\n^```" rx_header = None if syntax in ("default", "media"): rx_header = re.compile(rx_default_media) elif syntax == "markdown": + comp_rx_fcode = re.compile(rx_fenced_code, flags=re.MULTILINE) rx_header = re.compile(rx_markdown) else: rx_header = re.compile(rx_default_media + "|" + rx_markdown) try: with open(filename, 'r') as buffer: - file_content = buffer.readlines() + if syntax == "markdown": + file_content = buffer.read() + sub_rx_fcode = comp_rx_fcode.sub("", file_content) + file_content = sub_rx_fcode.split("\n") + else: + file_content = buffer.readlines() except ReadFileIntoBufferError: print("Failed to open file") exit() - -def fenced_code_sentinel(filename): - """detect fenced code zones""" - # fenced code toggle init - fct = 0 - - # fct gen - for line in file_content: - if syntax == "markdown": - fct_d = fct - - if fct_d == fct and re.match(r"```", line): - fct = 1 - fct_d - if re.match(r"```", line): - yield 1 - else: - yield fct_d - else: - yield fct - - -fcd = fenced_code_sentinel(filename) state = [""]*6 for lnum, line in enumerate(file_content): - fcd_toggle = next(fcd) - - if fcd_toggle == 0: - match_header = rx_header.match(line) + match_header = rx_header.match(line) - if not match_header: - continue + if not match_header: + continue - match_lvl = match_header.group(1) or match_header.group(3) - match_tag = match_header.group(2) or match_header.group(4) + match_lvl = match_header.group(1) or match_header.group(3) + match_tag = match_header.group(2) or match_header.group(4) - cur_lvl = len(match_lvl) - cur_tag = match_tag.strip() - cur_searchterm = "^" + match_header.group(0).rstrip("\r\n") + "$" - cur_kind = "h" + cur_lvl = len(match_lvl) + cur_tag = match_tag.strip() + cur_searchterm = "^" + match_header.group(0).rstrip("\r\n") + "$" + cur_kind = "h" - state[cur_lvl-1] = cur_tag - for i in range(cur_lvl, 6): - state[i] = "" + state[cur_lvl-1] = cur_tag + for i in range(cur_lvl, 6): + state[i] = "" - scope = "&&&".join( - [state[i] for i in range(0, cur_lvl-1) if state[i] != ""]) - if scope: - scope = "\theader:" + scope + scope = "&&&".join( + [state[i] for i in range(0, cur_lvl-1) if state[i] != ""]) + if scope: + scope = "\theader:" + scope - print('{0}\t{1}\t/{2}/;"\t{3}\tline:{4}{5}'.format( - cur_tag, filename, cur_searchterm, cur_kind, str(lnum+1), scope)) + print('{0}\t{1}\t/{2}/;"\t{3}\tline:{4}{5}'.format( + cur_tag, filename, cur_searchterm, cur_kind, str(lnum+1), scope))