Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Tree-sitter support for advanced highlighting #455

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ More providers can be added with extensions, see [ollama extension](https://gith

![image](https://github.com/user-attachments/assets/73a86fb6-89f8-4cd9-8552-5c1fb9c2e3b0)

### Syntax Highlighting

- Tree-sitter highlighter for Python files
- Pygments as a fallback highlighter for other file types

## `License`

Expand Down
1 change: 1 addition & 0 deletions src/biscuit/editor/editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from .breadcrumbs.breadcrumbs import BreadCrumbs
from .editorbase import BaseEditor
from .text.highlighter import Highlighter # Import the Highlighter class


class Editor(BaseEditor):
Expand Down
45 changes: 32 additions & 13 deletions src/biscuit/editor/text/highlighter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from pygments.lexers import get_lexer_by_name, get_lexer_for_filename
from pygments.style import Style

from .treesitter import TreeSitterHighlighter

if typing.TYPE_CHECKING:
from biscuit import App

Expand Down Expand Up @@ -75,6 +77,11 @@ def __init__(self, text: Text, language: str = None, *args, **kwargs) -> None:
self.tag_colors = self.base.theme.syntax
self.setup_highlight_tags()

# Initialize Tree-sitter highlighter for Python
self.tree_sitter_highlighter = None
if self.language == "python":
self.tree_sitter_highlighter = TreeSitterHighlighter(language)

def detect_language(self) -> None:
"""Detect the language from the file extension and set the lexer
Refreshes language attribute of the text instance."""
Expand Down Expand Up @@ -117,6 +124,11 @@ def change_language(self, language: str) -> None:
self.text.master.on_change()
self.base.statusbar.on_open_file(self.text)

# Initialize Tree-sitter highlighter for Python
self.tree_sitter_highlighter = None
if self.language == "python":
self.tree_sitter_highlighter = TreeSitterHighlighter(language)

def setup_highlight_tags(self) -> None:
"""Setup the tags for highlighting the text content"""

Expand Down Expand Up @@ -145,6 +157,26 @@ def highlight(self) -> None:
TODO: As of now, it highlights the entire text content.
It needs to be optimized to highlight only the visible area."""

if self.tree_sitter_highlighter:
self.highlight_with_tree_sitter()
else:
self.highlight_with_pygments()

def highlight_with_tree_sitter(self) -> None:
    """Highlight the text content using Tree-sitter.

    Clears the existing highlighting, asks the Tree-sitter highlighter for
    the node spans of the whole buffer, and adds one tag per span, named
    after the span's ``type``.

    The spans carry UTF-8 *byte* offsets, while the text widget's
    ``+ N c`` index modifier counts *characters*. The offsets are therefore
    converted before being applied; otherwise every tag that follows a
    non-ASCII character would be shifted to the right.
    """
    # NOTE(review): tags are named after Tree-sitter node types; confirm
    # that matching tags are configured, otherwise they have no visible effect.
    import bisect

    self.clear()
    text = self.text.get_all_text()
    highlight_info = self.tree_sitter_highlighter.highlight(text)

    # For pure-ASCII text, byte and character offsets coincide and no
    # lookup table is needed. Otherwise record the byte offset at which
    # each character starts, so a byte offset can be mapped back to its
    # character index with a binary search.
    char_starts = None
    if not text.isascii():
        char_starts = []
        position = 0
        for char in text:
            char_starts.append(position)
            position += len(char.encode("utf8"))

    def char_offset(byte_offset):
        if char_starts is None:
            return byte_offset
        # An offset equal to the total byte length maps to len(text).
        return bisect.bisect_left(char_starts, byte_offset)

    for info in highlight_info:
        start_index = self.text.index(f"1.0 + {char_offset(info['start_byte'])}c")
        end_index = self.text.index(f"1.0 + {char_offset(info['end_byte'])}c")
        self.text.tag_add(info["type"], start_index, end_index)

def highlight_with_pygments(self) -> None:
"""Highlight the text content using Pygments"""

if not self.lexer or not self.tag_colors:
return

Expand All @@ -153,21 +185,8 @@ def highlight(self) -> None:

text = self.text.get_all_text()

# NOTE: Highlighting only visible area
# total_lines = int(self.text.index('end-1c').split('.')[0])
# start_line = int(self.text.yview()[0] * total_lines)
# first_visible_index = f"{start_line}.0"
# last_visible_index =f"{self.text.winfo_height()}.end"
# for token, _ in self.tag_colors.items():
# self.text.tag_remove(str(token), first_visible_index, last_visible_index)
# text = self.text.get(first_visible_index, last_visible_index)

self.text.mark_set("range_start", "1.0")
for token, content in lex(text, self.lexer):
self.text.mark_set("range_end", f"range_start + {len(content)}c")
self.text.tag_add(str(token), "range_start", "range_end")
self.text.mark_set("range_start", "range_end")

# DEBUG
# print(f"{content} is recognized as a <{str(token)}>")
# print("==================================")
35 changes: 35 additions & 0 deletions src/biscuit/editor/text/treesitter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import tree_sitter
from tree_sitter import Language, Parser

class TreeSitterHighlighter:
    """Parse source code with Tree-sitter and report the span of every node.

    Only ``"python"`` is supported; any other language name raises
    ``ValueError`` during construction.
    """

    # Compiled grammar bundle handed to ``Language``.
    # NOTE(review): this is a relative path, so it presumably resolves
    # against the process working directory -- confirm how it is built/shipped.
    LIBRARY_PATH = 'build/my-languages.so'

    def __init__(self, language: str):
        """Create a parser configured for *language*.

        Raises:
            ValueError: if *language* is not supported.
        """
        self.language = language
        self.parser = Parser()
        self.language_library = self.load_language_library(language)
        # NOTE(review): ``Language(path, name)`` and ``Parser.set_language``
        # look like the older py-tree-sitter API -- confirm against the
        # pinned tree_sitter version.
        self.parser.set_language(self.language_library)

    def load_language_library(self, language: str):
        """Return the Tree-sitter ``Language`` object for *language*.

        Raises:
            ValueError: if *language* is anything other than ``"python"``.
        """
        if language == "python":
            return Language(self.LIBRARY_PATH, 'python')
        raise ValueError(f"Unsupported language: {language}")

    def highlight(self, code: str):
        """Parse *code* and return span info for every node below the root.

        The code is encoded as UTF-8 before parsing, so the ``start_byte``
        and ``end_byte`` values in the result are offsets into that byte
        string, not character offsets into *code*.
        """
        tree = self.parser.parse(code.encode("utf8"))
        return self.extract_highlight_info(tree.root_node)

    def extract_highlight_info(self, node):
        """Collect span info for all descendants of *node*, in pre-order.

        *node* itself is not included. Each entry is a dict with the keys
        ``type``, ``start_byte``, ``end_byte``, ``start_point`` and
        ``end_point`` copied from the corresponding node.

        The walk uses an explicit stack rather than recursion, so deeply
        nested syntax trees cannot exceed Python's recursion limit.
        """
        highlight_info = []
        # Children are pushed in reverse so they pop in document order,
        # giving parent, first child, that child's subtree, next child, ...
        pending = list(reversed(node.children))
        while pending:
            current = pending.pop()
            highlight_info.append({
                "type": current.type,
                "start_byte": current.start_byte,
                "end_byte": current.end_byte,
                "start_point": current.start_point,
                "end_point": current.end_point,
            })
            pending.extend(reversed(current.children))
        return highlight_info
Loading