Skip to content

Commit

Permalink
Add Tree-sitter support for advanced highlighting
Browse files Browse the repository at this point in the history
Fixes #398

Add Tree-sitter support for advanced syntax highlighting for Python files.

* **New File**: `src/biscuit/editor/text/treesitter.py`
  - Implement Tree-sitter highlighter for Python files.
  - Load language library and extract highlight information from the syntax tree.

* **Update**: `src/biscuit/editor/text/highlighter.py`
  - Import and initialize Tree-sitter highlighter for Python files.
  - Add methods to highlight text content using Tree-sitter and Pygments as a fallback.

* **Update**: `src/biscuit/editor/editor.py`
  - Import the Highlighter class.

* **Update**: `README.md`
  - Add documentation for Tree-sitter highlighter and Pygments fallback.

---

For more details, open the [Copilot Workspace session](https://copilot-workspace.githubnext.com/tomlin7/biscuit/issues/398?shareId=XXXX-XXXX-XXXX-XXXX).
  • Loading branch information
tomlin7 committed Nov 4, 2024
1 parent 899c7da commit 243e49e
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 13 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ More providers can be added with extensions, see [ollama extension](https://gith

![image](https://github.com/user-attachments/assets/73a86fb6-89f8-4cd9-8552-5c1fb9c2e3b0)

### Syntax Highlighting

- Tree-sitter highlighter for Python files
- Pygments as a fallback highlighter for other file types

## `License`

Expand Down
1 change: 1 addition & 0 deletions src/biscuit/editor/editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from .breadcrumbs.breadcrumbs import BreadCrumbs
from .editorbase import BaseEditor
from .text.highlighter import Highlighter # Import the Highlighter class


class Editor(BaseEditor):
Expand Down
45 changes: 32 additions & 13 deletions src/biscuit/editor/text/highlighter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from pygments.lexers import get_lexer_by_name, get_lexer_for_filename
from pygments.style import Style

from .treesitter import TreeSitterHighlighter

if typing.TYPE_CHECKING:
from biscuit import App

Expand Down Expand Up @@ -75,6 +77,11 @@ def __init__(self, text: Text, language: str = None, *args, **kwargs) -> None:
self.tag_colors = self.base.theme.syntax
self.setup_highlight_tags()

# Initialize Tree-sitter highlighter for Python
self.tree_sitter_highlighter = None
if self.language == "python":
self.tree_sitter_highlighter = TreeSitterHighlighter(language)

def detect_language(self) -> None:
"""Detect the language from the file extension and set the lexer
Refreshes language attribute of the text instance."""
Expand Down Expand Up @@ -117,6 +124,11 @@ def change_language(self, language: str) -> None:
self.text.master.on_change()
self.base.statusbar.on_open_file(self.text)

# Initialize Tree-sitter highlighter for Python
self.tree_sitter_highlighter = None
if self.language == "python":
self.tree_sitter_highlighter = TreeSitterHighlighter(language)

def setup_highlight_tags(self) -> None:
"""Setup the tags for highlighting the text content"""

Expand Down Expand Up @@ -145,6 +157,26 @@ def highlight(self) -> None:
TODO: As of now, it highlights the entire text content.
It needs to be optimized to highlight only the visible area."""

if self.tree_sitter_highlighter:
self.highlight_with_tree_sitter()
else:
self.highlight_with_pygments()

def highlight_with_tree_sitter(self) -> None:
"""Highlight the text content using Tree-sitter"""

self.clear()
text = self.text.get_all_text()
highlight_info = self.tree_sitter_highlighter.highlight(text)

for info in highlight_info:
start_index = self.text.index(f"1.0 + {info['start_byte']}c")
end_index = self.text.index(f"1.0 + {info['end_byte']}c")
self.text.tag_add(info["type"], start_index, end_index)

def highlight_with_pygments(self) -> None:
"""Highlight the text content using Pygments"""

if not self.lexer or not self.tag_colors:
return

Expand All @@ -153,21 +185,8 @@ def highlight(self) -> None:

text = self.text.get_all_text()

# NOTE: Highlighting only visible area
# total_lines = int(self.text.index('end-1c').split('.')[0])
# start_line = int(self.text.yview()[0] * total_lines)
# first_visible_index = f"{start_line}.0"
# last_visible_index =f"{self.text.winfo_height()}.end"
# for token, _ in self.tag_colors.items():
# self.text.tag_remove(str(token), first_visible_index, last_visible_index)
# text = self.text.get(first_visible_index, last_visible_index)

self.text.mark_set("range_start", "1.0")
for token, content in lex(text, self.lexer):
self.text.mark_set("range_end", f"range_start + {len(content)}c")
self.text.tag_add(str(token), "range_start", "range_end")
self.text.mark_set("range_start", "range_end")

# DEBUG
# print(f"{content} is recognized as a <{str(token)}>")
# print("==================================")
35 changes: 35 additions & 0 deletions src/biscuit/editor/text/treesitter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import tree_sitter
from tree_sitter import Language, Parser

class TreeSitterHighlighter:
def __init__(self, language: str):
self.language = language
self.parser = Parser()
self.language_library = self.load_language_library(language)
self.parser.set_language(self.language_library)

def load_language_library(self, language: str):
# Load the language library for the given language
if language == "python":
return Language('build/my-languages.so', 'python')
else:
raise ValueError(f"Unsupported language: {language}")

def highlight(self, code: str):
tree = self.parser.parse(bytes(code, "utf8"))
root_node = tree.root_node
return self.extract_highlight_info(root_node)

def extract_highlight_info(self, node):
# Extract highlight information from the syntax tree
highlight_info = []
for child in node.children:
highlight_info.append({
"type": child.type,
"start_byte": child.start_byte,
"end_byte": child.end_byte,
"start_point": child.start_point,
"end_point": child.end_point
})
highlight_info.extend(self.extract_highlight_info(child))
return highlight_info

0 comments on commit 243e49e

Please sign in to comment.