-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmllm_task_file.py
95 lines (80 loc) · 3.13 KB
/
mllm_task_file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
from dataclasses import dataclass
from typing import List, Optional
import re
@dataclass
class Item:
    """A single entry of the task file: one dash line plus its continuation lines."""

    # Number of whitespace characters that preceded the item's leading '-'.
    # unparse_items() writes this many spaces before the dash.
    indent: int
    # Hexadecimal id taken from an optional "[id]" right after the dash,
    # or None when the dash line carries no id.
    task_id: Optional[str]
    # Text lines of the item. unparse_items() writes the first entry on the
    # dash line and every later entry verbatim on a line of its own.
    content: List[str]
def parse_text(text: str) -> List[Item]:
    """
    Parse the custom task file format into a list of Items.

    A line starts a new item when it consists of optional leading whitespace,
    a '-', an optional hexadecimal id in square brackets and the item text,
    e.g. "  - [12] wash stuff" -> indent=2, task_id="12", content=["wash stuff"].
    Every other line is a continuation line and is appended verbatim to the
    content of the item currently being built.

    Args:
        text: The whole task file as one string.

    Returns:
        The items in file order. An item whose dash line has no text and no
        continuation lines has ``content == []``. When such an item does have
        continuation lines, ``content[0]`` is ``""`` so the continuation lines
        stay on their own lines when the item is written back with
        unparse_items(). Lines that appear before the first dash line are
        collected in a leading item with ``indent=0`` and ``task_id=None``.
    """
    # group(1): leading whitespace, group(2): optional task id (hex digits,
    # without the brackets), group(3): remainder of the line.
    item_pattern = re.compile(
        r'^(\s*)-\s*(?:\[([0-9A-Fa-f]+)\]\s*)?(.*)$'
    )

    items: List[Item] = []
    # The item being built; None until the first line has been seen.
    current_item: Optional[Item] = None
    # True while the current dashed item had no text on its dash line and
    # has not received a continuation line yet.
    pending_empty_header = False

    for line in text.splitlines():
        match = item_pattern.match(line)
        if match:
            # A new item starts here: commit the previous one first.
            if current_item is not None:
                items.append(current_item)
            indent_str, task_id, rest = match.groups()
            current_item = Item(
                indent=len(indent_str),
                task_id=task_id,  # None when the line has no "[id]"
                content=[rest] if rest else [],
            )
            pending_empty_header = not rest
            continue

        if current_item is None:
            # Text before the first dash line does not fit the grammar;
            # keep it in a placeholder item instead of dropping it.
            current_item = Item(indent=0, task_id=None, content=[])
        elif pending_empty_header:
            # Reserve the (empty) dash-line slot so this continuation line is
            # not mistaken for the text that belongs on the dash line.
            current_item.content.append("")
        pending_empty_header = False
        current_item.content.append(line)

    # End of input: commit the last item, if any.
    if current_item is not None:
        items.append(current_item)
    return items
def unparse_items(items: List["Item"]) -> str:
    """
    Convert a list of Items back into the custom-format text.

    Each item produces one dash line: ``indent`` spaces, '-', then " [task_id]"
    when ``task_id`` is not None, then a space and ``content[0]`` when that
    first content line exists and is non-empty. Every further content line is
    written verbatim on a line of its own.

    Args:
        items: The items to serialize, in output order.

    Returns:
        The lines joined with "\\n", without a trailing newline; an empty
        string when ``items`` is empty.
    """
    lines: List[str] = []
    for item in items:
        # Dash line: indentation, dash, optional id.
        header = ' ' * item.indent + '-'
        if item.task_id is not None:
            header += f' [{item.task_id}]'
        # The first content line shares the dash line; an empty first line
        # adds nothing, so no trailing space is emitted.
        if item.content and item.content[0]:
            header += " " + item.content[0]
        lines.append(header)
        # Remaining content lines go out as-is, one per line.
        lines.extend(item.content[1:])
    return "\n".join(lines)