|
1 | 1 | import json
|
| 2 | +import re |
2 | 3 | from ollama import generate
|
3 | 4 | from .lpnode import LPNode, LPNodeType
|
4 | 5 |
|
5 | 6 | class LPSuperAggregator(LPNode):
|
6 | 7 | """
|
7 | 8 | data aggregation using LLM based on Ollama, which is able to list reference sources from context.
|
8 | 9 | access `lpdata['global_vars']['aggregated_data']` for the aggregated data with json format.
|
| 10 | +
|
| 11 | + NOTE: this node is also able to convert aggregated data into html format with reference links clickable, which can be used to render the data into a web page directly. |
9 | 12 | """
|
10 |
| - def __init__(self, name, aggregate_desc=None, model='minicpm-v:8b') -> None: |
| 13 | + def __init__(self, name, aggregate_desc=None, to_html=False, model='minicpm-v:8b') -> None: |
11 | 14 | super().__init__(name, LPNodeType.LLM, model)
|
12 | 15 | self.__aggregate_desc = aggregate_desc
|
| 16 | + self.__to_html = to_html |
13 | 17 | self.__aggregated_data = None
|
14 | 18 | self.__aggregate_prompt_template = """
|
15 | 19 | 你是一个强大的智能信息聚合助手(Aggregator),擅长根据上下文信息,结合自己的理解、生成有引用标注的回答。
|
@@ -77,5 +81,25 @@ def _after_handle(self, lpdata) -> None:
|
77 | 81 | record['local_vars']['__aggregate_desc'] = self.__aggregate_desc
|
78 | 82 |
|
79 | 83 | # update global variables
|
| 84 | + if self.__to_html: |
| 85 | + self.__aggregated_data = self.__convert_html() # convert to html with reference links clickable |
80 | 86 | lpdata['final_out'] = self.__aggregated_data
|
81 | 87 | lpdata['global_vars']['aggregated_data'] = json.loads(self.__aggregated_data)
|
| 88 | + |
| 89 | + def __convert_html(self) -> str: |
| 90 | + data = json.loads(self.__aggregated_data) |
| 91 | + content = data["content"] |
| 92 | + links = data["references"] |
| 93 | + |
| 94 | + def replace(match): |
| 95 | + num = int(match.group(1)) |
| 96 | + index = num - 1 |
| 97 | + if 0 <= index < len(links): |
| 98 | + url = links[index] |
| 99 | + return f'<a href="{url}" target="_blank">[{num}]</a>' |
| 100 | + else: |
| 101 | + return match.group(0) |
| 102 | + |
| 103 | + html_content = re.sub(r'\[(\d+)\]', replace, content) |
| 104 | + data['content'] = html_content |
| 105 | + return json.dumps(data, indent=4, ensure_ascii=False) |
0 commit comments