Commit 0c48023

add to_html function for superaggregator
1 parent cca263b commit 0c48023

File tree: 4 files changed (+83, -51 lines)


README.md

Lines changed: 55 additions & 47 deletions
Large diffs are not rendered by default.

docs/reference_links.png

52.2 KB (binary image; diff not rendered)

langpipe/lpsuperaggregator.py

Lines changed: 25 additions & 1 deletion
@@ -1,15 +1,19 @@
 import json
+import re
 from ollama import generate
 from .lpnode import LPNode, LPNodeType

 class LPSuperAggregator(LPNode):
     """
     Data aggregation using an LLM via Ollama, able to list reference sources from the context.
     Access `lpdata['global_vars']['aggregated_data']` for the aggregated data in JSON format.
+
+    NOTE: this node can also convert the aggregated data into HTML with clickable reference links, so the result can be rendered on a web page directly.
     """
-    def __init__(self, name, aggregate_desc=None, model='minicpm-v:8b') -> None:
+    def __init__(self, name, aggregate_desc=None, to_html=False, model='minicpm-v:8b') -> None:
         super().__init__(name, LPNodeType.LLM, model)
         self.__aggregate_desc = aggregate_desc
+        self.__to_html = to_html
         self.__aggregated_data = None
         self.__aggregate_prompt_template = """
        You are a powerful intelligent information-aggregation assistant (Aggregator), skilled at combining the context with your own understanding to generate answers annotated with citations.
@@ -77,5 +81,25 @@ def _after_handle(self, lpdata) -> None:
         record['local_vars']['__aggregate_desc'] = self.__aggregate_desc

         # update global variables
+        if self.__to_html:
+            self.__aggregated_data = self.__convert_html()  # convert to HTML with clickable reference links
         lpdata['final_out'] = self.__aggregated_data
         lpdata['global_vars']['aggregated_data'] = json.loads(self.__aggregated_data)
+
+    def __convert_html(self) -> str:
+        data = json.loads(self.__aggregated_data)
+        content = data["content"]
+        links = data["references"]
+
+        def replace(match):
+            num = int(match.group(1))
+            index = num - 1
+            if 0 <= index < len(links):
+                url = links[index]
+                return f'<a href="{url}" target="_blank">[{num}]</a>'
+            else:
+                return match.group(0)
+
+        html_content = re.sub(r'\[(\d+)\]', replace, content)
+        data['content'] = html_content
+        return json.dumps(data, indent=4, ensure_ascii=False)
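
For context, a minimal standalone sketch of what the new conversion does, using hypothetical aggregated output. The "content"/"references" keys match the diff above; the sample text and URLs are purely illustrative:

import json
import re

# Hypothetical aggregated output (illustrative only); keys mirror the diff above.
aggregated = '{"content": "LangPipe aggregates search results [1] and cites sources [2].", "references": ["https://example.com/a", "https://example.com/b"]}'

data = json.loads(aggregated)
links = data["references"]

def replace(match):
    num = int(match.group(1))  # citation number inside [n]
    if 0 <= num - 1 < len(links):
        return f'<a href="{links[num - 1]}" target="_blank">[{num}]</a>'
    return match.group(0)  # leave out-of-range markers untouched

data["content"] = re.sub(r'\[(\d+)\]', replace, data["content"])
print(data["content"])
# Expected: each [n] becomes an <a href="..." target="_blank">[n]</a> link

Markers whose number has no matching reference are left as plain text, so malformed LLM output degrades gracefully instead of raising.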

tests/13-search_engine_rag_references.py

Lines changed: 3 additions & 3 deletions
@@ -25,8 +25,8 @@
 # create nodes
 begin = langpipe.LPBegin('begin_node')
 classifier = langpipe.LPClassifier('classifier', labels_desc)
-bocha_search = sample_nodes.LPBoChaSearch('bocha_search', 'sk-604b3529ecba4deab76ff3e5b6c98b85')  # replace with your own api key
-aggregator = langpipe.LPSuperAggregator('aggregator', None, 'qwen2.5:7b')  # including reference sources
+bocha_search = sample_nodes.LPBoChaSearch('bocha_search', 'sk-***')  # replace with your own api key
+aggregator = langpipe.LPSuperAggregator('aggregator', None, True, 'qwen2.5:7b')  # including reference sources
 end0 = langpipe.LPEnd('end_node_0')  # end branch for normal questions
 end1 = langpipe.LPEnd('end_node_1')  # end branch for sensitive questions

@@ -37,7 +37,7 @@
 aggregator.link([end0])

 # input what you want to
-begin.input(query3, None, False)
+begin.input(query5, None, False)

 # visualize the pipeline with data flow
 print('-----board for debug purpose-----')
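
The bare True in the updated LPSuperAggregator call is the new to_html flag from the signature above; an equivalent keyword form (shown only as an alternative way to write the same call) reads more explicitly:

aggregator = langpipe.LPSuperAggregator('aggregator', aggregate_desc=None,
                                        to_html=True, model='qwen2.5:7b')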
