sherlockchou86
diff --git a/‎README.md
Lines changed: 55 additions & 47 deletions b/‎README.md
Lines changed: 55 additions & 47 deletions
diff --git a/‎docs/reference_links.png
52.2 KB b/‎docs/reference_links.png
52.2 KB
diff --git a/‎langpipe/lpsuperaggregator.py
Lines changed: 25 additions & 1 deletion b/‎langpipe/lpsuperaggregator.py
Lines changed: 25 additions & 1 deletion
diff --git a/‎tests/13-search_engine_rag_references.py
Lines changed: 3 additions & 3 deletions b/‎tests/13-search_engine_rag_references.py
Lines changed: 3 additions & 3 deletions
@@ -1,15 +1,19 @@
 import json
+import re
 from ollama import generate
 from .lpnode import LPNode, LPNodeType
 
 class LPSuperAggregator(LPNode):
     """
     data aggregation using LLM based on Ollama, which is able to list reference sources from the context.
     access `lpdata['global_vars']['aggregated_data']` for the aggregated data with json format.
+
+    NOTE: this node can also convert the aggregated data into HTML with clickable reference links, so that it can be rendered directly in a web page.
     """
-    def __init__(self, name, aggregate_desc=None, model='minicpm-v:8b') -> None:
+    def __init__(self, name, aggregate_desc=None, to_html=False, model='minicpm-v:8b') -> None:
         super().__init__(name, LPNodeType.LLM, model)
         self.__aggregate_desc = aggregate_desc
+        self.__to_html = to_html
         self.__aggregated_data = None
         self.__aggregate_prompt_template = """
         你是一个强大的智能信息聚合助手（Aggregator），擅长根据上下文信息，结合自己的理解、生成有引用标注的回答。
@@ -77,5 +81,25 @@ def _after_handle(self, lpdata) -> None:
         record['local_vars']['__aggregate_desc'] = self.__aggregate_desc
 
         # update global variables
+        if self.__to_html:
+            self.__aggregated_data = self.__convert_html()   # convert to html with reference links clickable 
         lpdata['final_out'] = self.__aggregated_data
         lpdata['global_vars']['aggregated_data'] = json.loads(self.__aggregated_data)
+    
+    def __convert_html(self) -> str:
+        """
+        turn the `[n]` citation markers of the aggregated content into clickable HTML links.
+
+        parses `self.__aggregated_data` as JSON, then replaces every `[n]` marker found in
+        `content` with an `<a>` element pointing at the n-th entry (1-based) of `references`.
+        markers whose number has no matching reference are left untouched.
+
+        Returns:
+            the updated data serialized back to a JSON string (indent=4, non-ASCII characters kept).
+
+        Raises:
+            json.JSONDecodeError: if the aggregated data is not valid JSON.
+            KeyError: if the parsed data has no "content" key.
+        """
+        import html  # local import: only this method needs it
+
+        data = json.loads(self.__aggregated_data)
+        content = data["content"]
+        # a missing or null reference list simply means that nothing gets linked
+        links = data.get("references") or []
+
+        def replace(match):
+            num = int(match.group(1))
+            index = num - 1   # markers are 1-based, the reference list is 0-based
+            if 0 <= index < len(links):
+                # the URL is presumably produced by a search engine / the model, so escape it
+                # before placing it inside the double-quoted href attribute
+                url = html.escape(str(links[index]), quote=True)
+                return f'<a href="{url}" target="_blank">[{num}]</a>'
+            # no such reference: keep the marker exactly as it was written
+            return match.group(0)
+
+        html_content = re.sub(r'\[(\d+)\]', replace, content)
+        data['content'] = html_content
+        return json.dumps(data, indent=4, ensure_ascii=False)
@@ -25,8 +25,8 @@
 # create nodes
 begin = langpipe.LPBegin('begin_node')
 classifier = langpipe.LPClassifier('classifier', labels_desc)
-bocha_search = sample_nodes.LPBoChaSearch('bocha_search', 'sk-604b3529ecba4deab76ff3e5b6c98b85') # replace with your own api key
-aggregator = langpipe.LPSuperAggregator('aggregator', None, 'qwen2.5:7b')  # including reference sources
+bocha_search = sample_nodes.LPBoChaSearch('bocha_search', 'sk-***')              # replace with your own api key
+aggregator = langpipe.LPSuperAggregator('aggregator', None, True, 'qwen2.5:7b')  # including reference sources
 end0 = langpipe.LPEnd('end_node_0')  # 正常问题 结束分支
 end1 = langpipe.LPEnd('end_node_1')  # 敏感问题 结束分支
 
@@ -37,7 +37,7 @@
 aggregator.link([end0])
 
 # input what you want to
-begin.input(query3, None, False)
+begin.input(query5, None, False)
 
 # visualize the pipeline with data flow
 print('-----board for debug purpose-----')