Commit 235472b

fa_bu_hui pref (#534)

Wang-Daojiyuan and yuan.wang authored
Co-authored-by: yuan.wang <[email protected]>
1 parent 39cab8c · commit 235472b

4 files changed: 77 additions, 39 deletions


src/memos/memories/textual/item.py
1 addition & 0 deletions

@@ -199,6 +199,7 @@ class PreferenceTextualMemoryMetadata(TextualMemoryMetadata):
     preference: str | None = Field(default=None, description="Preference.")
     created_at: str | None = Field(default=None, description="Timestamp of the dialog.")
     mem_cube_id: str | None = Field(default=None, description="ID of the MemCube.")
+    score: float | None = Field(default=None, description="Score of the retrieval result.")


 class TextualMemoryItem(BaseModel):
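
For context, a minimal self-contained sketch of the pattern this one-line change enables: an optional score slot on the preference metadata that a reranker can fill at retrieval time. The stand-in model below mirrors only the fields visible in the diff; it is not the project's actual class, and the values are invented.

    from pydantic import BaseModel, Field

    class PreferenceMetadataSketch(BaseModel):
        # Stand-in for PreferenceTextualMemoryMetadata; only diff-visible fields.
        preference: str | None = Field(default=None, description="Preference.")
        created_at: str | None = Field(default=None, description="Timestamp of the dialog.")
        mem_cube_id: str | None = Field(default=None, description="ID of the MemCube.")
        score: float | None = Field(default=None, description="Score of the retrieval result.")

    meta = PreferenceMetadataSketch(preference="User prefers window seats.")
    meta.score = 0.87  # a reranker can attach its relevance score after retrieval
    print(meta.score)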

src/memos/memories/textual/prefer_text_memory/extractor.py
17 additions & 12 deletions

@@ -90,7 +90,8 @@ def extract_implicit_preference(self, qa_pair: MessageList | str) -> dict[str, Any]:
             response = self.llm_provider.generate([{"role": "user", "content": prompt}])
             response = response.strip().replace("```json", "").replace("```", "").strip()
             result = json.loads(response)
-            result["preference"] = result.pop("implicit_preference")
+            for d in result:
+                d["preference"] = d.pop("implicit_preference")
             return result
         except Exception as e:
             logger.error(f"Error extracting implicit preferences: {e}, return None")
@@ -136,20 +137,24 @@ def _process_single_chunk_implicit(
         if not implicit_pref:
             return None

-        vector_info = {
-            "embedding": self.embedder.embed([implicit_pref["context_summary"]])[0],
-        }
+        memories = []
+        for pref in implicit_pref:
+            vector_info = {
+                "embedding": self.embedder.embed([pref["context_summary"]])[0],
+            }

-        extract_info = {**basic_info, **implicit_pref, **vector_info, **info}
+            extract_info = {**basic_info, **pref, **vector_info, **info}

-        metadata = PreferenceTextualMemoryMetadata(
-            type=msg_type, preference_type="implicit_preference", **extract_info
-        )
-        memory = TextualMemoryItem(
-            id=extract_info["dialog_id"], memory=implicit_pref["context_summary"], metadata=metadata
-        )
+            metadata = PreferenceTextualMemoryMetadata(
+                type=msg_type, preference_type="implicit_preference", **extract_info
+            )
+            memory = TextualMemoryItem(
+                id=str(uuid.uuid4()), memory=pref["context_summary"], metadata=metadata
+            )

-        return memory
+            memories.append(memory)
+
+        return memories

     def extract(
         self,
src/memos/memories/textual/prefer_text_memory/retrievers.py
30 additions & 10 deletions

@@ -1,3 +1,5 @@
+import os
+
 from abc import ABC, abstractmethod
 from typing import Any

@@ -34,9 +36,12 @@ def _naive_reranker(
         self, query: str, prefs_mem: list[TextualMemoryItem], top_k: int, **kwargs: Any
     ) -> list[TextualMemoryItem]:
         if self.reranker:
-            prefs_mem = self.reranker.rerank(query, prefs_mem, top_k)
-            return [item for item, _ in prefs_mem]
-        return prefs_mem
+            prefs_mem_reranked = []
+            prefs_mem_tuple = self.reranker.rerank(query, prefs_mem, top_k)
+            for item, score in prefs_mem_tuple:
+                item.metadata.score = score
+                prefs_mem_reranked.append(item)
+            return prefs_mem_reranked

     def _original_text_reranker(
         self,
@@ -52,11 +57,22 @@ def _original_text_reranker(
             prefs_mem_for_reranker = deepcopy(prefs_mem)
             for pref_mem, pref in zip(prefs_mem_for_reranker, prefs, strict=False):
                 pref_mem.memory = pref_mem.memory + "\n" + pref.original_text
-            prefs_mem_for_reranker = self.reranker.rerank(query, prefs_mem_for_reranker, top_k)
-            prefs_mem_for_reranker = [item for item, _ in prefs_mem_for_reranker]
+            reranked_results = self.reranker.rerank(query, prefs_mem_for_reranker, top_k)
+            prefs_mem_for_reranker = [item for item, _ in reranked_results]
             prefs_ids = [item.id for item in prefs_mem_for_reranker]
             prefs_dict = {item.id: item for item in prefs_mem}
-            return [prefs_dict[item_id] for item_id in prefs_ids if item_id in prefs_dict]
+
+            # Create mapping from id to score from reranked results
+            reranked_scores = {item.id: score for item, score in reranked_results}
+
+            # Assign scores to the original items
+            result_items = []
+            for item_id in prefs_ids:
+                if item_id in prefs_dict:
+                    original_item = prefs_dict[item_id]
+                    original_item.metadata.score = reranked_scores.get(item_id)
+                    result_items.append(original_item)
+            return result_items
         return prefs_mem

     def retrieve(
@@ -119,9 +135,6 @@ def retrieve(
             if pref.payload.get("preference", None)
         ]

-        # store explicit id and score, use it after reranker
-        explicit_id_scores = {item.id: item.score for item in explicit_prefs}
-
         reranker_map = {
             "naive": self._naive_reranker,
             "original_text": self._original_text_reranker,
@@ -136,7 +149,14 @@

         # filter explicit mem by score bigger than threshold
         explicit_prefs_mem = [
-            item for item in explicit_prefs_mem if explicit_id_scores.get(item.id, 0) >= 0.0
+            item
+            for item in explicit_prefs_mem
+            if item.metadata.score >= float(os.getenv("PREFERENCE_SEARCH_THRESHOLD", 0.0))
+        ]
+        implicit_prefs_mem = [
+            item
+            for item in implicit_prefs_mem
+            if item.metadata.score >= float(os.getenv("PREFERENCE_SEARCH_THRESHOLD", 0.0))
         ]

         return explicit_prefs_mem + implicit_prefs_mem
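
An illustrative sketch of the new threshold gate (my own; the list contents and values are made up): retrieved memories are kept only if their reranker score clears PREFERENCE_SEARCH_THRESHOLD, which defaults to 0.0 when the environment variable is unset, as in the diff above.

    import os

    os.environ["PREFERENCE_SEARCH_THRESHOLD"] = "0.5"  # unset -> 0.0, matching the diff

    threshold = float(os.getenv("PREFERENCE_SEARCH_THRESHOLD", 0.0))
    scored_memories = [("likes aisle seats", 0.83), ("prefers tea over coffee", 0.21)]

    kept = [(text, score) for text, score in scored_memories if score >= threshold]
    print(kept)  # only entries whose score clears the threshold survive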

src/memos/templates/prefer_complete_prompt.py
29 additions & 17 deletions

@@ -11,7 +11,8 @@
 Requirements:
 1. Keep only the preferences explicitly mentioned by the user. Do not infer or assume. If the user mentions reasons for their preferences, include those reasons as well.
 2. Output should be a list of entries concise natural language summaries and the corresponding context summary, context summary must contain complete information of the conversation fragment that the preference is mentioned.
-3. If multiple preferences are mentioned within the same topic or domain, you MUST combine them into a single entry, keep each entry information complete.
+3. If multiple preferences are mentioned within the same topic or domain, you MUST combine them into a single entry, keep each entry information complete. Different topics of preferences should be divided into multiple entries.
+4. If no explicit preference can be reasonably extracted, return [].

 Conversation:
 {qa_pair}
@@ -23,6 +24,7 @@
 "explicit_preference": "A short natural language summary of the preferences",
 "context_summary": "The corresponding context summary, which is a summary of the corresponding conversation, do not lack any scenario information",
 "reasoning": "reasoning process to find the explicit preferences"
+"topic": "preference topic, which can only belong to one topic or domain, such as: sports, hotel, education, etc.",
 },
 ]
 ```
@@ -42,7 +44,8 @@
 要求:
 1. 只保留用户明确提到的偏好,不要推断或假设。如果用户提到了偏好的原因,也要包含这些原因。
 2. 输出应该是一个条目列表,包含简洁的自然语言摘要和相应的上下文摘要,上下文摘要必须包含提到偏好的对话片段的完整信息。
-3. 如果在同一主题或领域内提到了多个偏好,你必须将它们合并为一个条目,保持每个条目信息完整。
+3. 如果在同一主题或领域内提到了多个偏好,你必须将它们合并为一个条目,保持每个条目信息完整。不同话题的偏好要分为多个条目。
+4. 如果没有可以合理提取的显式偏好,返回[]。

 对话:
 {qa_pair}
@@ -51,9 +54,10 @@
 ```json
 [
 {
-"explicit_preference": "偏好的简短自然语言摘要",
+"explicit_preference": "偏好的简短自然语言摘要,需要描述为“用户偏好于/不喜欢/想要/不想要/偏好什么”",
 "context_summary": "对应的上下文摘要,即对应对话的摘要,不要遗漏任何场景信息",
-"reasoning": "寻找显式偏好的推理过程"
+"reasoning": "寻找显式偏好的推理过程",
+"topic": "偏好所属的主题或领域,例如:体育、酒店、教育等, topic只能属于一个主题或领域",
 },
 ]
 ```
@@ -79,18 +83,22 @@
 2. Inferred implicit preferences must not conflict with explicit preferences.
 3. For implicit_preference: only output the preference statement itself; do not include any extra explanation, reasoning, or confidence information. Put all reasoning and explanation in the reasoning field.
 4. In the reasoning field, explicitly explain the underlying logic and hidden motivations you identified.
-5. If no implicit preference can be reasonably inferred, leave the implicit_preference field empty (do not output anything else).
+5. Different topics of preferences should be divided into multiple entries.
+6. If no implicit preference can be reasonably inferred, return [].

 Conversation:
 {qa_pair}

 Output format:
-```json
-{
-"implicit_preference": "A concise natural language statement of the implicit preferences reasonably inferred from the conversation, or an empty string",
-"context_summary": "The corresponding context summary, which is a summary of the corresponding conversation, do not lack any scenario information",
-"reasoning": "Explain the underlying logic, hidden motivations, and behavioral patterns that led to this inference"
-}
+[
+```json
+{
+"implicit_preference": "A concise natural language statement of the implicit preferences reasonably inferred from the conversation, or an empty string",
+"context_summary": "The corresponding context summary, which is a summary of the corresponding conversation, do not lack any scenario information",
+"reasoning": "Explain the underlying logic, hidden motivations, and behavioral patterns that led to this inference",
+"topic": "preference topic, which can only belong to one topic or domain, such as: sports, hotel, education, etc.",
+}
+]
 ```
 Don't output anything except the JSON.
 """
@@ -115,18 +123,22 @@
 2. 推断的隐式偏好不得与显式偏好冲突。
 3. 对于 implicit_preference:仅输出偏好陈述本身;不要包含任何额外的解释、推理或置信度信息。将所有推理和解释放在 reasoning 字段中。
 4. 在 reasoning 字段中,明确解释你识别出的底层逻辑和隐藏动机。
-5. 如果无法合理推断出隐式偏好,则将 implicit_preference 字段留空(不要输出其他任何内容)。
+5. 如果在同一主题或领域内提到了多个偏好,你必须将它们合并为一个条目,保持每个条目信息完整。不同话题的偏好要分为多个条目。
+6. 如果没有可以合理推断的隐式偏好,返回[]。

 对话:
 {qa_pair}

 输出格式:
 ```json
-{
-"implicit_preference": "从对话中合理推断出的隐式偏好的简洁自然语言陈述,或空字符串",
-"context_summary": "对应的上下文摘要,即对应对话的摘要,不要遗漏任何场景信息",
-"reasoning": "解释推断出该偏好的底层逻辑、隐藏动机和行为模式"
-}
+[
+{
+"implicit_preference": "从对话中合理推断出的隐式偏好的简洁自然语言陈述,或空字符串",
+"context_summary": "对应的上下文摘要,即对应对话的摘要,不要遗漏任何场景信息",
+"reasoning": "解释推断出该偏好的底层逻辑、隐藏动机和行为模式",
+"topic": "偏好所属的主题或领域,例如:体育、酒店、教育等, topic只能属于一个主题或领域",
+}
+]
 ```
 除JSON外不要输出任何其他内容。
 """
