-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add class that treats Codex as a backup #11
base: main
Are you sure you want to change the base?
Changes from 2 commits
3acb048
74755d2
f7f8156
d20d31c
a223b04
f15424c
23eeb58
9e8690c
d0ad8df
b4bff54
038a475
5fbb48e
3892b52
22253e9
e4bdf2c
e5a6164
807d7fa
00def49
d8a6e86
2630a2c
3286674
0ebd4fe
4eca7d3
c59cec5
6026179
a94ffb5
2510255
b439113
38666de
a5d655b
e776dfe
7866f0c
26adbf1
36f80e9
febbfd0
dc1d003
739ffc6
3e4864a
81cc934
9e91e9b
c5843c9
49f9a9d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
# SPDX-License-Identifier: MIT | ||
from cleanlab_codex.codex import Codex | ||
from cleanlab_codex.codex_backup import CodexBackup | ||
from cleanlab_codex.codex_tool import CodexTool | ||
|
||
__all__ = ["Codex", "CodexTool"] | ||
__all__ = ["Codex", "CodexTool", "CodexBackup"] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
from __future__ import annotations | ||
|
||
from functools import wraps | ||
from typing import Any, Callable, Optional | ||
|
||
from cleanlab_codex.codex import Codex | ||
from cleanlab_codex.utils.response_validators import is_bad_response | ||
|
||
|
||
def handle_backup_default(backup_response: str, decorated_instance: Any) -> None:  # noqa: ARG001
    """Do nothing with a backup response.

    This is the default value of the ``backup_handler`` parameter; both
    arguments are accepted and ignored.

    Args:
        backup_response: The backup answer handed to the handler.
        decorated_instance: The object handed to the handler alongside the answer.
    """
|
||
|
||
class CodexBackup: | ||
"""A backup decorator that connects to a Codex project to answer questions that | ||
cannot be adequately answered by the existing agent. | ||
""" | ||
|
||
DEFAULT_FALLBACK_ANSWER = "Based on the available information, I cannot provide a complete answer to this question." | ||
|
||
def __init__( | ||
self, | ||
codex_client: Codex, | ||
*, | ||
project_id: Optional[str] = None, | ||
fallback_answer: Optional[str] = DEFAULT_FALLBACK_ANSWER, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Related to Angela's comment below: rather than having the user supply a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's a bit more complicated than that at the moment. But we can consider that option once the validation.py module is finalized (where we have an |
||
backup_handler: Callable[[str, Any], None] = handle_backup_default, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not very clear from the current documentation what the purpose of this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've redone most of the code now. The How can we add a "callback" to Codex-as-Backup so that if Codex responds with an answer, the state of the RAG application changes automatically? The user can define their logic to modify the state of the RAG system in this callable There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. However, if the idea is that every RAG application should use Codex-as-Backup within the scope of its query/chat method, then this handler is unnecessary. class RAG:
def chat(self, query, ...) -> Response:
context = run_retrieval(query)
response = llm(query + context)
+
+ codex_response = codex_backup.run(response, query, context, ...)
+ # handle new response yourself
+ # response = ...
return response |
||
): | ||
self._codex_client = codex_client | ||
self._project_id = project_id | ||
self._fallback_answer = fallback_answer | ||
self._backup_handler = backup_handler | ||
|
||
@classmethod
def from_access_key(
    cls,
    access_key: str,
    *,
    project_id: Optional[str] = None,
    fallback_answer: Optional[str] = DEFAULT_FALLBACK_ANSWER,
    backup_handler: Callable[[str, Any], None] = handle_backup_default,
) -> CodexBackup:
    """Create a CodexBackup from an access key.

    A ``Codex`` client is constructed with ``Codex(key=access_key)`` and passed,
    together with the remaining arguments, to the class constructor. The project ID
    that the CodexBackup will use is the one that is associated with the access key.

    Args:
        access_key: Key used to construct the ``Codex`` client.
        project_id: Forwarded unchanged to the constructor.
        fallback_answer: Forwarded unchanged to the constructor.
        backup_handler: Forwarded unchanged to the constructor.

    Returns:
        A new instance of ``cls`` wrapping the freshly created client.
    """
    client = Codex(key=access_key)
    return cls(
        codex_client=client,
        project_id=project_id,
        fallback_answer=fallback_answer,
        backup_handler=backup_handler,
    )
|
||
@classmethod
def from_client(
    cls,
    codex_client: Codex,
    *,
    project_id: Optional[str] = None,
    fallback_answer: Optional[str] = DEFAULT_FALLBACK_ANSWER,
    backup_handler: Callable[[str, Any], None] = handle_backup_default,
) -> CodexBackup:
    """Create a CodexBackup from an existing Codex client.

    If the Codex client is initialized with a project access key, the CodexBackup
    will use the project ID that is associated with the access key.
    If the Codex client is initialized with a user API key, a project ID must be provided.

    Args:
        codex_client: The already-constructed client to wrap.
        project_id: Forwarded unchanged to the constructor.
        fallback_answer: Forwarded unchanged to the constructor.
        backup_handler: Forwarded unchanged to the constructor.

    Returns:
        A new instance of ``cls`` wrapping ``codex_client``.
    """
    return cls(
        codex_client=codex_client,
        project_id=project_id,
        fallback_answer=fallback_answer,
        backup_handler=backup_handler,
    )
|
||
def to_decorator(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we want to support ways to integrate Codex as a backup other than using the decorator? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, absolutely. The way a developer can always integrate Codex as a backup themselves is (ignore the function names; I just made them up here):
Please make suggestions to this code to make it as clear as possible for developers that they can implement the above pattern themselves, and also to ensure a pleasant experience for them as they implement it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good. Let me think about this a bit and then will make some suggestions There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
To ensure this is the case, and remains the case, I think we should have a tutorial that shows this style of integration. Could @elisno create that as well? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Elias is working on it here: https://github.com/cleanlab/sandbox/blob/main/ml_alpha/advanced_CodexAsBackup_integrations.ipynb We will ping once it is done, and the helper methods supporting it are done in this PR. At that point, everybody can first review that tutorial and just the relevant parts of this PR. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing type signature, will fail CI once #13 is merged. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll have to take a closer look at this once we've finalized the code in validation.py. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I removed this method in favor of |
||
"""Factory that creates a backup decorator using the provided Codex client""" | ||
|
||
def decorator(chat_method): | ||
""" | ||
Decorator for RAG chat methods that adds backup response handling. | ||
|
||
If the original chat method returns an inadequate response, attempts to get | ||
a backup response from Codex. Returns the backup response if available, | ||
otherwise returns the original response. | ||
|
||
Args: | ||
chat_method: Method with signature (self, user_message: str) -> str | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wondering if we could/should make this extensible beyond chat methods matching this exact signature. For example, if using llamaindex, the developer's chat method might return a llamaindex ChatResponse instead of a str. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1, on both the arguments side and return value side. We could make the decorator generic over everything. And if we add such a capability in the library, I think we should have a tutorial that exercises it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good points! Making the decorator more flexible is definitely worth exploring. I’m also thinking about return types and broader extensibility. I've opened a PR for a tutorial where we assume |
||
where 'self' refers to the instance being decorated, not an instance of CodexBackup. | ||
""" | ||
|
||
@wraps(chat_method) | ||
def wrapper(decorated_instance, user_message): | ||
# Call the original chat method | ||
assistant_response = chat_method(decorated_instance, user_message) | ||
|
||
# Return original response if it's adequate | ||
if not is_bad_response(assistant_response): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems to me like this should be something that's configurable when creating a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That was a basic implementation to get started, as we were still deciding on what the "default" checks of the detection step should be. Added more options to configure, but we're still sticking with |
||
return assistant_response | ||
|
||
# Query Codex for a backup response | ||
cache_result = self._codex_client.query(user_message)[0] | ||
if not cache_result: | ||
return assistant_response | ||
|
||
# Handle backup response if handler exists | ||
self._backup_handler( | ||
backup_response=cache_result, | ||
decorated_instance=decorated_instance, | ||
) | ||
return cache_result | ||
|
||
return wrapper | ||
|
||
return decorator |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
""" | ||
This module provides validation functions for checking if an LLM response is inadequate/unhelpful. | ||
The default implementation checks for common fallback phrases, but alternative implementations | ||
are provided below as examples that can be adapted for specific needs. | ||
""" | ||
elisno marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
|
||
def is_bad_response(response: str) -> bool:
    """Report whether an assistant response looks inadequate.

    The default implementation delegates to ``basic_validator``, which checks for
    common fallback phrases from LLM assistants.

    NOTE: This default is a starting point; you should modify this function to
    suit your own application.

    Args:
        response: The response from the assistant.

    Returns:
        True if the response appears to be a fallback/inadequate response.
    """
    return basic_validator(response)
|
||
|
||
def basic_validator(response: str) -> bool:
    """Check a response for common fallback phrases from LLM assistants.

    The comparison is a case-insensitive substring match: the response is flagged
    as soon as it contains any one of the known fallback phrases.

    Args:
        response: The response from the assistant.

    Returns:
        True if the response appears to be a fallback/inadequate response.
    """
    partial_fallback_responses = [
        "Based on the available information",
        "I cannot provide a complete answer to this question",
        # Add more substrings here to improve the recall of the check
    ]
    # Lowercase the response once rather than once per candidate phrase.
    lowered_response = response.lower()
    return any(
        partial_fallback_response.lower() in lowered_response
        for partial_fallback_response in partial_fallback_responses
    )
|
||
|
||
# Alternative Implementations | ||
# --------------------------- | ||
# The following implementations are provided as examples and inspiration. | ||
# They should be adapted to your specific needs. | ||
jwmueller marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
|
||
# Fuzzy String Matching | ||
""" | ||
from thefuzz import fuzz | ||
jwmueller marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def fuzzy_match_validator(response: str, fallback_answer: str, threshold: int = 70) -> bool: | ||
partial_ratio = fuzz.partial_ratio(fallback_answer.lower(), response.lower()) | ||
return partial_ratio >= threshold | ||
""" | ||
|
||
# TLM Score Thresholding | ||
""" | ||
from cleanlab_studio import Studio | ||
jwmueller marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
studio = Studio("<API_KEY>") | ||
tlm = studio.TLM() | ||
|
||
def tlm_score_validator(response: str, context: str, query: str, tlm: TLM, threshold: float = 0.5) -> bool: | ||
jwmueller marked this conversation as resolved.
Show resolved
Hide resolved
|
||
prompt = f"Context: {context}\n\n Query: {query}\n\n Query: {query}" | ||
resp = tlm.get_trustworthiness_score(prompt, response) | ||
score = resp['trustworthiness_score'] | ||
return score < threshold | ||
""" | ||
|
||
# TLM Binary Classification | ||
""" | ||
from typing import Optional | ||
|
||
from cleanlab_studio import Studio | ||
jwmueller marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
studio = Studio("<API_KEY>") | ||
tlm = studio.TLM() | ||
|
||
def tlm_binary_validator(response: str, tlm: TLM, query: Optional[str] = None) -> bool: | ||
jwmueller marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if query is None: | ||
prompt = f"Here is a response from an AI assistant: {response}\n\n Is it helpful? Answer Yes/No only." | ||
else: | ||
prompt = f"Here is a response from an AI assistant: {response}\n\n Considering the following query: {query}\n\n Is the response helpful? Answer Yes/No only." | ||
elisno marked this conversation as resolved.
Show resolved
Hide resolved
|
||
output = tlm.prompt(prompt) | ||
return output["response"].lower() == "no" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we add an optional argument:
This is not the ideal implementation because user would ideally also be able to inflate the rate of True returns, by having this function return True even if the output is "yes" but trust score is low. I'm not sure what good API for that would be though There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you might brainstorm this with Jay, he's been thinking about similar ideas. The general issue is basically TLM as binary classifier is a bit awkward. I think ideally we do:
but I'm not sure |
||
""" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
from unittest.mock import MagicMock | ||
|
||
from cleanlab_codex.codex_backup import CodexBackup | ||
|
||
MOCK_BACKUP_RESPONSE = "This is a test response" | ||
FALLBACK_MESSAGE = "Based on the available information, I cannot provide a complete answer to this question." | ||
TEST_MESSAGE = "Hello, world!" | ||
|
||
|
||
def test_codex_backup(mock_client: MagicMock) -> None:
    """Check that the decorator keeps good responses and substitutes the backup for fallback ones."""
    mock_response = MagicMock()
    mock_response.answer = MOCK_BACKUP_RESPONSE
    mock_client.projects.entries.query.return_value = mock_response

    codex_backup = CodexBackup.from_access_key("")

    class MockApp:
        @codex_backup.to_decorator()
        def chat(self, user_message: str) -> str:
            # Just echo the user message
            return user_message

    app = MockApp()

    # Echo works well
    response = app.chat(TEST_MESSAGE)
    assert response == TEST_MESSAGE

    # Backup works well for fallback responses
    response = app.chat(FALLBACK_MESSAGE)
    assert response == MOCK_BACKUP_RESPONSE
|
||
|
||
def test_backup_handler(mock_client: MagicMock) -> None:
    """Check that the backup handler runs only for fallback responses and receives the decorated app."""
    mock_response = MagicMock()
    mock_response.answer = MOCK_BACKUP_RESPONSE
    mock_client.projects.entries.query.return_value = mock_response

    mock_handler = MagicMock()
    mock_handler.return_value = None
    codex_backup = CodexBackup.from_access_key("", backup_handler=mock_handler)

    class MockApp:
        @codex_backup.to_decorator()
        def chat(self, user_message: str) -> str:
            # Just echo the user message
            return user_message

    app = MockApp()

    response = app.chat(TEST_MESSAGE)
    assert response == TEST_MESSAGE

    # Handler should not be called for good responses
    assert mock_handler.call_count == 0

    response = app.chat(FALLBACK_MESSAGE)
    assert response == MOCK_BACKUP_RESPONSE

    # Handler should be called for bad responses
    assert mock_handler.call_count == 1
    # The MockApp instance is passed to the handler as `decorated_instance`, i.e. the
    # handler has the necessary context to handle the new response
    assert mock_handler.call_args.kwargs["decorated_instance"] is app
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Delete this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll hold off on removing this until we've finalized the code in "validation.py".
The intention was to pass the fallback answer from the backup object to the relevant
is_fallback_response
helper function before deciding to call Codex as Backup.