fix(hf): use proper source when we create a file entry (#555)

* fix(hf): use proper source when we create a file entry
* add more details to the unsupported PyArrow type message
* add example: HF -> OpenAI -> HF -> analyze
* use HF inference endpoint

  Co-authored-by: Quentin Lhoest <[email protected]>
* use to_parquet / from_parquet to preserve schema
* add a bit of comments, fix them
* use HF_TOKEN to run e2e HF example

---------

Co-authored-by: Quentin Lhoest <[email protected]>
1 parent a516c94 · commit 7ba4477
Showing 4 changed files with 63 additions and 1 deletion.
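The commit message mentions running the end-to-end HF example with HF_TOKEN. Below is a minimal sketch (not part of the commit) of how the token could be passed explicitly to the inference client, assuming it is exported as the HF_TOKEN environment variable; recent huggingface_hub releases also pick the variable up automatically.

import os

from huggingface_hub import InferenceClient

# Illustrative only: read HF_TOKEN from the environment and hand it to the client.
client = InferenceClient(
    "meta-llama/Llama-3.1-70B-Instruct",
    token=os.environ.get("HF_TOKEN"),
)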
@@ -0,0 +1,59 @@
from huggingface_hub import InferenceClient

from datachain import C, DataChain, DataModel

PROMPT = """
Was this dialog successful? Put result as a single word: Success or Failure.
Explain the reason in a few words.
"""


class DialogEval(DataModel):
    result: str
    reason: str


# DataChain function to evaluate dialog.
# DataChain uses the input and result types to automatically infer the schema.
def eval_dialog(user_input: str, bot_response: str) -> DialogEval:
    client = InferenceClient("meta-llama/Llama-3.1-70B-Instruct")

    completion = client.chat_completion(
        messages=[
            {
                "role": "user",
                "content": f"{PROMPT}\n\nUser: {user_input}\nBot: {bot_response}",
            },
        ],
        response_format={"type": "json", "value": DialogEval.model_json_schema()},
    )

    message = completion.choices[0].message
    try:
        return DialogEval.model_validate_json(message.content)
    except ValueError:
        return DialogEval(result="Error", reason="Failed to parse response.")


# Run HF inference in parallel for each example.
# Results come back as a Pydantic model that DataChain can understand and serialize.
# Save to HF as Parquet. Dataset can be previewed here:
# https://huggingface.co/datasets/dvcorg/test-datachain-llm-eval/viewer
(
    DataChain.from_csv(
        "hf://datasets/infinite-dataset-hub/MobilePlanAssistant/data.csv"
    )
    .settings(parallel=10)
    .map(response=eval_dialog)
    .to_parquet("hf://datasets/dvcorg/test-datachain-llm-eval/data.parquet")
)

# Read it back to filter and show.
# It restores the Pydantic model from Parquet under the hood.
(
    DataChain.from_parquet(
        "hf://datasets/dvcorg/test-datachain-llm-eval/data.parquet", source=False
    )
    .filter(C("response.result") == "Failure")
    .show(3)
)