Skip to content

Commit

Permalink
Merge branch 'dev' into CURATOR-28-add-a-lite-llm-backend-for-curator
Browse files (browse the repository at this point in the history)
  • Loading branch information
CharlieJCJ committed Dec 1, 2024
2 parents cee80b0 + 112cd91 commit f358210
Show file tree
Hide file tree
Showing 8 changed files with 339 additions and 251 deletions.
1 change: 0 additions & 1 deletion examples/camel.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
- import asyncio
from typing import List

from pydantic import BaseModel, Field
Expand Down
2 changes: 1 addition & 1 deletion examples/poem.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class Topics(BaseModel):

# We define a prompter that generates topics.
topic_generator = curator.Prompter(
- prompt_func=lambda: f"Generate 10 diverse topics that are suitable for writing poems about.",
+ prompt_func=lambda: "Generate 10 diverse topics that are suitable for writing poems about.",
model_name="gpt-4o-mini",
response_format=Topics,
parse_func=lambda _, topics: [{"topic": t} for t in topics.topics_list],
Expand Down
83 changes: 44 additions & 39 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "bespokelabs-curator"
- version = "0.1.9post1"
+ version = "0.1.10"
description = "Bespoke Labs Curator"
authors = ["Bespoke Labs <[email protected]>"]
readme = "README.md"
Expand Down Expand Up @@ -29,12 +29,12 @@ pandas = "2.2.2"
xxhash = "^3.5.0"
tqdm = "^4.67.0"
matplotlib = "^3.9.2"
- aiofiles = "^24.1.0"
- tiktoken = "^0.8.0"
nest-asyncio = "^1.6.0"
rich = "^13.7.0"
litellm = "^1.52.11"
isort = "^5.13.2"
+ tiktoken = ">=0.7.0,<0.8.0"
+ aiofiles = ">=22.0,<24.0"

[tool.poetry.group.dev.dependencies]
black = "^24.2.0"
Expand Down
2 changes: 2 additions & 0 deletions src/bespokelabs/curator/prompter/prompter.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ def _completions(
)

fingerprint = xxh64(fingerprint_str.encode("utf-8")).hexdigest()
+ logger.debug(f"Curator Cache Fingerprint: {fingerprint}")

metadata_db_path = os.path.join(curator_cache_dir, "metadata.db")
metadata_db = MetadataDB(metadata_db_path)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def create_dataset_files(
working_dir: str,
parse_func_hash: str,
prompt_formatter: PromptFormatter,
- ) -> None:
+ ) -> Dataset:
"""
Creates the request files if they don't already exist or use existing.
A single request file (requests_0.jsonl) or multiple request files
Expand Down Expand Up @@ -217,7 +217,7 @@ def create_dataset_files(
return output_dataset

error_help = (
- f"Please check your `parse_func` is returning a valid row (dict) "
+ "Please check your `parse_func` is returning a valid row (dict) "
"or list of rows (list of dicts) and re-run. "
"Dataset will be regenerated from cached LLM responses."
)
Expand Down Expand Up @@ -314,9 +314,7 @@ def create_dataset_files(

writer.finalize()

- output_dataset = Dataset.from_file(dataset_file)
-
- return output_dataset
+ return Dataset.from_file(dataset_file)


def parse_response_message(
Expand Down
Loading

0 comments on commit f358210

Please sign in to comment.