Skip to content

Commit

Permalink
Clean ups in llm.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
madiator committed Dec 18, 2024
1 parent 5e8482a commit 2665732
Showing 1 changed file with 6 additions and 29 deletions.
35 changes: 6 additions & 29 deletions src/bespokelabs/curator/llm/llm.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,9 +14,6 @@

from bespokelabs.curator.db import MetadataDB
from bespokelabs.curator.llm.prompt_formatter import PromptFormatter
from bespokelabs.curator.request_processor.base_request_processor import (
BaseRequestProcessor,
)
from bespokelabs.curator.request_processor.litellm_online_request_processor import (
LiteLLMOnlineRequestProcessor,
)
Expand All @@ -31,7 +28,7 @@
T = TypeVar("T")
_DictOrBaseModel = Union[Dict[str, Any], BaseModel]

logger = logger = logging.getLogger(__name__)
logger = logging.getLogger(__name__)


class LLM:
Expand Down Expand Up @@ -185,7 +182,7 @@ def _determine_backend(

# Default to LiteLLM for all other cases
logger.info(
f"Requesting {f'structured' if response_format else 'text'} output from {model_name}, using LiteLLM backend"
f"Requesting {'structured' if response_format else 'text'} output from {model_name}, using LiteLLM backend"
)
return "litellm"

Expand All @@ -194,23 +191,6 @@ def __call__(
dataset: Optional[Iterable] = None,
working_dir: str = None,
batch_cancel: bool = False,
) -> Dataset:
"""
Run completions on a dataset.
Args:
dataset (Iterable): A dataset consisting of a list of items to apply completions
working_dir (str): The working directory to save the requests.jsonl, responses.jsonl, and dataset.arrow files.
batch_cancel (bool): Whether to cancel batches
"""
return self._completions(self._request_processor, dataset, working_dir, batch_cancel)

def _completions(
self,
request_processor: BaseRequestProcessor,
dataset: Optional[Iterable] = None,
working_dir: str = None,
batch_cancel: bool = False,
) -> Dataset:
"""
Apply structured completions in parallel to a dataset using specified model and
Expand All @@ -225,13 +205,10 @@ def _completions(
Returns:
Iterable: A list of structured outputs from the completions
"""
# NOTE(Ryan): We convert from iterable to Dataset because Dataset has random access via row_idx
# We convert from iterable to Dataset because Dataset has random access via row_idx
if not isinstance(dataset, Dataset) and dataset is not None:
dataset = Dataset.from_generator(dataset)

if self is None:
raise ValueError("LLM must be provided")

if working_dir is None:
curator_cache_dir = os.environ.get(
"CURATOR_CACHE_DIR",
Expand Down Expand Up @@ -295,14 +272,14 @@ def _completions(
run_cache_dir = os.path.join(curator_cache_dir, fingerprint)

if batch_cancel:
if type(request_processor) != OpenAIBatchRequestProcessor:
if not isinstance(self._request_processor, OpenAIBatchRequestProcessor):
raise ValueError("batch_cancel can only be used with batch mode")

dataset = request_processor.cancel_batches(
dataset = self._request_processor.cancel_batches(
working_dir=run_cache_dir,
)
else:
dataset = request_processor.run(
dataset = self._request_processor.run(
dataset=dataset,
working_dir=run_cache_dir,
parse_func_hash=parse_func_hash,
Expand Down

0 comments on commit 2665732

Please sign in to comment.