add isort and black and github workflows
vutrung96 committed Nov 21, 2024
1 parent ec424e7 commit 5380c96
Showing 18 changed files with 256 additions and 371 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/lint.yaml
@@ -0,0 +1,30 @@
name: Python Linting

on: [push, pull_request]

jobs:
  PythonLinting:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        project: [bespoke] # Add other projects here

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          cd ${{ matrix.project }}
          pip install poetry
          poetry install
      - name: Run black
        run: |
          cd ${{ matrix.project }}
          poetry run black --check .
      - name: Run isort
        run: |
          cd ${{ matrix.project }}
          poetry run isort --check .
6 changes: 4 additions & 2 deletions examples/distill.py
@@ -1,7 +1,9 @@
from bespokelabs import curator
from datasets import load_dataset
import logging

from datasets import load_dataset

from bespokelabs import curator

dataset = load_dataset("allenai/WildChat", split="train")
dataset = dataset.select(range(3_000))

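The reordering above is what isort's default section grouping produces: standard-library imports first, third-party packages next, first-party (project) imports last, with a blank line between groups. A minimal sketch of the resulting import header, annotated by group (the module names are taken from the diff above; running it assumes the datasets and bespokelabs packages are installed):

import logging  # standard library

from datasets import load_dataset  # third-party

from bespokelabs import curator  # first-party / project code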
10 changes: 5 additions & 5 deletions examples/poem.py
@@ -2,10 +2,12 @@
We generate 10 diverse topics and then generate 2 poems for each topic."""

from bespokelabs import curator
from typing import List

from datasets import Dataset
from pydantic import BaseModel, Field
from typing import List

from bespokelabs import curator


# We use Pydantic and structured outputs to define the format of the response.
@@ -41,9 +43,7 @@ class Poems(BaseModel):
model_name="gpt-4o-mini",
response_format=Poems,
# `row` is the input row, and `poems` is the Poems class which is parsed from the structured output from the LLM.
parse_func=lambda row, poems: [
{"topic": row["topic"], "poem": p} for p in poems.poems_list
],
parse_func=lambda row, poems: [{"topic": row["topic"], "poem": p} for p in poems.poems_list],
)

# We apply the prompter to the topics dataset.
18 changes: 16 additions & 2 deletions poetry.lock

Some generated files are not rendered by default.

3 changes: 2 additions & 1 deletion pyproject.toml
@@ -34,6 +34,7 @@ tiktoken = "^0.8.0"
nest-asyncio = "^1.6.0"
rich = "^13.7.0"
litellm = "^1.52.11"
isort = "^5.13.2"

[tool.poetry.group.dev.dependencies]
black = "^24.2.0"
@@ -47,4 +48,4 @@ build-backend = "poetry.core.masonry.api"
curator-viewer = "bespokelabs.curator.viewer.__main__:main"

[tool.black]
line-length = 80
line-length = 100
4 changes: 1 addition & 3 deletions src/bespokelabs/__init__.py
@@ -3,9 +3,7 @@
logger = logging.getLogger("bespokelabs.curator")

handler = logging.StreamHandler()
formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.WARNING)
2 changes: 1 addition & 1 deletion src/bespokelabs/curator/__init__.py
@@ -1,2 +1,2 @@
from .prompter.prompter import Prompter
from .dataset import Dataset
from .prompter.prompter import Prompter
34 changes: 11 additions & 23 deletions src/bespokelabs/curator/dataset.py
@@ -1,19 +1,17 @@
import glob
import json
import logging
import os
import glob
from typing import Any, Dict, Iterable, Iterator, List, TypeVar

import pandas as pd

from pydantic import BaseModel
from datasets import Dataset as HFDataset
from datasets.arrow_writer import ArrowWriter, SchemaInferenceError
from typing import Any, Dict, Iterable, Iterator, List, TypeVar
from pydantic import BaseModel

from bespokelabs.curator.prompter.prompt_formatter import PromptFormatter
from bespokelabs.curator.request_processor.generic_response import (
GenericResponse,
)
from bespokelabs.curator.request_processor.generic_response import \
GenericResponse

T = TypeVar("T")

@@ -33,9 +31,7 @@ def from_iterable(iterable: Iterable[Dict[str, Any] | BaseModel]):
return Dataset(iterable=iterable)

def from_working_dir(working_dir: str, prompt_formatter: PromptFormatter):
return Dataset(
working_dir=working_dir, prompt_formatter=prompt_formatter
)
return Dataset(working_dir=working_dir, prompt_formatter=prompt_formatter)

def __iter__(self) -> Iterator[Dict[str, Any] | BaseModel]:
if self.iterable is not None:
@@ -48,13 +44,9 @@ def __iter__(self) -> Iterator[Dict[str, Any] | BaseModel]:
for line in open(response_file, "r"):
response = GenericResponse.model_validate_json(line)
if self.prompt_formatter.response_format:
response.response = self.prompt_formatter.response_format(
**response.response
)
response.response = self.prompt_formatter.response_format(**response.response)
if self.prompt_formatter.parse_func:
response = self.prompt_formatter.parse_func(
response.row, response.response
)
response = self.prompt_formatter.parse_func(response.row, response.response)
else:
response = [response.response]

@@ -97,10 +89,8 @@ def to_huggingface(self, in_memory: bool = False) -> None:
total_responses_count += 1
response = GenericResponse.model_validate_json(line)
if self.prompt_formatter.response_format:
response.response = (
self.prompt_formatter.response_format(
**response.response
)
response.response = self.prompt_formatter.response_format(
**response.response
)

if response is None:
@@ -119,9 +109,7 @@ def to_huggingface(self, in_memory: bool = False) -> None:
row = row.model_dump()
writer.write(row)

logging.info(
f"Read {total_responses_count} responses, {failed_responses_count} failed"
)
logging.info(f"Read {total_responses_count} responses, {failed_responses_count} failed")
logging.info("Finalizing writer")

if failed_responses_count == total_responses_count:
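One wrinkle in the hunk above: for the long GenericResponse import, isort emitted a backslash continuation, whereas black prefers parenthesized imports. A minimal sketch of the two wrapping styles; the profile = "black" setting mentioned below is an assumption about how the two tools are commonly reconciled, not something this commit configures:

# Wrapping produced by isort here (backslash continuation):
from bespokelabs.curator.request_processor.generic_response import \
    GenericResponse

# Black-style parenthesized wrapping (what isort emits when configured with
# profile = "black" -- an assumption, not part of this commit):
from bespokelabs.curator.request_processor.generic_response import (
    GenericResponse,
)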
62 changes: 35 additions & 27 deletions src/bespokelabs/curator/install_ui.py
@@ -4,22 +4,23 @@
It includes progress tracking, status updates, and a polished success message.
"""

import sys
import subprocess
from typing import Optional, Tuple
import sys
from dataclasses import dataclass
from enum import Enum
from typing import Optional, Tuple

from rich.console import Console
from rich.text import Text
from rich.live import Live
from rich.spinner import Spinner
from rich.panel import Panel
from rich.progress import ProgressBar
from rich.spinner import Spinner
from rich.text import Text


class InstallationStage(Enum):
"""Enum representing different stages of the installation process."""

PREPARING = ("Preparing your environment...", 0.0)
COLLECTING = ("Downloading packages...", 0.2)
DOWNLOADING = ("Downloading packages...", 0.4)
@@ -35,9 +36,10 @@ def __init__(self, message: str, progress: float):
@dataclass
class InstallationUI:
"""Class to manage the installation UI components and styling."""

package_name: str
console: Console = Console()

def create_progress_bar(self, completed: float = 0) -> Text:
"""Create a stylish progress bar with the given completion percentage."""
width = 40
@@ -65,25 +67,33 @@ def create_loading_text(self, stage: InstallationStage, progress: float) -> Text
("Your synthetic data journey begins in moments", "dim white"),
self.create_progress_bar(progress),
("\n ", ""),
(stage.message, "italic dim white")
(stage.message, "italic dim white"),
)

def create_success_text(self) -> Text:
"""Create the success message with links."""
text = Text()
text.append("✨ Curator installed successfully!\n\n", style="bold green")
text.append("Start building production-ready synthetic data pipelines:\n\n", style="dim white")
text.append(
"Start building production-ready synthetic data pipelines:\n\n", style="dim white"
)
text.append(" 📚 ", style="")
text.append("docs.bespokelabs.ai", style="dim cyan link https://docs.bespokelabs.ai")
text.append("\n 📦 ", style="")
text.append("github.com/bespokelabsai/curator", style="dim cyan link https://github.com/bespokelabsai/curator")
text.append(
"github.com/bespokelabsai/curator",
style="dim cyan link https://github.com/bespokelabsai/curator",
)
text.append("\n 💬 ", style="")
text.append("discord.gg/KqpXvpzVBS", style="dim cyan link https://discord.com/invite/KqpXvpzVBS")
text.append(
"discord.gg/KqpXvpzVBS", style="dim cyan link https://discord.com/invite/KqpXvpzVBS"
)
return text


class PackageInstaller:
"""Class to handle the package installation process."""

def __init__(self, package_name: str, version: Optional[str] = None):
self.package_spec = f"{package_name}=={version}" if version else package_name
self.ui = InstallationUI(package_name)
@@ -96,13 +106,13 @@ def run_pip_install(self) -> subprocess.Popen:
stderr=subprocess.PIPE,
text=True,
bufsize=1,
universal_newlines=True
universal_newlines=True,
)

def parse_pip_output(self, line: str) -> Tuple[InstallationStage, float]:
"""Parse pip output to determine installation stage and progress."""
line = line.strip().lower()

if "collecting" in line:
return InstallationStage.COLLECTING, InstallationStage.COLLECTING.progress
elif "downloading" in line:
@@ -118,52 +128,50 @@ def parse_pip_output(self, line: str) -> Tuple[InstallationStage, float]:
return InstallationStage.INSTALLING, InstallationStage.INSTALLING.progress
elif "successfully installed" in line:
return InstallationStage.FINALIZING, InstallationStage.FINALIZING.progress

return InstallationStage.PREPARING, InstallationStage.PREPARING.progress

def install(self) -> None:
"""Execute the installation with progress tracking and UI updates."""
spinner = Spinner("dots2", text=self.ui.create_loading_text(InstallationStage.PREPARING, 0), style="green")

with Live(
spinner,
console=self.ui.console,
refresh_per_second=30
) as live:
spinner = Spinner(
"dots2", text=self.ui.create_loading_text(InstallationStage.PREPARING, 0), style="green"
)

with Live(spinner, console=self.ui.console, refresh_per_second=30) as live:
try:
process = self.run_pip_install()

while True:
output_line = process.stdout.readline()
if output_line == '' and process.poll() is not None:
if output_line == "" and process.poll() is not None:
break

stage, progress = self.parse_pip_output(output_line)
spinner.text = self.ui.create_loading_text(stage, progress)

# Show completion
spinner.text = self.ui.create_loading_text(InstallationStage.COMPLETE, 1.0)

if process.poll() == 0:
live.update(self.ui.create_success_text())
else:
error = process.stderr.read()
error_text = Text(error, style="red")
live.update(error_text)
sys.exit(1)

except Exception as e:
error_text = Text(f"Error: {str(e)}", style="red")
live.update(error_text)
sys.exit(1)

self.ui.console.print()


def enhanced_install(package_name: str, version: Optional[str] = None) -> None:
"""
Enhance pip installation with a professional progress UI.
Args:
package_name: Name of the package to install
version: Optional specific version to install