add isort and black and github workflows
vutrung96 committed Nov 21, 2024
1 parent ec424e7 commit 5380c96
Showing 18 changed files with 256 additions and 371 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/lint.yaml
@@ -0,0 +1,30 @@
name: Python Linting

on: [push, pull_request]

jobs:
  PythonLinting:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        project: [bespoke] # Add other projects here

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          cd ${{ matrix.project }}
          pip install poetry
          poetry install
      - name: Run black
        run: |
          cd ${{ matrix.project }}
          poetry run black --check .
      - name: Run isort
        run: |
          cd ${{ matrix.project }}
          poetry run isort --check .
6 changes: 4 additions & 2 deletions examples/distill.py
@@ -1,7 +1,9 @@
from bespokelabs import curator
from datasets import load_dataset
import logging

from datasets import load_dataset

from bespokelabs import curator

dataset = load_dataset("allenai/WildChat", split="train")
dataset = dataset.select(range(3_000))

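The reordering above is what isort's default section grouping produces: standard-library imports first, third-party packages next, first-party (project) imports last, with a blank line between groups. A minimal sketch of the resulting import header, annotated by group (the module names are taken from the diff above; running it assumes the datasets and bespokelabs packages are installed):

import logging  # standard library

from datasets import load_dataset  # third-party

from bespokelabs import curator  # first-party / project code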
10 changes: 5 additions & 5 deletions examples/poem.py
@@ -2,10 +2,12 @@
We generate 10 diverse topics and then generate 2 poems for each topic."""

from bespokelabs import curator
from typing import List

from datasets import Dataset
from pydantic import BaseModel, Field
from typing import List

from bespokelabs import curator


# We use Pydantic and structured outputs to define the format of the response.
@@ -41,9 +43,7 @@ class Poems(BaseModel):
model_name="gpt-4o-mini",
response_format=Poems,
# `row` is the input row, and `poems` is the Poems class which is parsed from the structured output from the LLM.
parse_func=lambda row, poems: [
{"topic": row["topic"], "poem": p} for p in poems.poems_list
],
parse_func=lambda row, poems: [{"topic": row["topic"], "poem": p} for p in poems.poems_list],
)

# We apply the prompter to the topics dataset.
18 changes: 16 additions & 2 deletions poetry.lock

Some generated files are not rendered by default.

3 changes: 2 additions & 1 deletion pyproject.toml
@@ -34,6 +34,7 @@ tiktoken = "^0.8.0"
nest-asyncio = "^1.6.0"
rich = "^13.7.0"
litellm = "^1.52.11"
isort = "^5.13.2"

[tool.poetry.group.dev.dependencies]
black = "^24.2.0"
@@ -47,4 +48,4 @@ build-backend = "poetry.core.masonry.api"
curator-viewer = "bespokelabs.curator.viewer.__main__:main"

[tool.black]
line-length = 80
line-length = 100
4 changes: 1 addition & 3 deletions src/bespokelabs/__init__.py
@@ -3,9 +3,7 @@
logger = logging.getLogger("bespokelabs.curator")

handler = logging.StreamHandler()
formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.WARNING)
2 changes: 1 addition & 1 deletion src/bespokelabs/curator/__init__.py
@@ -1,2 +1,2 @@
from .prompter.prompter import Prompter
from .dataset import Dataset
from .prompter.prompter import Prompter
34 changes: 11 additions & 23 deletions src/bespokelabs/curator/dataset.py
@@ -1,19 +1,17 @@
import glob
import json
import logging
import os
import glob
from typing import Any, Dict, Iterable, Iterator, List, TypeVar

import pandas as pd

from pydantic import BaseModel
from datasets import Dataset as HFDataset
from datasets.arrow_writer import ArrowWriter, SchemaInferenceError
from typing import Any, Dict, Iterable, Iterator, List, TypeVar
from pydantic import BaseModel

from bespokelabs.curator.prompter.prompt_formatter import PromptFormatter
from bespokelabs.curator.request_processor.generic_response import (
GenericResponse,
)
from bespokelabs.curator.request_processor.generic_response import \
GenericResponse

T = TypeVar("T")

@@ -33,9 +31,7 @@ def from_iterable(iterable: Iterable[Dict[str, Any] | BaseModel]):
return Dataset(iterable=iterable)

def from_working_dir(working_dir: str, prompt_formatter: PromptFormatter):
return Dataset(
working_dir=working_dir, prompt_formatter=prompt_formatter
)
return Dataset(working_dir=working_dir, prompt_formatter=prompt_formatter)

def __iter__(self) -> Iterator[Dict[str, Any] | BaseModel]:
if self.iterable is not None:
@@ -48,13 +44,9 @@ def __iter__(self) -> Iterator[Dict[str, Any] | BaseModel]:
for line in open(response_file, "r"):
response = GenericResponse.model_validate_json(line)
if self.prompt_formatter.response_format:
response.response = self.prompt_formatter.response_format(
**response.response
)
response.response = self.prompt_formatter.response_format(**response.response)
if self.prompt_formatter.parse_func:
response = self.prompt_formatter.parse_func(
response.row, response.response
)
response = self.prompt_formatter.parse_func(response.row, response.response)
else:
response = [response.response]

@@ -97,10 +89,8 @@ def to_huggingface(self, in_memory: bool = False) -> None:
total_responses_count += 1
response = GenericResponse.model_validate_json(line)
if self.prompt_formatter.response_format:
response.response = (
self.prompt_formatter.response_format(
**response.response
)
response.response = self.prompt_formatter.response_format(
**response.response
)

if response is None:
@@ -119,9 +109,7 @@ def to_huggingface(self, in_memory: bool = False) -> None:
row = row.model_dump()
writer.write(row)

logging.info(
f"Read {total_responses_count} responses, {failed_responses_count} failed"
)
logging.info(f"Read {total_responses_count} responses, {failed_responses_count} failed")
logging.info("Finalizing writer")

if failed_responses_count == total_responses_count:
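One wrinkle in the hunk above: for the long GenericResponse import, isort emitted a backslash continuation, whereas black prefers parenthesized imports. A minimal sketch of the two wrapping styles; the profile = "black" setting mentioned below is an assumption about how the two tools are commonly reconciled, not something this commit configures:

# Wrapping produced by isort here (backslash continuation):
from bespokelabs.curator.request_processor.generic_response import \
    GenericResponse

# Black-style parenthesized wrapping (what isort emits when configured with
# profile = "black" -- an assumption, not part of this commit):
from bespokelabs.curator.request_processor.generic_response import (
    GenericResponse,
)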
62 changes: 35 additions & 27 deletions src/bespokelabs/curator/install_ui.py
@@ -4,22 +4,23 @@
It includes progress tracking, status updates, and a polished success message.
"""

import sys
import subprocess
from typing import Optional, Tuple
import sys
from dataclasses import dataclass
from enum import Enum
from typing import Optional, Tuple

from rich.console import Console
from rich.text import Text
from rich.live import Live
from rich.spinner import Spinner
from rich.panel import Panel
from rich.progress import ProgressBar
from rich.spinner import Spinner
from rich.text import Text


class InstallationStage(Enum):
"""Enum representing different stages of the installation process."""

PREPARING = ("Preparing your environment...", 0.0)
COLLECTING = ("Downloading packages...", 0.2)
DOWNLOADING = ("Downloading packages...", 0.4)
@@ -35,9 +36,10 @@ def __init__(self, message: str, progress: float):
@dataclass
class InstallationUI:
"""Class to manage the installation UI components and styling."""

package_name: str
console: Console = Console()

def create_progress_bar(self, completed: float = 0) -> Text:
"""Create a stylish progress bar with the given completion percentage."""
width = 40
@@ -65,25 +67,33 @@ def create_loading_text(self, stage: InstallationStage, progress: float) -> Text
("Your synthetic data journey begins in moments", "dim white"),
self.create_progress_bar(progress),
("\n ", ""),
(stage.message, "italic dim white")
(stage.message, "italic dim white"),
)

def create_success_text(self) -> Text:
"""Create the success message with links."""
text = Text()
text.append("✨ Curator installed successfully!\n\n", style="bold green")
text.append("Start building production-ready synthetic data pipelines:\n\n", style="dim white")
text.append(
"Start building production-ready synthetic data pipelines:\n\n", style="dim white"
)
text.append(" 📚 ", style="")
text.append("docs.bespokelabs.ai", style="dim cyan link https://docs.bespokelabs.ai")
text.append("\n 📦 ", style="")
text.append("github.com/bespokelabsai/curator", style="dim cyan link https://github.com/bespokelabsai/curator")
text.append(
"github.com/bespokelabsai/curator",
style="dim cyan link https://github.com/bespokelabsai/curator",
)
text.append("\n 💬 ", style="")
text.append("discord.gg/KqpXvpzVBS", style="dim cyan link https://discord.com/invite/KqpXvpzVBS")
text.append(
"discord.gg/KqpXvpzVBS", style="dim cyan link https://discord.com/invite/KqpXvpzVBS"
)
return text


class PackageInstaller:
"""Class to handle the package installation process."""

def __init__(self, package_name: str, version: Optional[str] = None):
self.package_spec = f"{package_name}=={version}" if version else package_name
self.ui = InstallationUI(package_name)
@@ -96,13 +106,13 @@ def run_pip_install(self) -> subprocess.Popen:
stderr=subprocess.PIPE,
text=True,
bufsize=1,
universal_newlines=True
universal_newlines=True,
)

def parse_pip_output(self, line: str) -> Tuple[InstallationStage, float]:
"""Parse pip output to determine installation stage and progress."""
line = line.strip().lower()

if "collecting" in line:
return InstallationStage.COLLECTING, InstallationStage.COLLECTING.progress
elif "downloading" in line:
@@ -118,52 +128,50 @@ def parse_pip_output(self, line: str) -> Tuple[InstallationStage, float]:
return InstallationStage.INSTALLING, InstallationStage.INSTALLING.progress
elif "successfully installed" in line:
return InstallationStage.FINALIZING, InstallationStage.FINALIZING.progress

return InstallationStage.PREPARING, InstallationStage.PREPARING.progress

def install(self) -> None:
"""Execute the installation with progress tracking and UI updates."""
spinner = Spinner("dots2", text=self.ui.create_loading_text(InstallationStage.PREPARING, 0), style="green")

with Live(
spinner,
console=self.ui.console,
refresh_per_second=30
) as live:
spinner = Spinner(
"dots2", text=self.ui.create_loading_text(InstallationStage.PREPARING, 0), style="green"
)

with Live(spinner, console=self.ui.console, refresh_per_second=30) as live:
try:
process = self.run_pip_install()

while True:
output_line = process.stdout.readline()
if output_line == '' and process.poll() is not None:
if output_line == "" and process.poll() is not None:
break

stage, progress = self.parse_pip_output(output_line)
spinner.text = self.ui.create_loading_text(stage, progress)

# Show completion
spinner.text = self.ui.create_loading_text(InstallationStage.COMPLETE, 1.0)

if process.poll() == 0:
live.update(self.ui.create_success_text())
else:
error = process.stderr.read()
error_text = Text(error, style="red")
live.update(error_text)
sys.exit(1)

except Exception as e:
error_text = Text(f"Error: {str(e)}", style="red")
live.update(error_text)
sys.exit(1)

self.ui.console.print()


def enhanced_install(package_name: str, version: Optional[str] = None) -> None:
"""
Enhance pip installation with a professional progress UI.
Args:
package_name: Name of the package to install
version: Optional specific version to install