Merge pull request #23 from decodingml/feat/inference-pipeline-tweaks
Feat/inference pipeline tweaks
iusztinpaul authored May 31, 2024
2 parents 777f2a5 + 50d2406 commit c744bfe
Showing 13 changed files with 199 additions and 28 deletions.
8 changes: 3 additions & 5 deletions course/module-4/Makefile
@@ -21,17 +21,15 @@ help:

list: help

-create-qwak-project:
+create-qwak-model: # Create a Qwak model before building it.
	@echo "$(YELLOW)Creating Qwak project $(RESET)"
	qwak models create "llm_twin" --project "llm-twin-course"

-deploy:
+build-llm-microservice: # Build the Qwak model.
	@echo "$(YELLOW)Dumping poetry env requirements to $(RESET) $(GREEN) requirements.txt $(RESET)"
	# poetry export -f requirements.txt --output finetuning/requirements.txt --without-hashes
	@echo "$(GREEN)Triggering Qwak Model Build$(RESET)"
	poetry run qwak models build -f build_config.yaml .

-test:
+test-llm-microservice-locally: # Test the Qwak model locally.
	poetry run python test_local.py


11 changes: 5 additions & 6 deletions course/module-4/build_config.yaml
@@ -4,13 +4,13 @@ build_env:
base_image: public.ecr.aws/qwak-us-east-1/qwak-base:0.0.13-gpu
cache: true
env_vars:
-    - HUGGINGFACE_ACCESS_TOKEN=
-    - COMET_API_KEY=
-    - COMET_WORKSPACE=
-    - COMET_PROJECT=llm-twin-course
+    - HUGGINGFACE_ACCESS_TOKEN=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
+    - COMET_API_KEY=5qccQFthifX6ZYUcLrTSjKSpw
+    - COMET_WORKSPACE=decodingml
+    - COMET_PROJECT=llm-twin-course
no_cache: false
params: []
-  push: true
+  push: true
python_env:
dependency_file_path: finetuning/requirements.txt
git_credentials: null
@@ -49,4 +49,3 @@ step:
validate_build_artifact: true
validate_build_artifact_timeout: 120
verbose: 0

5 changes: 5 additions & 0 deletions course/module-4/finetuning/logger_utils.py
@@ -0,0 +1,5 @@
import structlog


def get_logger(cls: str):
return structlog.get_logger().bind(cls=cls)
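
For context, get_logger binds the caller's name onto every record structlog emits; a minimal usage sketch, assuming structlog's default configuration (the class and event names below are illustrative):

from finetuning import logger_utils

logger = logger_utils.get_logger("DatasetClient")  # binds cls="DatasetClient" to all records
logger.info("dataset_download_started", dataset="llm-twin")
# the default console renderer prints roughly:
# 2024-05-31 ... [info] dataset_download_started cls=DatasetClient dataset=llm-twin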
26 changes: 18 additions & 8 deletions course/module-4/finetuning/model.py
@@ -7,6 +7,7 @@
import yaml
from comet_ml import Experiment
from datasets import DatasetDict, load_dataset

from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from qwak.model.adapters import DefaultOutputAdapter
from qwak.model.base import QwakModel
@@ -23,6 +24,7 @@

from finetuning.dataset_client import DatasetClient
from finetuning.settings import settings
+from finetuning.utils import build_qlora_model


class CopywriterMistralModel(QwakModel):
@@ -80,6 +82,8 @@ def build(self) -> None:
logging.info(f"Finished saving model to {self.model_save_dir}")

if self.experiment:
+            self.experiment.log_model("llm-twin", self.model_save_dir)

self.experiment.end()

self._remove_model_class_attributes()
@@ -112,7 +116,7 @@ def init_model(self) -> None:
)
self.tokenizer.pad_token = self.tokenizer.eos_token
self.tokenizer.padding_side = "right"

logging.info(f"Initialized model {self.model_type} successfully")

def _initialize_qlora(self, model: PreTrainedModel) -> PeftModel:
@@ -124,7 +128,7 @@ def _initialize_qlora(self, model: PreTrainedModel) -> PeftModel:

model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, self.qlora_config)

logging.info("Initialized QLoRA config successfully!")

return model
@@ -135,7 +139,7 @@ def _init_trainig_args(self) -> None:
self.training_arguments = TrainingArguments(**config["training_arguments"])
if self.experiment:
self.experiment.log_parameters(self.training_arguments)

logging.info("Initialized training arguments successfully!")

def load_dataset(self) -> DatasetDict:
@@ -155,7 +159,7 @@ def load_dataset(self) -> DatasetDict:
def preprocess_data_split(self, train_val_datasets: DatasetDict) -> tuple:
train_data = train_val_datasets["train"]
val_data = train_val_datasets["validation"]

generated_train_dataset = train_data.map(self.generate_prompt)
generated_train_dataset = generated_train_dataset.remove_columns(
["instruction", "content"]
@@ -192,10 +196,16 @@ def tokenize(self, prompt: str) -> dict:
return result

def initialize_model(self) -> None:
-        self.model = AutoModelForCausalLM.from_pretrained(
-            self.model_save_dir,
-            token=settings.HUGGINGFACE_ACCESS_TOKEN,
-            quantization_config=self.nf4_config,
+        # self.model = AutoModelForCausalLM.from_pretrained(
+        #     self.model_save_dir,
+        #     token=settings.HUGGINGFACE_ACCESS_TOKEN,
+        #     quantization_config=self.nf4_config,
+        # )
+        self.model, self.tokenizer, _ = build_qlora_model(
+            pretrained_model_name_or_path=self.model_type,
+            peft_pretrained_model_name_or_path="llm-twin-lora",
+            bnb_config=self.nf4_config,
+            lora_config=self.qlora_config,
)
logging.info(f"Successfully loaded model from {self.model_save_dir}")

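The new experiment.log_model call is what makes the LoRA weights retrievable from Comet's model registry at inference time; a minimal sketch of the logging half, assuming comet_ml credentials come from the usual environment variables (the save directory is illustrative):

from comet_ml import Experiment

experiment = Experiment(project_name="llm-twin-course")
experiment.log_model("llm-twin", "./model_save_dir")  # uploads the saved weights under the "llm-twin" name
experiment.end()
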
1 change: 1 addition & 0 deletions course/module-4/finetuning/requirements.txt
@@ -10,3 +10,4 @@ bitsandbytes==0.42.0
pydantic_settings==2.2.1
scikit-learn==1.4.2
qwak-sdk==0.5.68
+structlog==24.2.0
3 changes: 3 additions & 0 deletions course/module-4/finetuning/settings.py
@@ -1,3 +1,4 @@
+from pathlib import Path
from pydantic_settings import BaseSettings, SettingsConfigDict


@@ -9,5 +10,7 @@ class AppSettings(BaseSettings):
    COMET_API_KEY: str = ""
    COMET_WORKSPACE: str = ""
    COMET_PROJECT: str = ""
+
+    CACHE_DIR: Path = Path("./cache")

settings = AppSettings()
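
Because AppSettings is a pydantic-settings model, the new CACHE_DIR field can be overridden through the environment like the other values; a small sketch (the override path is hypothetical):

import os

os.environ["CACHE_DIR"] = "/tmp/llm-twin-cache"  # hypothetical override; must be set before the import below

from finetuning.settings import settings

print(settings.CACHE_DIR)  # Path("/tmp/llm-twin-cache"); defaults to Path("./cache") when unset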
131 changes: 131 additions & 0 deletions course/module-4/finetuning/utils.py
@@ -0,0 +1,131 @@
import os
from pathlib import Path
from typing import Optional, Tuple

import torch
from comet_ml import API  # model-registry client used in download_from_model_registry
from peft import LoraConfig, PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

from finetuning.settings import settings
from finetuning import logger_utils


logger = logger_utils.get_logger(__name__)


def build_qlora_model(
pretrained_model_name_or_path: str,
peft_pretrained_model_name_or_path: Optional[str] = None,
bnb_config: Optional[BitsAndBytesConfig] = None,
lora_config: Optional[LoraConfig] = None,
cache_dir: Optional[Path] = None,
) -> Tuple[AutoModelForCausalLM, AutoTokenizer, PeftConfig]:
"""
Function that builds a QLoRA LLM model based on the given HuggingFace name:
1. Create and prepare the bitsandbytes configuration for QLoRa's quantization
2. Download, load, and quantize on-the-fly Falcon-7b
3. Create and prepare the LoRa configuration
4. Load and configuration Falcon-7B's tokenizer
"""

if bnb_config is None:
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
pretrained_model_name_or_path,
token=settings.HUGGINGFACE_ACCESS_TOKEN,
device_map=torch.cuda.current_device(),
quantization_config=bnb_config,
use_cache=False,
torchscript=True,
cache_dir=str(cache_dir) if cache_dir else None,
)

tokenizer = AutoTokenizer.from_pretrained(
pretrained_model_name_or_path,
token=settings.HUGGINGFACE_ACCESS_TOKEN,
cache_dir=str(cache_dir) if cache_dir else None,
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

if peft_pretrained_model_name_or_path:
is_model_name = not os.path.isdir(peft_pretrained_model_name_or_path)
if is_model_name:
logger.info(
f"Downloading {peft_pretrained_model_name_or_path} from CometML Model Registry:"
)
peft_pretrained_model_name_or_path = download_from_model_registry(
model_id=peft_pretrained_model_name_or_path,
cache_dir=cache_dir,
)

logger.info(f"Loading Lora Confing from: {peft_pretrained_model_name_or_path}")
lora_config = LoraConfig.from_pretrained(peft_pretrained_model_name_or_path)
        assert (
            lora_config.base_model_name_or_path == pretrained_model_name_or_path
        ), f"The LoRA model was trained on a different base model than the one requested: \
            {lora_config.base_model_name_or_path} != {pretrained_model_name_or_path}"

logger.info(f"Loading Peft Model from: {peft_pretrained_model_name_or_path}")
model = PeftModel.from_pretrained(model, peft_pretrained_model_name_or_path)
else:
if lora_config is None:
lora_config = LoraConfig(
lora_alpha=16,
lora_dropout=0.1,
r=64,
bias="none",
task_type="CAUSAL_LM",
)

return model, tokenizer, lora_config


def download_from_model_registry(
model_id: str, cache_dir: Optional[Path] = None
) -> Path:
"""
    Downloads a model from the Comet ML model registry.
    Args:
        model_id (str): The ID of the model to download, in the format "workspace/model_name:version".
        cache_dir (Optional[Path]): The directory to cache the downloaded model in. Defaults to the value of
            `settings.CACHE_DIR`.
Returns:
Path: The path to the downloaded model directory.
"""

if cache_dir is None:
cache_dir = settings.CACHE_DIR
output_folder = cache_dir / "models" / model_id

already_downloaded = output_folder.exists()
if not already_downloaded:
workspace, model_id = model_id.split("/")
model_name, version = model_id.split(":")

api = API()
model = api.get_model(workspace=workspace, model_name=model_name)
model.download(version=version, output_folder=output_folder, expand=True)
else:
logger.info(f"Model {model_id=} already downloaded to: {output_folder}")

subdirs = [d for d in output_folder.iterdir() if d.is_dir()]
if len(subdirs) == 1:
model_dir = subdirs[0]
else:
raise RuntimeError(
f"There should be only one directory inside the model folder. \
Check the downloaded model at: {output_folder}"
)

logger.info(f"Model {model_id=} downloaded from the registry to: {model_dir}")

return model_dir
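
Taken together with the initialize_model change above, loading the fine-tuned model reduces to a single call; a sketch assuming a Mistral base checkpoint and a "workspace/model_name:version" registry id (both names hypothetical, chosen to match the docstring's format):

from finetuning.utils import build_qlora_model

model, tokenizer, lora_config = build_qlora_model(
    pretrained_model_name_or_path="mistralai/Mistral-7B-Instruct-v0.1",  # assumed base model
    peft_pretrained_model_name_or_path="decodingml/llm-twin-lora:1.0.0",  # hypothetical registry id
)
# Since the second argument is not a local directory, the helper downloads the adapter
# from the Comet registry, verifies it matches the base model, and wraps the quantized
# base model in a PeftModel.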
21 changes: 19 additions & 2 deletions course/module-4/poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions course/module-4/pyproject.toml
@@ -21,6 +21,7 @@ bitsandbytes = "^0.42.0"
pydantic_settings="^2.2.1"
scikit-learn = "^1.4.2"
qwak-sdk="^0.5.68"
+structlog = "^24.2.0"

[build-system]
requires = ["poetry-core"]
8 changes: 7 additions & 1 deletion course/module-5/Makefile
@@ -1,6 +1,12 @@
help:
@grep -E '^[a-zA-Z0-9 -]+:.*#' Makefile | sort | while read -r l; do printf "\033[1;32m$$(echo $$l | cut -f 1 -d':')\033[00m:$$(echo $$l | cut -f 2- -d'#')\n"; done

-call-inference-pipeline: # Test the inference pipeline.
+deploy-llm-microservice: # Deploy the Qwak model.
+	qwak models deploy realtime --model-id "llm_twin" --instance "gpu.a10.2xl" --timeout 50000 --replicas 2 --server-workers 2
+
+undeploy-llm-microservice: # Undeploy the Qwak model.
+	qwak models undeploy --model-id "llm_twin"
+
+call-inference-pipeline: # Call the inference pipeline.
	poetry run python main.py

4 changes: 2 additions & 2 deletions course/module-5/main.py
@@ -16,8 +16,8 @@

response = inference_endpoint.generate(
query=query,
-    enable_rag=False,
-    enable_evaluation=True,
+    enable_rag=True,
+    enable_evaluation=False,
enable_monitoring=True,
)

2 changes: 1 addition & 1 deletion course/module-5/rag/retriever.py
@@ -29,7 +29,7 @@ def __init__(self, query: str) -> None:
def _search_single_query(
self, generated_query: str, metadata_filter_value: str, k: int
):
-        assert k > 3, "k should be greater than 3"
+        assert k >= 3, "k should be at least 3"

query_vector = self._embedder.encode(generated_query).tolist()

6 changes: 3 additions & 3 deletions course/module-5/settings.py
@@ -41,9 +41,9 @@ class AppSettings(BaseSettings):
QWAK_DEPLOYMENT_MODEL_ID: str = "llm_twin"

# RAG config
-    TOP_K: int = 3
+    TOP_K: int = 3
-    KEEP_TOP_K: int = 5
-    EXPAND_N_QUERY: int = 5
+    KEEP_TOP_K: int = 3
+    EXPAND_N_QUERY: int = 3


settings = AppSettings()
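
The three lowered values act at different stages of the retrieval funnel, which is also why the retriever's assertion above now only requires k >= 3; a rough sketch of how they interact (expand_query, vector_search, and rerank are hypothetical stand-ins for the retriever's query-expansion, search, and reranking steps):

from settings import settings

def retrieve(query: str) -> list[str]:
    queries = expand_query(query, n=settings.EXPAND_N_QUERY)  # 3 expanded self-queries
    candidates = []
    for q in queries:
        candidates += vector_search(q, k=settings.TOP_K)  # 3 hits per expanded query
    return rerank(query, candidates, keep=settings.KEEP_TOP_K)  # keep the best 3 overall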
