Kubernetes endpoint multi-model deployments #3423

Open · wants to merge 9 commits into base: main

@@ -0,0 +1,24 @@
channels:
  - conda-forge
dependencies:
  - python=3.10.12
  - pip<=22.3.1
  - pip:
      - mlflow
      - cloudpickle
      - numpy
      - psutil
      - joblib
      - scikit-learn
      - pandas
      - scipy
      - azureml-inference-server-http
      - adlfs
      - fsspec
      - azureml-mlflow
      - matplotlib
      - tqdm
      - jupyter
      - ipykernel
      - papermill
name: multi-model
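
For reference, a minimal sketch of how a conda file like the one above could back an Azure ML environment using the `azure-ai-ml` SDK. The file path, environment name, and base image below are illustrative assumptions, not part of this PR:

from azure.ai.ml import MLClient
from azure.ai.ml.entities import Environment
from azure.identity import DefaultAzureCredential

# Placeholder workspace details; substitute your own values.
ml_client = MLClient(
    DefaultAzureCredential(),
    subscription_id="<SUBSCRIPTION_ID>",
    resource_group_name="<RESOURCE_GROUP>",
    workspace_name="<WORKSPACE>",
)

# Build the environment from the conda file above plus a standard base image.
env = Environment(
    name="multi-model-env",  # assumed name
    conda_file="environment/multi-model.yml",  # assumed path to the file above
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
)
ml_client.environments.create_or_update(env)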
@@ -0,0 +1,84 @@
import joblib
import json
import os
import pandas as pd
import logging
from pathlib import Path

# Dictionary holding the loaded models; populated by init().
models = None


# Initialize the models
def init():
    """
    This function is called when the container is initialized/started, typically after create/update of the deployment.
    You can write the logic here to perform init operations like caching the models in memory.
    """
    global models

    # AZUREML_MODEL_DIR is an environment variable created during deployment.
    model_dir = Path(os.getenv("AZUREML_MODEL_DIR")) / "models"  # Path to the model directory
    logging.info(f"Model directory: {model_dir}")

    try:
        # Load both the churn and segmentation models
        models = {
            "churn_model": joblib.load(model_dir / "churn.joblib"),  # For supervised classification
            "segmentation_model": joblib.load(model_dir / "segmentation.joblib"),  # For unsupervised clustering
        }
        logging.info(f"Loaded models: {list(models.keys())}")
    except FileNotFoundError as e:
        logging.error(f"Model file not found: {e}")
    except Exception as e:
        logging.error(f"Error during model loading in init: {e}")


# Run function to perform predictions
def run(raw_data):
    global models

    try:
        # Fail fast if init() did not load the models.
        if models is None:
            raise RuntimeError("Models are not loaded; check the init() logs.")

        # Parse the input data
        input_json = json.loads(raw_data)

        # Determine which model to run based on 'model_type' in the input
        model_type = input_json.get("model_type")
        data = input_json.get("data")

        if model_type not in ["churn", "segmentation"]:
            raise ValueError("Invalid model_type. Choose either 'churn' or 'segmentation'.")

        if data is None:
            raise ValueError("Input data is missing.")

        input_data = pd.DataFrame(data)

        # Supervised classification: churn prediction
        if model_type == "churn":
            churn_model = models.get("churn_model")
            if churn_model is None:
                raise ValueError("Churn model not found.")

            # Perform churn prediction
            churn_predictions = churn_model.predict(input_data)
            result = {"churn_predictions": churn_predictions.tolist()}

        # Unsupervised clustering: segmentation
        else:
            segmentation_model = models.get("segmentation_model")
            if segmentation_model is None:
                raise ValueError("Segmentation model not found.")

            # Perform segmentation prediction
            segmentation_predictions = segmentation_model.predict(input_data)
            result = {"segmentation_predictions": segmentation_predictions.tolist()}

        return json.dumps(result)

    except Exception as e:
        error_message = str(e)
        logging.error(f"Error during prediction: {error_message}")
        return json.dumps({"error": error_message})
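
The scoring contract above can be smoke-tested locally before deploying. A minimal sketch, assuming the script is saved as `score.py` and the two `.joblib` files sit in a local `./models` folder; the feature names in the payload are hypothetical placeholders:

import json
import os

# Point AZUREML_MODEL_DIR at the parent of ./models so init() resolves
# ./models/churn.joblib and ./models/segmentation.joblib (assumed layout).
os.environ["AZUREML_MODEL_DIR"] = "."

import score  # the scoring script above, saved as score.py (assumed filename)

score.init()

# 'data' is a list of feature records; the feature names here are hypothetical.
request = json.dumps({
    "model_type": "churn",
    "data": [{"feature_1": 0.5, "feature_2": 1.2}],
})
print(score.run(request))

# The same payload shape works for the segmentation model.
print(score.run(json.dumps({
    "model_type": "segmentation",
    "data": [{"feature_1": 0.5, "feature_2": 1.2}],
})))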

@@ -0,0 +1,4 @@
joblib
scikit-learn
pandas
numpy
@@ -0,0 +1,195 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"# Churn Prediction and Customer Segmentation\n",
"\n",
"This notebook demonstrates the training of a **supervised Logistic Regression model** for churn prediction and an **unsupervised KMeans model** for customer segmentation.\n",
"\n",
"## Steps:\n",
"1. Data Loading\n",
"2. Model Training \n",
"3. Model Saving\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install sckit-learn joblib pandas "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#importing the required libraries\n",
"import joblib\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.cluster import KMeans\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.metrics import accuracy_score\n",
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"## Step 1: Data Loading\n",
"\n",
"We begin by loading the supervised and unsupervised datasets for churn prediction and customer segmentation.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load the supervised dataset\n",
"supervised_data = pd.read_csv(r'..\\...\\azureml-examples\\sdk\\python\\endpoints\\online\\aks_multi_model_deployment_kubernetes_endpoint\\artifacts\\data\\churn.csv')\n",
"# Load the unsupervised dataset\n",
"unsupervised_data = pd.read_csv(r'...\\...\\azureml-examples\\sdk\\python\\endpoints\\online\\aks_multi_model_deployment_kubernetes_endpoint\\artifacts\\data\\segmentation.csv')\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"# Step 2: Model Training\n",
"\n",
"### 2.1:Logistic Regression for churn prediction.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load the supervised dataset\n",
"supervised_data = pd.read_csv(r'D:\\db_Contribute_SDK2\\azureml-examples\\sdk\\python\\endpoints\\online\\aks_multi_model_deployment_kubernetes_endpoint\\artifacts\\data\\churn.csv')\n",
"\n",
"# Separate features and target\n",
"X = supervised_data.drop('churn', axis=1)\n",
"y = supervised_data['churn']\n",
"\n",
"# Split the data for training and testing\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"\n",
"# Scale the data for better performance\n",
"scaler = StandardScaler()\n",
"X_train_scaled = scaler.fit_transform(X_train)\n",
"X_test_scaled = scaler.transform(X_test)\n",
"\n",
"# Train a Logistic Regression model\n",
"supervised_model = LogisticRegression(random_state=42)\n",
"supervised_model.fit(X_train_scaled, y_train)\n",
"\n",
"# Test the supervised model\n",
"y_pred = supervised_model.predict(X_test_scaled)\n",
"accuracy = accuracy_score(y_test, y_pred)\n",
"print(f\"Supervised Model Accuracy: {accuracy * 100:.2f}%\")\n",
"\n",
"# Save the supervised model locally\n",
"joblib.dump(supervised_model, 'churn.joblib')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"### Step 2.2: KMeans for customer segmentation.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Train a KMeans clustering model (unsupervised)\n",
"unsupervised_model = KMeans(n_clusters=3, random_state=42)\n",
"unsupervised_model.fit(unsupervised_data)\n",
"\n",
"# Save the unsupervised model locally\n",
"joblib.dump(unsupervised_model, 'segmentation.joblib')"
]
},
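{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optional sanity check: inspect the clusters assigned to the first few customers before saving the model. This cell assumes the training cell above has run; the `head()` sample is illustrative only.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Smoke test: predict cluster labels for the first five rows\n",
"print(unsupervised_model.predict(unsupervised_data.head()))"
]
},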
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"This script generates a `requirements.txt` file with specific versions of required packages.\n",
"\n",
"Modules:\n",
" pkg_resources: Used to get the version of installed packages.\n",
"\n",
"Packages:\n",
" - joblib\n",
" - scikit-learn\n",
" - pandas\n",
" - numpy\n",
"\n",
"Functionality:\n",
" - Iterates over a list of specified packages.\n",
" - Retrieves the installed version of each package.\n",
" - Writes the package names and their versions to a `requirements.txt` file in the format `package==version`.\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import pkg_resources\n",
"\n",
"packages = ['joblib', 'scikit-learn', 'pandas', 'numpy']\n",
"\n",
"with open('requirements.txt', 'w') as f:\n",
" for package in packages:\n",
" version = pkg_resources.get_distribution(package).version\n",
" f.write(f\"{package}=={version}\\n\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "myenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
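
Finally, `score.py` expects both `.joblib` files under `AZUREML_MODEL_DIR/models`. A minimal sketch of registering the trained models to match that layout and invoking the endpoint, reusing the `ml_client` from the environment sketch above; the folder path, asset name, endpoint name, deployment name, and request file are placeholders, not taken from this PR:

from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes

# Register the local ./models folder (holding churn.joblib and
# segmentation.joblib) as one custom model asset. When the deployment mounts
# it, the files land under AZUREML_MODEL_DIR/models, which is the path that
# init() in score.py resolves.
model = Model(
    path="./models",  # assumed local folder with both .joblib files
    type=AssetTypes.CUSTOM_MODEL,
    name="multi-model",  # assumed asset name
    description="Churn (LogisticRegression) and segmentation (KMeans) models",
)
ml_client.models.create_or_update(model)

# Once deployed, a request routes to one model or the other via 'model_type'.
# The endpoint and deployment names below are hypothetical.
response = ml_client.online_endpoints.invoke(
    endpoint_name="multi-model-endpoint",
    deployment_name="blue",
    request_file="sample-request.json",  # e.g. {"model_type": "churn", "data": [...]}
)
print(response)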