37 changes: 36 additions & 1 deletion examples/README.md
@@ -52,7 +52,8 @@ poetry run mlflow models serve -m models:/Digit_Recognition_Model@production --e
```

## Diabetes Prediction
This example utilizes a publicly available dataset from Kaggle. The project highlights the importance of including additional code dependencies within the model to ensure reproducibility and seamless deployment.

This example uses a publicly available dataset from Kaggle and demonstrates the importance of packaging all necessary code dependencies with your model to ensure reproducibility and smooth deployment. It also introduces the MLflow command for building a Docker image tailored to a specific model, enabling consistent and portable deployments.
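
As a rough, runnable sketch of what this packaging looks like at logging time (the `EchoModel` below is a placeholder, not the project's `DiabetesPrediction` class; the real call in `train.py` further down this diff uses `infer_code_paths=True`, available in recent MLflow releases):

```python
import mlflow
import pandas as pd


class EchoModel(mlflow.pyfunc.PythonModel):
    """Placeholder model standing in for the project's DiabetesPrediction class."""

    def predict(self, context, model_input, params=None):
        return model_input


with mlflow.start_run():
    mlflow.pyfunc.log_model(
        artifact_path="model_with_code",
        python_model=EchoModel(),
        input_example=pd.DataFrame({"age": [54.0]}),
        # Ask MLflow to detect and bundle the custom modules the model
        # imports, so the logged artifact is self-contained at deploy time.
        infer_code_paths=True,
    )
```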

* package: `examples/diabetes_prediction`

@@ -67,3 +68,37 @@ This example utilizes a publicly available dataset from Kaggle. The project high
* `diabetes_pred_online_inference`: Performs online inference. Before running this script, ensure that the model has been trained and registered (for example, using the `diabetes_pred_train` script). Next, deploy the model locally by executing:

`poetry run mlflow models serve -m models:/Diabetes_Prediction_Model@production --env-manager local`
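
Once the server is running, you can query it from Python. A minimal sketch (the two columns shown are illustrative only; a real request must supply every feature in the model signature, as `online_inference.py` in this package does):

```python
import requests

# Illustrative payload: swap in the full column list and feature values
# expected by the model signature.
payload = {
    "dataframe_split": {
        "columns": ["gender", "age"],
        "data": [["Female", 54.0]],
    }
}

response = requests.post(
    "http://localhost:5000/invocations",  # default `mlflow models serve` port
    json=payload,
    timeout=10,
)
print(response.json())
```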

### Generating a Docker image

You can easily build a Docker image for your model using MLflow. The following command generates the image, assuming Docker is installed and running on your machine:

```shell
poetry run mlflow models build-docker --model-uri <MODEL_URI> -n <IMAGE_NAME>
```

For example, to build an image for the registered diabetes prediction model:

```shell
poetry run mlflow models build-docker --model-uri models:/Diabetes_Prediction_Model@production -n diabetes_prediction_model
```
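
Once the image is built, it can typically be started with `docker run -p 5000:8080 diabetes_prediction_model` (images produced by `mlflow models build-docker` serve the model on port 8080 inside the container) and then queried at `http://localhost:5000/invocations`, just like a locally served model.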


## Walmart Sales

This example uses a public dataset on Walmart sales over a specific period. The primary goal of this use case is to demonstrate how to deploy multiple models using a single serving endpoint. Additionally, it highlights some limitations of MLflow's ability to automatically generate the appropriate Dockerfile for model deployment.

* package: `examples/walmart_sales_regression`

* Dataset Source: [Kaggle Dataset](https://www.kaggle.com/datasets/mikhail1681/walmart-sales)

* Associated Inference Notebook: [deploying_local_sales_regressor.ipynb](/mlflow_for_ml_dev/notebooks/local_model_serving/deploying_local_sales_regressor.ipynb)

### Available Entrypoints:

* `walmart_reg_train`: Trains the model and registers it in the Model Registry under the name **walmart-store-sales-regressor**. The model version is assigned the alias **production**.
* `walmart_reg_inference`: Performs batch inference.
* `walmart_reg_online_inference`: Performs online inference. Before running this script, ensure that the model has been trained and registered (for example, using the `walmart_reg_train` script). Next, deploy the model locally by executing:

`poetry run mlflow models serve -m models:/walmart-store-sales-regressor@production -p 5000 --env-manager local`
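
The request format mirrors the diabetes example above: POST a JSON body with a `dataframe_split` field to `http://localhost:5000/invocations`. See `examples/walmart_sales_regression/online_inference.py` for the exact payload this model expects.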


4 changes: 2 additions & 2 deletions examples/diabetes_prediction/core/base.py
@@ -40,7 +40,7 @@ def fit(self, x_train, y_train):
# train the model
self.model = pipeline.fit(x_train, y_train)

def predict(self, context, model_input, params: Optional[Dict[str, str]] = {}):
def predict(self, context, model_input, params: Optional[Dict[str, str]] = None):
"""
Predict method for the custom model.
This method is called when making predictions with the model.
@@ -53,7 +53,7 @@ def predict(self, context, model_input, params: Optional[Dict[str, str]] = {}):
print("Model not loaded")
return None

if params.get("probabilities", None):
if params and params.get("probabilities", None):
predictions_df = self._predict_with_probabilities(model_input)
return predictions_df

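The signature change above fixes a classic Python pitfall: a mutable default argument is created once, at function definition time, and then shared by every call. A standalone illustration (unrelated to MLflow):

```python
def append_bad(item, bucket=[]):
    # The default list is created once and reused across all calls.
    bucket.append(item)
    return bucket


def append_good(item, bucket=None):
    # `None` sentinel: each call gets a fresh list unless one is passed in.
    if bucket is None:
        bucket = []
    bucket.append(item)
    return bucket


print(append_bad(1), append_bad(2))    # [1, 2] [1, 2]  (state leaks between calls)
print(append_good(1), append_good(2))  # [1] [2]
```

Switching the default to `None` is also why the body now needs the `params and params.get(...)` guard before dereferencing.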
12 changes: 0 additions & 12 deletions examples/diabetes_prediction/online_inference.py
@@ -47,18 +47,6 @@ def main() -> None:
# print(payload)
payload = {
"dataframe_split": {
"index": [
75721,
80184,
19864,
76699,
92991,
76434,
84004,
80917,
60767,
50074,
],
"columns": [
"gender",
"age",
30 changes: 22 additions & 8 deletions examples/diabetes_prediction/train.py
@@ -2,6 +2,7 @@
from examples.diabetes_prediction.core.base import DiabetesPrediction
from examples.diabetes_prediction.core.pipeline import get_model_signature
from examples.diabetes_prediction.core.data import get_feature_spec
from examples.utils.mlflow_utils import set_alias_to_latest_version
from examples.utils.decorators import mlflow_tracking_uri
from examples.utils.decorators import mlflow_client
from examples.utils.decorators import mlflow_experiment
@@ -25,24 +26,37 @@ def main(**kwargs) -> None:
with mlflow.start_run() as run:
# Log the model
registered_model_name = "Diabetes_Prediction_Model"

# registering the model with mlflow without infer_code_paths
mlflow.pyfunc.log_model(
artifact_path="model",
python_model=diabetes_model,
registered_model_name=registered_model_name,
signature=signature,
input_example=x_test.sample(5),
)

# registering the model with mlflow with infer_code_paths
mlflow.pyfunc.log_model(
artifact_path="model_with_code",
python_model=diabetes_model,
registered_model_name=registered_model_name + "_code",
signature=signature,
input_example=x_test.sample(5),
infer_code_paths=True,
)

# Set model version alias to "production"
model_version = mlflow.search_model_versions(
filter_string=f"name='{registered_model_name}'", max_results=1
)[0]
client = kwargs["mlflow_client"]
client.set_registered_model_alias(
name=registered_model_name,
version=model_version.version,
set_alias_to_latest_version(
registered_model_name=registered_model_name,
alias="production",
client=kwargs["mlflow_client"],
)
set_alias_to_latest_version(
registered_model_name=registered_model_name + "_code",
alias="production",
client=kwargs["mlflow_client"],
)

eval_data = x_test.copy()
eval_data["diabetes"] = y_test

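`set_alias_to_latest_version` lives in `examples/utils/mlflow_utils`, which this diff does not show. Based on the inline code it replaces above, a plausible reconstruction (the actual helper may differ):

```python
from mlflow import MlflowClient


def set_alias_to_latest_version(
    registered_model_name: str, alias: str, client: MlflowClient
) -> None:
    """Point `alias` at the newest version of a registered model."""
    # Like the removed inline code, this relies on search_model_versions
    # returning the most recent version first by default.
    latest = client.search_model_versions(
        filter_string=f"name='{registered_model_name}'", max_results=1
    )[0]
    client.set_registered_model_alias(
        name=registered_model_name,
        alias=alias,
        version=latest.version,
    )
```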
2 changes: 1 addition & 1 deletion examples/walmart_sales_regression/online_inference.py
@@ -9,7 +9,7 @@ def main():
Perform online inference using a REST API.

To deploy the model using the local server, run the following command:
`poetry run mlflow models serve -m models:/walmart-store-sales-regressor@production -p 5000 --no-conda`
`poetry run mlflow models serve -m models:/walmart-store-sales-regressor@production -p 5000 --env-manager local`
"""

url = "http://localhost:5000/invocations"
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "mlflow_for_ml_dev"
version = "1.4.4"
version = "1.5.0"
description = "Code examples for the youtube playlist 'MLflow for Machine Learning Development' by Manuel Gil"
authors = ["Manuel Gil <[email protected]>"]
readme = "README.md"