13 changes: 11 additions & 2 deletions examples/iris_classifier/inference.py
@@ -1,3 +1,4 @@
# import necessary libraries
import mlflow
from examples.utils.decorators import mlflow_tracking_uri
from examples.iris_classifier.data import get_train_test_data
@@ -8,14 +9,22 @@ def main():
    """
    Main function to run the batch inference process.
    """
    # Load the test data
    _, x_test, _, y_test = get_train_test_data()

    # Load the model from the specified path
    _, x_test, _, _ = get_train_test_data()
    registered_model_name = "Iris_Classifier_Model"
    model_path = f"models:/{registered_model_name}@production"
    model = mlflow.sklearn.load_model(model_path)

    # Perform inference on the test data
    predictions = model.predict(x_test)
    x_test["predictions"] = predictions
    x_test["target"] = y_test

    # save the predictions to a CSV file
    predictions_path = "examples/iris_classifier/inference_results/predictions.csv"
    x_test.to_csv(predictions_path, index=False)

    print(f"Inference completed. Predictions saved to {predictions_path}")
    print(x_test.head())
    print("Batch inference completed successfully.")
84 changes: 84 additions & 0 deletions examples/iris_classifier/inference_results/test_data.json
@@ -0,0 +1,84 @@
{
    "dataframe_split": {
        "index": [
            68,
            45,
            128,
            26,
            118,
            131,
            141,
            143,
            9,
            104
        ],
        "columns": [
            "sepal length (cm)",
            "sepal width (cm)",
            "petal length (cm)",
            "petal width (cm)"
        ],
        "data": [
            [
                6.2,
                2.2,
                4.5,
                1.5
            ],
            [
                4.8,
                3.0,
                1.4,
                0.3
            ],
            [
                6.4,
                2.8,
                5.6,
                2.1
            ],
            [
                5.0,
                3.4,
                1.6,
                0.4
            ],
            [
                7.7,
                2.6,
                6.9,
                2.3
            ],
            [
                7.9,
                3.8,
                6.4,
                2.0
            ],
            [
                6.9,
                3.1,
                5.1,
                2.3
            ],
            [
                6.8,
                3.2,
                5.9,
                2.3
            ],
            [
                4.9,
                3.1,
                1.5,
                0.1
            ],
            [
                6.5,
                3.0,
                5.8,
                2.2
            ]
        ]
    }
}
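This test_data.json file is a request body in MLflow's dataframe_split format, so it can be replayed against a served model without rebuilding the payload in code. A minimal sketch of such a replay, assuming the model is served locally on port 5000 as described in online_inference.py below:

# Sketch: replay the saved request body against a local MLflow model server.
# Assumes `mlflow models serve` is already running on http://127.0.0.1:5000.
import httpx

with open("examples/iris_classifier/inference_results/test_data.json") as f:
    body = f.read()  # the file already contains the JSON request body

response = httpx.post(
    "http://127.0.0.1:5000/invocations",
    content=body,
    headers={"Content-Type": "application/json"},
)
print(response.json())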
58 changes: 21 additions & 37 deletions examples/iris_classifier/online_inference.py
@@ -1,63 +1,47 @@
# import necessary libraries
from examples.iris_classifier.data import get_train_test_data
import httpx
import json
import pandas as pd


def get_predictions_from_response(response):
"""
Process the response from the REST API.

:param response: The response object from the HTTP request.
:return: The JSON content of the response.
"""
if response.status_code == 200:
json_response = response.json()
predictions = json_response.get("predictions")
if predictions is not None:
return predictions
else:
raise Exception("No predictions found in the response.")
else:
raise Exception(f"Error: {response.status_code} - {response.text}")


def get_payload(samples: int) -> dict:
def main() -> None:
"""
Get the payload for online inference.
Perform online inference using a REST API.

:param samples: Number of samples to include in the payload.
:return: Dictionary containing the payload for online inference.
To deploy the model in the local server, run the following command:
`poetry run mlflow models serve -m models:/Iris_Classifier_Model@production --env-manager local`
"""
# Load the test data
_, x_test, _, y_test = get_train_test_data()
samples = 3 # Number of samples to include in the payload

# Uncomment the following line to make the api call fail
# x_test["sepal length (cm)"] = ["" for _ in range(len(x_test))]

# define the payload for online inference
payload = {
"dataframe_split": x_test.iloc[0:samples].to_dict(orient="split"),
}
return payload, y_test.iloc[0:samples]


def main() -> None:
"""
Perform online inference using a REST API.

To deploy the model in the local server, run the following command:
`poetry run mlflow models serve -m models:/Iris_Classifier_Model@production --env-manager local`

"""
payload, labels = get_payload(1)
# Define the endpoint URL and headers for the REST API call
url = "http://127.0.0.1:5000/invocations"

print(payload)
headers = {"Content-Type": "application/json"}

# Make the REST API call to perform online inference
response = httpx.post(url, data=json.dumps(payload), headers=headers)
predictions = get_predictions_from_response(response)
if response.status_code == 200:
json_response = response.json()
predictions = json_response.get("predictions")
else:
raise Exception(f"Error: {response.status_code} - {response.text}")

# Print the predictions and corresponding labels
print(
pd.DataFrame(
{
"predictions": predictions,
"labels": labels,
"labels": y_test.iloc[0:samples].values,
}
)
)
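One possible simplification of the request handling above (not part of this PR): httpx can serialize the payload and surface HTTP errors itself, which removes the need for json.dumps and the manual status-code branch. A sketch, assuming the same url and payload variables as in main():

# Sketch: let httpx handle JSON serialization and HTTP error reporting.
# Assumes `url` and `payload` are defined as in main() above.
response = httpx.post(url, json=payload)  # sets the Content-Type header automatically
response.raise_for_status()  # raises httpx.HTTPStatusError on non-2xx responses
predictions = response.json().get("predictions")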
42 changes: 20 additions & 22 deletions examples/iris_classifier/train.py
@@ -1,49 +1,48 @@
# imports
from sklearn.ensemble import RandomForestClassifier
from examples.iris_classifier.data import get_train_test_data
from examples.utils.decorators import mlflow_tracking_uri
from examples.utils.decorators import mlflow_client
from examples.utils.decorators import mlflow_experiment
from examples.utils.mlflow_utils import set_alias_to_latest_version
from typing import Optional
from typing import Dict

from mlflow.models import infer_signature
import mlflow


def train(x_train, y_train, params: Optional[Dict[str, str]]) -> RandomForestClassifier:
"""
Train a Random Forest Classifier on the provided training data.
The function returns the trained model.

:param x_train: The training features (input data).
:param y_train: The training labels (target data).
:return: The trained Random Forest Classifier model.
"""
clf = RandomForestClassifier(**params)
clf.fit(x_train, y_train)
return clf


@mlflow_tracking_uri
@mlflow_experiment(name="iris_classifier")
@mlflow_client
def main(**kwargs) -> None:
    # Example usage of the train function
    """
    Main function to train a Random Forest Classifier on the Iris dataset.
    """

    # Get training and test data
    x_train, x_test, y_train, y_test = get_train_test_data()

    # define parameters and create the model
    params = {"n_estimators": 1, "max_depth": 10}
    model = train(x_train, y_train, params)
    model = RandomForestClassifier(**params)

    # Train the model
    model.fit(x_train, y_train)

    # infer model signature
    model_signature = infer_signature(x_train, y_train)

    # create evaluation data
    eval_data = x_test.copy()
    eval_data["target"] = y_test

    # Get the MLflow client
    client = kwargs["mlflow_client"]
    registered_model_name = "Iris_Classifier_Model"
    with mlflow.start_run(run_name="training-rfc-model") as run:
        # log parameters.
        mlflow.log_params(model.get_params())

        # log model
        # log model.
        mlflow.sklearn.log_model(
            sk_model=model,
            artifact_path="model",
@@ -59,10 +58,9 @@ def main(**kwargs) -> None:
            client=client,
        )

        # model uri
        model_uri = f"runs:/{run.info.run_id}/model"
        # log evaluation metrics
        mlflow.evaluate(
            model=model_uri,
            model=f"runs:/{run.info.run_id}/model",
            data=eval_data,
            model_type="classifier",
            targets="target",
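The helper set_alias_to_latest_version imported from examples.utils.mlflow_utils is not shown in this diff. For context, a minimal sketch of what such a helper might look like; the function name and the registered_model_name/client arguments are taken from how it is called above, while the alias parameter and the body are assumptions built on the standard MLflow client API:

# Hypothetical sketch of set_alias_to_latest_version (the real implementation
# lives in examples/utils/mlflow_utils.py and is not part of this diff).
from mlflow.tracking import MlflowClient


def set_alias_to_latest_version(registered_model_name: str, alias: str, client: MlflowClient) -> None:
    """Point `alias` (e.g. "production") at the newest version of the registered model."""
    versions = client.search_model_versions(f"name='{registered_model_name}'")
    latest = max(int(v.version) for v in versions)
    client.set_registered_model_alias(registered_model_name, alias, str(latest))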
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "mlflow_for_ml_dev"
version = "1.6.0"
version = "1.6.1"
description = "Code examples for the youtube playlist 'MLflow for Machine Learning Development' by Manuel Gil"
authors = ["Manuel Gil <[email protected]>"]
readme = "README.md"