7 changes: 7 additions & 0 deletions examples/BertNewsClassification/README.md
@@ -82,6 +82,13 @@ python news_classifier.py \
--model_save_path /home/ubuntu/mlflow-torchserve/examples/BertNewsClassification/models
```

+To run the training script in a GPU environment:
+```
+torchrun news_classifier.py \
+--max_epochs 5 \
+--model_save_path /home/ubuntu/mlflow-torchserve/examples/BertNewsClassification/models
+```
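Note that `torchrun` launches a single worker unless told otherwise; for multi-GPU training you would typically also pass `--nproc_per_node` (standard `torchrun` usage; the GPU count below is illustrative):

```
torchrun --nproc_per_node=2 news_classifier.py \
--max_epochs 5 \
--model_save_path /home/ubuntu/mlflow-torchserve/examples/BertNewsClassification/models
```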

## Starting TorchServe

Create an empty directory `model_store` and run the following command to start TorchServe.
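A typical invocation (assumed here; it should match the Titanic example later in this page):

```
torchserve --start --model-store model_store/
```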
6 changes: 3 additions & 3 deletions examples/IrisClassification/iris_classification.py
@@ -21,9 +21,9 @@ class IrisClassification(pl.LightningModule):
    def __init__(self, **kwargs):
        super(IrisClassification, self).__init__()

-       self.train_acc = Accuracy()
-       self.val_acc = Accuracy()
-       self.test_acc = Accuracy()
+       self.train_acc = Accuracy(task="multiclass", num_classes=3)
Collaborator (author): Latest torchmetrics expects `task` and `num_classes` to be passed while using `Accuracy` - https://torchmetrics.readthedocs.io/en/stable/classification/accuracy.html
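For reference, a minimal sketch of the updated API (the tensors are illustrative):

```python
import torch
from torchmetrics import Accuracy

# torchmetrics >= 0.11 requires an explicit task; multiclass metrics
# also need num_classes.
accuracy = Accuracy(task="multiclass", num_classes=3)

preds = torch.tensor([0, 2, 1, 2])   # predicted class indices
target = torch.tensor([0, 1, 1, 2])  # ground-truth labels
print(accuracy(preds, target))       # overall accuracy as a 0-dim tensor
```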

+       self.val_acc = Accuracy(task="multiclass", num_classes=3)
+       self.test_acc = Accuracy(task="multiclass", num_classes=3)
        self.args = kwargs

        self.fc1 = nn.Linear(4, 10)
3 changes: 2 additions & 1 deletion examples/IrisClassification/predict.py
@@ -30,7 +30,8 @@ def predict(parser_args):
        df[column] = df[column].astype("double")

    prediction = plugin.predict(deployment_name=parser_args["deployment_name"], df=input_data)
-   print("Prediction Result {}".format(prediction))
+
+   print("Prediction Result {}".format(prediction.to_json()))


if __name__ == "__main__":
21 changes: 11 additions & 10 deletions examples/MNIST/mnist_model.py
@@ -120,9 +120,9 @@ def __init__(self, **kwargs):
"""
super(LightningMNISTClassifier, self).__init__()

self.train_acc = Accuracy()
self.val_acc = Accuracy()
self.test_acc = Accuracy()
self.train_acc = Accuracy(task="multiclass", num_classes=10)
self.val_acc = Accuracy(task="multiclass", num_classes=10)
self.test_acc = Accuracy(task="multiclass", num_classes=10)

# mnist images are (1, 28, 28) (channels, width, height)
self.layer_1 = torch.nn.Linear(28 * 28, 128)
@@ -279,8 +279,6 @@ def get_model(trainer):
        if dict_args[argument] == "None":
            dict_args[argument] = None

-   mlflow.pytorch.autolog()
-
    model = LightningMNISTClassifier(**dict_args)

    dm = MNISTDataModule(**dict_args)
@@ -289,11 +287,14 @@

    trainer = pl.Trainer.from_argparse_args(args)

-   trainer.fit(model, dm)
-   trainer.test(datamodule=dm)
-
-   run = mlflow.active_run()
+   mlflow.pytorch.autolog()
+   with mlflow.start_run() as run:
+       trainer.fit(model, dm)
+       trainer.test(datamodule=dm)
+       active_run = mlflow.active_run()
    if dict_args["register"] == "true":
-       mlflow.register_model(model_uri=run.info.artifact_uri, name=dict_args["registration_name"])
+       mlflow.register_model(
+           model_uri=active_run.info.artifact_uri, name=dict_args["registration_name"]
+       )
    else:
        torch.save(trainer.lightning_module.state_dict(), "model.pth")
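Once registered, the model can be loaded back from the registry by name, assuming the registered URI resolves to a logged PyTorch model; a minimal sketch, with a placeholder registry name and version standing in for whatever `--registration_name` was used:

```python
import mlflow.pytorch

# "mnist_classifier" and version 1 are illustrative placeholders.
model = mlflow.pytorch.load_model("models:/mnist_classifier/1")
model.eval()
```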
2 changes: 1 addition & 1 deletion examples/MNIST/predict.py
@@ -15,7 +15,7 @@ def predict(parser_args):

    image_tensor = mnist_transforms(img)
    prediction = plugin.predict(parser_args["deployment_name"], image_tensor)
-   print("Prediction Result {}".format(prediction))
+   print("Prediction Result {}".format(prediction.to_json()))


if __name__ == "__main__":
2 changes: 1 addition & 1 deletion examples/TextClassification/predict.py
@@ -10,7 +10,7 @@ def predict(parser_args):
        text = fp.read()
    plugin = get_deploy_client(parser_args["target"])
    prediction = plugin.predict(parser_args["deployment_name"], json.dumps(text))
-   print("Prediction Result {}".format(prediction))
+   print("Prediction Result {}".format(prediction.to_json()))


if __name__ == "__main__":
10 changes: 7 additions & 3 deletions examples/Titanic/README.md
@@ -48,7 +48,7 @@ mlflow run . --no-conda
* Step - 3: Create an empty directory model_store and run the following command to start torchserve.

```bash
-torchserve --start --model-store model_store/ --ts-config config.properties
+torchserve --start --model-store model_store/
```

## Creating a new deployment
@@ -67,13 +67,17 @@ For testing, we are going to use a sample test record placed in test_data folder

Run the following command to invoke prediction on the test record; the output is stored in the output.json file.

-`mlflow deployments predict --name titanic --target torchserve --input-path test_data/input.json --output-path output.json`
+```
+mlflow deployments predict --name titanic --target torchserve --input-path test_data/input.json --output-path output.json
+```

This model will classify the test record as survived or not survived and store the result in `output.json`.


Run the command below to invoke explain for feature-importance attributions on the test record. It will save the attribution image attributions_imp.png in the test_data folder.

-` mlflow deployments explain -t torchserve --name titanic --input-path test_data/input.json`
+```
+mlflow deployments explain -t torchserve --name titanic --input-path test_data/input.json
+```

This explain command gives us the average attribution for each feature. From the feature attribution information, we obtain some interesting insights regarding the importance of various features.
36 changes: 24 additions & 12 deletions mlflow_torchserve/__init__.py
@@ -1,26 +1,40 @@
-import docker
import json
-import subprocess
-import time
import logging
import os
-import pandas as pd
-import torch
+import subprocess
+import time
+from pathlib import Path
+
+import docker
+import pandas as pd
import requests
-from mlflow_torchserve.config import Config
-
+import torch
from mlflow.deployments import BaseDeploymentClient, get_deploy_client
-from mlflow.tracking.artifact_utils import _download_artifact_from_uri
+from mlflow.deployments import PredictionsResponse
from mlflow.models.model import Model
+from mlflow.tracking.artifact_utils import _download_artifact_from_uri
+
+from mlflow_torchserve.config import Config

_logger = logging.getLogger(__name__)

_DEFAULT_TORCHSERVE_LOCAL_INFERENCE_PORT = "8080"
_DEFAULT_TORCHSERVE_LOCAL_MANAGEMENT_PORT = "8081"


+class CustomPredictionsResponse(PredictionsResponse):

Reviewer: Nit: Do we want to call this TorchServePredictionsResponse?

Collaborator (author): @dbczumar I have done this change as a separate PR. Can you please review it when you find time? The builds are passing.

+   def __init__(self, resp):
+       super(CustomPredictionsResponse, self).__init__(self)
+       self.resp = resp
+
+   def to_json(self, path=None):
+       if path is not None:
+           with open(path, "w") as f:
+               json.dump(self.resp, f)
+       else:
+           return self.resp
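A quick usage sketch of the override (the payload string is illustrative): calling `to_json()` with no path returns the raw server response, while passing a path serializes it to disk instead, which is presumably what `mlflow deployments predict --output-path` relies on.

```python
resp = CustomPredictionsResponse('{"predictions": [2]}')

print(resp.to_json())        # no path: returns the raw response string
resp.to_json("output.json")  # path given: serializes the response to a file
```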


class TorchServePlugin(BaseDeploymentClient):
    def __init__(self, uri):

@@ -268,7 +282,6 @@ def predict(self, deployment_name, df):

        :return: output - Returns the predicted value
        """
-
        url = "{api}/{predictions}/{name}".format(
            api=self.inference_api, predictions="predictions", name=deployment_name
        )
@@ -286,14 +299,14 @@ def predict(self, deployment_name, df):
            raise ValueError("Unable to parse input json string: {}".format(e))

        resp = requests.post(url, data)
+       cust_resp = CustomPredictionsResponse(resp.text)
        if resp.status_code != 200:
            raise Exception(
                "Unable to infer the results for the name %s. "
                "Server returned status code %s and response: %s"
                % (deployment_name, resp.status_code, resp.content)
            )
-
-       return resp.text
+       return cust_resp

    def explain(self, deployment_name, df):
        """
@@ -330,7 +343,6 @@ def explain(self, deployment_name, df):
"Server returned status code %s and response: %s"
% (deployment_name, resp.status_code, resp.content)
)

return resp.text

def __generate_mar_file(
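Tying the pieces together, a hedged sketch of how the reworked `predict` path is consumed from a client script (the deployment name and input are placeholders, following the examples' predict.py scripts):

```python
import json

from mlflow.deployments import get_deploy_client

client = get_deploy_client("torchserve")

# "text_classification" is an illustrative deployment name.
prediction = client.predict("text_classification", json.dumps("sample news text"))

print(prediction.to_json())        # raw TorchServe response body
prediction.to_json("output.json")  # or persist it for later inspection
```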