Merge pull request #838 from roboflow/feature/inference_cli_with_workflows

`inference-cli` with batch processing for Workflows

Showing 41 changed files with 4,179 additions and 383 deletions.
47 changes: 47 additions & 0 deletions
.github/workflows/integration_tests_inference_cli_depending_on_inference_x86.yml
@@ -0,0 +1,47 @@
name: INTEGRATION TESTS - inference CLI + inference CORE

on:
  pull_request:
    branches: [main]
  push:
    branches: [main]
  workflow_dispatch:

jobs:
  call_is_mergeable:
    uses: ./.github/workflows/is_mergeable.yml
    secrets: inherit
  build-dev-test:
    needs: call_is_mergeable
    if: ${{ github.event_name != 'pull_request' || needs.call_is_mergeable.outputs.mergeable_state != 'not_clean' }}
    runs-on:
      labels: depot-ubuntu-22.04-small
      group: public-depot
    timeout-minutes: 30
    strategy:
      matrix:
        python-version: ["3.9", "3.10"]
    steps:
      - name: 🛎️ Checkout
        uses: actions/checkout@v4
        with:
          ref: ${{ github.head_ref }}
      - name: 🐍 Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          check-latest: true
      - name: 📦 Cache Python packages
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('requirements/**') }}
          restore-keys: |
            ${{ runner.os }}-pip-${{ matrix.python-version }}-
      - name: 📦 Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install --upgrade setuptools
          pip install --extra-index-url https://download.pytorch.org/whl/cpu -r requirements/_requirements.txt -r requirements/requirements.cpu.txt -r requirements/requirements.sdk.http.txt -r requirements/requirements.test.unit.txt -r requirements/requirements.http.txt -r requirements/requirements.yolo_world.txt -r requirements/requirements.doctr.txt -r requirements/requirements.sam.txt -r requirements/requirements.transformers.txt -r requirements/requirements.cli.txt
      - name: 🧪 Integration Tests of Inference CLI
        run: RUN_TESTS_WITH_INFERENCE_PACKAGE=True INFERENCE_CLI_TESTS_API_KEY=${{ secrets.LOAD_TEST_PRODUCTION_API_KEY }} python -m pytest tests/inference_cli/integration_tests/test_workflows.py
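To reproduce these integration tests locally, a rough equivalent of the install and test steps might look like the sketch below (assuming you are in the repository root on Python 3.9 or 3.10 and have your own Roboflow API key; the requirement list is trimmed here, so install the full set from the workflow above if a test complains about missing extras):

```bash
pip install --upgrade pip setuptools
pip install --extra-index-url https://download.pytorch.org/whl/cpu \
  -r requirements/_requirements.txt -r requirements/requirements.cpu.txt \
  -r requirements/requirements.sdk.http.txt -r requirements/requirements.cli.txt \
  -r requirements/requirements.test.unit.txt

# run the CLI <-> core integration tests, mirroring the workflow's environment variables
RUN_TESTS_WITH_INFERENCE_PACKAGE=True \
INFERENCE_CLI_TESTS_API_KEY="<your_roboflow_api_key>" \
python -m pytest tests/inference_cli/integration_tests/test_workflows.py
```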
@@ -0,0 +1,71 @@
# Benchmarking `inference`

`inference benchmark` offers an easy way to check the performance of `inference` in your setup. The command
is capable of benchmarking both the `inference` server and the `inference` Python package.

!!! Tip "Discovering command capabilities"

    To check the details of the command, run:

    ```bash
    inference benchmark --help
    ```

    Additionally, a help guide is available for each sub-command:

    ```bash
    inference benchmark api-speed --help
    ```

## Benchmarking the `inference` Python package

!!! Important "`inference` needs to be installed"

    Before running this command, make sure the `inference` package is installed:

    ```bash
    pip install inference
    ```

A basic benchmark can be run using the following command:

```bash
inference benchmark python-package-speed \
    -m {your_model_id} \
    -d {pre-configured dataset name or path to directory with images} \
    -o {output_directory}
```

The command runs a specified number of inferences using the selected model and saves statistics (including benchmark
parameters, throughput, latency, errors and platform details) in the chosen output directory.
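For instance, a filled-in invocation might look like the sketch below (the model ID, dataset directory and output directory are illustrative placeholders; substitute your own values):

```bash
inference benchmark python-package-speed \
    -m your-project/1 \
    -d ./images_for_benchmark \
    -o ./benchmark_results
```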
## Benchmarking the `inference` server

!!! note

    Before running an API benchmark against your local `inference` server, make sure the server is up and running:

    ```bash
    inference server start
    ```

A basic benchmark can be run using the following command:

```bash
inference benchmark api-speed \
    -m {your_model_id} \
    -d {pre-configured dataset name or path to directory with images} \
    -o {output_directory}
```

The command runs a specified number of inferences using the selected model and saves statistics (including benchmark
parameters, throughput, latency, errors and platform details) in the chosen output directory.

This benchmark has more configuration options to support different ways of profiling the HTTP API. In the default mode,
a single client is spawned, and it sends requests sequentially, one after another. This may be suboptimal
in specific cases, so you may specify the number of concurrent clients using the `-c {number_of_clients}` option.
Each client sends its next request once the previous one is handled. This option still does not cover all test scenarios.
For instance, you may want to send `x` requests each second (which is closer to a production environment where
multiple clients send requests concurrently). In this scenario, the `--rps {value}` option can be used (and `-c` will
be ignored). The value provided in `--rps` specifies how many requests are to be spawned **each second**, without
waiting for previous requests to be handled. In I/O-intensive benchmark scenarios, we suggest running the command
from multiple separate processes, and possibly from multiple hosts.
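For example, the two profiling modes described above could be invoked as follows (the model ID, dataset and output directory are placeholders):

```bash
# eight concurrent clients, each sending its next request once the previous one is handled
inference benchmark api-speed -m your-project/1 -d ./images_for_benchmark -o ./benchmark_results -c 8

# a fixed load of 50 requests spawned each second, regardless of how quickly responses arrive
inference benchmark api-speed -m your-project/1 -d ./images_for_benchmark -o ./benchmark_results --rps 50
```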
@@ -0,0 +1,123 @@
# Deploying `inference` to Cloud

You can deploy Roboflow Inference containers to virtual machines in the cloud. These VMs are configured to run CPU or
GPU-based Inference servers under the hood, so you don't have to deal with OS setup, GPU drivers, docker installations, and so on!
The Inference CLI currently supports deploying the Roboflow Inference container images into a virtual machine running
on Google Cloud Platform (GCP) or Amazon Web Services (AWS).

The Roboflow Inference CLI assumes the corresponding cloud CLI is configured for the project you want to deploy the
virtual machine into. Read the instructions for setting up the [Google/GCP - gcloud CLI](https://cloud.google.com/sdk/docs/install) or the [Amazon/AWS - aws CLI](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html).
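For reference, the typical entry points for that one-time cloud CLI configuration look like this (the project ID is a placeholder; consult each provider's documentation for the full setup):

```bash
# GCP: authenticate and select the project to deploy into
gcloud auth login
gcloud config set project <your-gcp-project-id>

# AWS: configure credentials and a default region
aws configure
```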
Roboflow Inference cloud deploy is powered by the popular [SkyPilot project](https://github.com/skypilot-org/skypilot).

!!! Important "Make sure the `cloud-deploy` extras are installed"

    To run the commands presented below, you need to have the `cloud-deploy` extras installed:

    ```bash
    pip install "inference-cli[cloud-deploy]"
    ```

!!! Tip "Discovering command capabilities"

    To check the details of the command, run:

    ```bash
    inference cloud --help
    ```

    Additionally, a help guide is available for each sub-command:

    ```bash
    inference cloud deploy --help
    ```

## `inference cloud deploy`

We illustrate `inference cloud deploy` with some examples below.
*Deploy GPU or CPU inference to AWS or GCP*

```bash
# Deploy the Roboflow Inference GPU container into a GPU-enabled VM in AWS
inference cloud deploy --provider aws --compute-type gpu
```

```bash
# Deploy the Roboflow Inference CPU container into a CPU-only VM in GCP
inference cloud deploy --provider gcp --compute-type cpu
```

Note the "cluster name" printed after the deployment completes. This handle is used in many subsequent commands.
The deploy command also prints helpful debug and cost information about your VM.

Deploying Inference into a cloud VM will also print out an endpoint of the form "http://1.2.3.4:9001"; you can now run inferences against this endpoint.

Note that port 9001 is automatically opened; check with your security admin whether this is acceptable for your cloud/project.
## `inference cloud status`

To check the status of your deployment, run:

```bash
inference cloud status
```

## Stop and start deployments

You can start and stop your deployment using:

```bash
inference cloud start <deployment_handle>
```

and

```bash
# Stop the VM; you only pay for disk storage while the VM is stopped
inference cloud stop <deployment_handle>
```
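For example, with a hypothetical cluster name `robo-inference-1` standing in for the handle printed by `inference cloud deploy`:

```bash
inference cloud stop robo-inference-1    # pause the VM; only disk storage is billed while stopped
inference cloud start robo-inference-1   # resume the VM when you need it again
```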
## `inference cloud undeploy`

To delete (undeploy) your deployment, run:

```bash
inference cloud undeploy <deployment_handle>
```

## SSH into the cloud deployment

You can SSH into your cloud deployment with the following command:

```bash
ssh <deployment_handle>
```

The required SSH configuration is automatically added to your `~/.ssh/config`, so you don't need to set this up manually.

## Cloud Deploy Customization

Roboflow Inference cloud deploy will create VMs based on internally tested templates.

For advanced use cases, and to customize the template, you can pass your own [sky yaml](https://skypilot.readthedocs.io/en/latest/reference/yaml-spec.html) template on the command line, like so:

```bash
inference cloud deploy --custom /path/to/sky-template.yaml
```

If you want, you can also dump the standard template stored in the Roboflow CLI and modify it for your needs; the following command will do that:

```bash
# This command will print out the standard gcp/cpu sky template.
inference cloud deploy --dry-run --provider gcp --compute-type cpu
```

Then you can deploy a custom template based on your changes.
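Putting those two pieces together, a customization workflow might look like the sketch below (it assumes the `--dry-run` output printed to stdout is the raw template, and the file name is just an example):

```bash
# dump the standard GCP/CPU template to a local file
inference cloud deploy --dry-run --provider gcp --compute-type cpu > my-sky-template.yaml

# edit my-sky-template.yaml to suit your needs, then deploy it
inference cloud deploy --custom ./my-sky-template.yaml
```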
As an aside, you can also use the [sky cli](https://skypilot.readthedocs.io/en/latest/reference/cli.html) to control your deployment(s) and access some more advanced functionality.

Roboflow Inference deploy currently supports AWS and GCP; please open an issue on the [Inference GitHub repository](https://github.com/roboflow/inference/issues) if you would like to see other cloud providers supported.