Commit 8d94e1f: Update code
Faraz9877 committed Dec 12, 2024
1 parent 3d6c50a
Showing 3 changed files with 12 additions and 17 deletions.
24 changes: 11 additions & 13 deletions benchmarks/cutlass_benchmarks/sparse_fp8_benchmarks.py
@@ -3,12 +3,10 @@
import dataclasses
import itertools
import multiprocessing as mp
-import os
import pickle as pkl
import time
import traceback
from multiprocessing import Process, Queue
-from pathlib import Path
from queue import Empty
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple

@@ -539,17 +537,17 @@ def bench_fp8(dtype: torch.dtype, with_cuda_graph: Optional[int],
}

# Prepare configs for all kernels
-    standard_kernels = [
-        {'kernel_type': 'pytorch_mm'},
-        {'kernel_type': 'pytorch_scaled_mm'},
-        {'kernel_type': 'pytorch_scaled_mm_fast'},
-        {
-            'kernel_type': 'cutlass_scaled_mm'
-        },
-        {
-            'kernel_type': 'cutlass_scaled_sparse_mm'
-        }
-    ]
+    standard_kernels = [{
+        'kernel_type': 'pytorch_mm'
+    }, {
+        'kernel_type': 'pytorch_scaled_mm'
+    }, {
+        'kernel_type': 'pytorch_scaled_mm_fast'
+    }, {
+        'kernel_type': 'cutlass_scaled_mm'
+    }, {
+        'kernel_type': 'cutlass_scaled_sparse_mm'
+    }]

# Create configs for standard kernels
all_configs = [{**base_config, **kernel} for kernel in standard_kernels]
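Note on the hunk above: all_configs merges each kernel entry into the shared base_config via dict unpacking, so every benchmark config carries the common fields plus its own kernel_type. A minimal sketch of that merge, using a hypothetical base_config (the real dict is defined earlier in bench_fp8 and is not part of this hunk):

    base_config = {'m': 16, 'k': 4096, 'n': 4096, 'dtype': 'fp8'}  # hypothetical values
    standard_kernels = [{'kernel_type': 'pytorch_mm'},
                        {'kernel_type': 'cutlass_scaled_sparse_mm'}]
    # {**a, **b} copies a first, then overlays b; later keys win on conflict.
    all_configs = [{**base_config, **kernel} for kernel in standard_kernels]
    print(all_configs[0])
    # {'m': 16, 'k': 4096, 'n': 4096, 'dtype': 'fp8', 'kernel_type': 'pytorch_mm'}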
3 changes: 1 addition & 2 deletions benchmarks/cutlass_benchmarks/w8a8_benchmarks.py
@@ -8,13 +8,12 @@
import torch
import torch.utils.benchmark as TBenchmark
from torch.utils.benchmark import Measurement as TMeasurement
+from utils import make_rand_tensors
from weight_shapes import WEIGHT_SHAPES

from vllm import _custom_ops as ops
from vllm.utils import FlexibleArgumentParser

-from utils import make_rand_tensors
-
DEFAULT_MODELS = list(WEIGHT_SHAPES.keys())
DEFAULT_BATCH_SIZES = [1, 16, 32, 64, 128, 256, 512]
DEFAULT_TP_SIZES = [1]
2 changes: 0 additions & 2 deletions tests/kernels/test_semi_structured.py
Expand Up @@ -9,7 +9,6 @@
from vllm import _custom_ops as ops
from vllm.platforms import current_platform

-
CUDA_DEVICES = [
f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)
]
@@ -129,4 +128,3 @@ def test_cutlass_sparse_subset():
out_dtype=torch.bfloat16)

torch.testing.assert_close(out, baseline, rtol=1e-1, atol=1e0)
-