Commit 8d94e1f: Update code
Faraz9877 committed Dec 12, 2024
1 parent 3d6c50a
Showing 3 changed files with 12 additions and 17 deletions.
24 changes: 11 additions & 13 deletions benchmarks/cutlass_benchmarks/sparse_fp8_benchmarks.py
@@ -3,12 +3,10 @@
import dataclasses
import itertools
import multiprocessing as mp
-import os
import pickle as pkl
import time
import traceback
from multiprocessing import Process, Queue
-from pathlib import Path
from queue import Empty
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple

@@ -539,17 +537,17 @@ def bench_fp8(dtype: torch.dtype, with_cuda_graph: Optional[int],
}

# Prepare configs for all kernels
-    standard_kernels = [
-        {'kernel_type': 'pytorch_mm'},
-        {'kernel_type': 'pytorch_scaled_mm'},
-        {'kernel_type': 'pytorch_scaled_mm_fast'},
-        {
-            'kernel_type': 'cutlass_scaled_mm'
-        },
-        {
-            'kernel_type': 'cutlass_scaled_sparse_mm'
-        }
-    ]
+    standard_kernels = [{
+        'kernel_type': 'pytorch_mm'
+    }, {
+        'kernel_type': 'pytorch_scaled_mm'
+    }, {
+        'kernel_type': 'pytorch_scaled_mm_fast'
+    }, {
+        'kernel_type': 'cutlass_scaled_mm'
+    }, {
+        'kernel_type': 'cutlass_scaled_sparse_mm'
+    }]

# Create configs for standard kernels
all_configs = [{**base_config, **kernel} for kernel in standard_kernels]
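Note on the hunk above: all_configs merges each kernel entry into the shared base_config via dict unpacking, so every benchmark config carries the common fields plus its own kernel_type. A minimal sketch of that merge, using a hypothetical base_config (the real dict is defined earlier in bench_fp8 and is not part of this hunk):

    base_config = {'m': 16, 'k': 4096, 'n': 4096, 'dtype': 'fp8'}  # hypothetical values
    standard_kernels = [{'kernel_type': 'pytorch_mm'},
                        {'kernel_type': 'cutlass_scaled_sparse_mm'}]
    # {**a, **b} copies a first, then overlays b; later keys win on conflict.
    all_configs = [{**base_config, **kernel} for kernel in standard_kernels]
    print(all_configs[0])
    # {'m': 16, 'k': 4096, 'n': 4096, 'dtype': 'fp8', 'kernel_type': 'pytorch_mm'}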
3 changes: 1 addition & 2 deletions benchmarks/cutlass_benchmarks/w8a8_benchmarks.py
@@ -8,13 +8,12 @@
import torch
import torch.utils.benchmark as TBenchmark
from torch.utils.benchmark import Measurement as TMeasurement
+from utils import make_rand_tensors
from weight_shapes import WEIGHT_SHAPES

from vllm import _custom_ops as ops
from vllm.utils import FlexibleArgumentParser

-from utils import make_rand_tensors
-
DEFAULT_MODELS = list(WEIGHT_SHAPES.keys())
DEFAULT_BATCH_SIZES = [1, 16, 32, 64, 128, 256, 512]
DEFAULT_TP_SIZES = [1]
2 changes: 0 additions & 2 deletions tests/kernels/test_semi_structured.py
Expand Up @@ -9,7 +9,6 @@
from vllm import _custom_ops as ops
from vllm.platforms import current_platform

-
CUDA_DEVICES = [
f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)
]
@@ -129,4 +128,3 @@ def test_cutlass_sparse_subset():
out_dtype=torch.bfloat16)

torch.testing.assert_close(out, baseline, rtol=1e-1, atol=1e0)
-