Skip to content

Commit 39b98a2

Browse files
authored
Merge pull request #11 from VectorInstitute/develop
v0.3.2
2 parents f43d7bf + 9a07db8 commit 39b98a2

File tree

4 files changed

+23
-14
lines changed

4 files changed

+23
-14
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "vec-inf"
3-
version = "0.3.1"
3+
version = "0.3.2"
44
description = "Efficient LLM inference on Slurm clusters using vLLM."
55
authors = ["Marshall Wang <[email protected]>"]
66
license = "MIT license"

vec_inf/cli/_cli.py

Lines changed: 20 additions & 10 deletions
Original file line number · Diff line number · Diff line change
@@ -63,6 +63,11 @@ def cli():
6363
type=str,
6464
help='Time limit for job, this should comply with QoS, default to max walltime of the chosen QoS'
6565
)
66+
@click.option(
67+
"--vocab-size",
68+
type=int,
69+
help='Vocabulary size, this option is intended for custom models'
70+
)
6671
@click.option(
6772
"--data-type",
6873
type=str,
@@ -93,6 +98,7 @@ def launch(
9398
num_gpus: int=None,
9499
qos: str=None,
95100
time: str=None,
101+
vocab_size: int=None,
96102
data_type: str=None,
97103
venv: str=None,
98104
log_dir: str=None,
@@ -109,16 +115,20 @@ def launch(
109115

110116
models_df = load_models_df()
111117

112-
if model_name not in models_df['model_name'].values:
113-
raise ValueError(f"Model name {model_name} not found in available models")
114-
115-
default_args = load_default_args(models_df, model_name)
116-
117-
for arg in default_args:
118-
if arg in locals() and locals()[arg] is not None:
119-
default_args[arg] = locals()[arg]
120-
renamed_arg = arg.replace("_", "-")
121-
launch_cmd += f" --{renamed_arg} {default_args[arg]}"
118+
if model_name in models_df['model_name'].values:
119+
default_args = load_default_args(models_df, model_name)
120+
for arg in default_args:
121+
if arg in locals() and locals()[arg] is not None:
122+
default_args[arg] = locals()[arg]
123+
renamed_arg = arg.replace("_", "-")
124+
launch_cmd += f" --{renamed_arg} {default_args[arg]}"
125+
else:
126+
model_args = models_df.columns.tolist()
127+
excluded_keys = ['model_name', 'pipeline_parallelism']
128+
for arg in model_args:
129+
if arg not in excluded_keys and locals()[arg] is not None:
130+
renamed_arg = arg.replace("_", "-")
131+
launch_cmd += f" --{renamed_arg} {locals()[arg]}"
122132

123133
output = run_bash_command(launch_cmd)
124134

vec_inf/models/models.csv

Lines changed: 2 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -42,4 +42,5 @@ Mixtral-8x7B-Instruct-v0.1,Mixtral,8x7B-Instruct-v0.1,a40,m2,08:00:00,4,1,32000,
4242
Mixtral-8x22B-v0.1,Mixtral,8x22B-v0.1,a40,m2,08:00:00,4,2,32768,65536,auto,singularity,default,false
4343
Mixtral-8x22B-Instruct-v0.1,Mixtral,8x22B-Instruct-v0.1,a40,m2,08:00:00,4,2,32768,65536,auto,singularity,default,false
4444
Phi-3-medium-128k-instruct,Phi-3,medium-128k-instruct,a40,m2,08:00:00,2,1,32064,131072,auto,singularity,default,false
45-
Phi-3-vision-128k-instruct,Phi-3,vision-128k-instruct,a40,m2,08:00:00,2,1,32064,65536,auto,singularity,default,false
45+
Phi-3-vision-128k-instruct,Phi-3,vision-128k-instruct,a40,m2,08:00:00,2,1,32064,65536,auto,singularity,default,false
46+
Llama3-OpenBioLLM-70B,Llama3-OpenBioLLM,70B,a40,m2,08:00:00,4,1,128256,8192,auto,singularity,default,false

vec_inf/multinode_vllm.slurm

Lines changed: 0 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -93,7 +93,6 @@ if [ "$VENV_BASE" = "singularity" ]; then
9393
--pipeline-parallel-size ${PIPELINE_PARALLEL_SIZE} \
9494
--tensor-parallel-size ${TENSOR_PARALLEL_SIZE} \
9595
--dtype ${VLLM_DATA_TYPE} \
96-
--load-format safetensors \
9796
--trust-remote-code \
9897
--max-logprobs ${VLLM_MAX_LOGPROBS} \
9998
--max-model-len ${VLLM_MAX_MODEL_LEN}
@@ -107,7 +106,6 @@ else
107106
--pipeline-parallel-size ${PIPELINE_PARALLEL_SIZE} \
108107
--tensor-parallel-size ${TENSOR_PARALLEL_SIZE} \
109108
--dtype ${VLLM_DATA_TYPE} \
110-
--load-format safetensors \
111109
--trust-remote-code \
112110
--max-logprobs ${VLLM_MAX_LOGPROBS} \
113111
--max-model-len ${VLLM_MAX_MODEL_LEN}

0 commit comments

Comments (0)