diff --git a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py index 07a94626409..1d07d945a42 100644 --- a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py +++ b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py @@ -1032,7 +1032,7 @@ def load_low_bit(cls, pretrained_model_name_or_path, *model_args, **kwargs): variant = kwargs.pop("variant", None) offload_folder = kwargs.pop("offload_folder", None) offload_state_dict = kwargs.pop("offload_state_dict", False) - torch_dtype = kwargs.get("torch_dtype", "auto") + torch_dtype = kwargs.pop("torch_dtype", "auto") cache_dir = kwargs.pop("cache_dir", None) force_download = kwargs.pop("force_download", False) proxies = kwargs.pop("proxies", None)