diff --git a/megatron/training/initialize.py b/megatron/training/initialize.py index a5c5fdb04c..b2ef8a8f45 100644 --- a/megatron/training/initialize.py +++ b/megatron/training/initialize.py @@ -254,8 +254,6 @@ def _initialize_distributed(get_embedding_ranks, get_position_embedding_ranks): 'rank': args.rank, 'timeout': timedelta(minutes=args.distributed_timeout_minutes), } - if packaging.version.Version(torch.__version__) >= packaging.version.Version("2.3.0"): - init_process_group_kwargs['device_id'] = device_id torch.distributed.init_process_group(**init_process_group_kwargs)