2024-12-10 nightly release (fad795e)
pytorchbot committed Dec 10, 2024
1 parent f87ab38 commit 876c8fc
Showing 4 changed files with 16 additions and 8 deletions.
.github/workflows/pyre.yml (2 changes: 1 addition & 1 deletion)
@@ -19,7 +19,7 @@ jobs:
       uses: actions/checkout@v2
     - name: Install dependencies
       run: >
-        conda install --yes pytorch cpuonly -c pytorch-nightly &&
+        pip install torch --index-url https://download.pytorch.org/whl/nightly/cpu &&
         pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu &&
         pip install -r requirements.txt &&
         pip install pyre-check-nightly==$(cat .pyre_configuration | grep version | awk '{print $2}' | sed 's/\"//g')
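
A note on the last command in this step: .pyre_configuration is a JSON file, and the grep/awk/sed pipeline extracts its pinned "version" field so that the matching pyre-check-nightly build is installed. A minimal Python sketch of the same extraction (an illustrative helper, not part of this commit):

    import json

    # Read the pinned Pyre version from .pyre_configuration (a JSON file),
    # mirroring the grep | awk | sed pipeline in the workflow step above.
    with open(".pyre_configuration") as f:
        version = json.load(f)["version"]
    print(f"pyre-check-nightly=={version}")
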
.github/workflows/unittest_ci.yml (10 changes: 6 additions & 4 deletions)
@@ -73,13 +73,15 @@ jobs:
           conda info
           python --version
           conda run -n build_binary python --version
-          conda install -n build_binary \
-            --yes \
-            pytorch pytorch-cuda=11.8 -c pytorch-nightly -c nvidia
+          conda run -n build_binary \
+            pip install torch --index-url https://download.pytorch.org/whl/nightly/${{ matrix.cuda-tag }}
           conda run -n build_binary \
             python -c "import torch"
           echo "torch succeeded"
           conda run -n build_binary \
             python -c "import torch.distributed"
           conda run -n build_binary \
-            pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cu118
+            pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/${{ matrix.cuda-tag }}
           conda run -n build_binary \
             python -c "import fbgemm_gpu"
           echo "fbgemm_gpu succeeded"
.github/workflows/unittest_ci_cpu.yml (8 changes: 5 additions & 3 deletions)
@@ -45,9 +45,11 @@ jobs:
           conda info
           python --version
           conda run -n build_binary python --version
-          conda install -n build_binary \
-            --yes \
-            pytorch cpuonly -c pytorch-nightly
+          conda run -n build_binary \
+            pip install torch --index-url https://download.pytorch.org/whl/nightly/cpu
           conda run -n build_binary \
             python -c "import torch"
           echo "torch succeeded"
           conda run -n build_binary \
             python -c "import torch.distributed"
           conda run -n build_binary \
torchrec/distributed/model_parallel.py (4 changes: 4 additions & 0 deletions)
@@ -746,6 +746,7 @@ def sync(self, include_optimizer_state: bool = True) -> None:
         all_weights = [
             w
             for emb_kernel in self._modules_to_sync
+            # pyre-fixme[29]: `Union[Module, Tensor]` is not a function.
             for w in emb_kernel.split_embedding_weights()
         ]
         handle = self._replica_pg.allreduce_coalesced(all_weights, opts=opts)
@@ -755,6 +756,7 @@ def sync(self, include_optimizer_state: bool = True) -> None:
         # Sync accumulated square of grad of local optimizer shards
         optim_list = []
         for emb_kernel in self._modules_to_sync:
+            # pyre-fixme[29]: `Union[Module, Tensor]` is not a function.
             all_optimizer_states = emb_kernel.get_optimizer_state()
             momentum1 = [optim["sum"] for optim in all_optimizer_states]
             optim_list.extend(momentum1)
@@ -864,6 +866,8 @@ def _find_sharded_modules(
             if isinstance(module, SplitTableBatchedEmbeddingBagsCodegen):
                 sharded_modules.append(module)
             if hasattr(module, "_lookups"):
+                # pyre-fixme[29]: `Union[(self: Tensor) -> Any, Module, Tensor]` is
+                # not a function.
                 for lookup in module._lookups:
                     _find_sharded_modules(lookup)
             return
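
All of the new pyre-fixme[29] suppressions stem from the same typing quirk: when a value is statically typed as torch.nn.Module, attribute access resolves through Module.__getattr__, which is annotated to return Union[Tensor, Module], so Pyre cannot tell the attribute is callable even though the concrete kernel class provides the method at runtime. A minimal self-contained sketch of the pattern (ToyKernel and collect_weights are hypothetical stand-ins, not part of this commit):

    import torch
    from torch import nn

    class ToyKernel(nn.Module):
        """Hypothetical stand-in for SplitTableBatchedEmbeddingBagsCodegen."""

        def __init__(self) -> None:
            super().__init__()
            self.weight = nn.Parameter(torch.zeros(4, 2))

        def split_embedding_weights(self) -> list[torch.Tensor]:
            return [self.weight.data]

    def collect_weights(modules_to_sync: list[nn.Module]) -> list[torch.Tensor]:
        weights: list[torch.Tensor] = []
        for emb_kernel in modules_to_sync:
            # Statically, `emb_kernel` is just an nn.Module, so Pyre resolves
            # this attribute via Module.__getattr__ -> Union[Tensor, Module]
            # and reports [29]: not a function; at runtime the call is fine.
            # pyre-fixme[29]: `Union[Module, Tensor]` is not a function.
            weights.extend(emb_kernel.split_embedding_weights())
        return weights

    print(collect_weights([ToyKernel()]))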