-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit c86c2b7
Showing
65 changed files
with
7,728 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# Training image: NVIDIA PyTorch base with a rebuilt OpenMPI, Horovod (NCCL)
# and an SSH client/server. The working directory is /src.
FROM nvcr.io/nvidia/pytorch:19.05-py3

# basic python packages (all pinned; --no-cache-dir keeps pip's download
# cache out of the image layer)
RUN pip install --no-cache-dir \
        ipdb==0.12 \
        lmdb==0.97 \
        lz4==2.1.9 \
        pytorch-pretrained-bert==0.6.2 \
        tensorboardX==1.7

####### horovod for multi-GPU (distributed) training #######

# Single source of truth for the OpenMPI release that is built below.
ENV OPENMPI_VERSION=3.1.4

# update OpenMPI to avoid horovod bug
# - ${OPENMPI_VERSION%.*} strips the patch component (3.1.4 -> 3.1) to form the
#   release directory of the download URL.
# - tar -xzf replaces `gunzip -c | tar xf -` so a failed decompression is not
#   masked by the pipe's exit status.
# - Sources and tarball are removed in the same layer that created them.
RUN rm -r /usr/local/mpi &&\
    wget https://download.open-mpi.org/release/open-mpi/v${OPENMPI_VERSION%.*}/openmpi-${OPENMPI_VERSION}.tar.gz &&\
    tar -xzf openmpi-${OPENMPI_VERSION}.tar.gz &&\
    cd openmpi-${OPENMPI_VERSION} &&\
    ./configure --prefix=/usr/local/mpi --enable-orterun-prefix-by-default \
        --with-verbs --disable-getpwuid &&\
    make -j$(nproc) all && make install &&\
    ldconfig &&\
    cd - && rm -r openmpi-${OPENMPI_VERSION} && rm openmpi-${OPENMPI_VERSION}.tar.gz

# horovod
RUN HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_NCCL_LINK=SHARED HOROVOD_WITH_PYTORCH=1 \
        pip install --no-cache-dir horovod==0.16.4 &&\
    ldconfig

# ssh
# The apt package index is deleted in the same layer so it does not bloat the image.
RUN apt-get update &&\
    apt-get install -y --no-install-recommends openssh-client openssh-server &&\
    mkdir -p /var/run/sshd &&\
    rm -rf /var/lib/apt/lists/*

# Allow OpenSSH to talk to containers without asking for confirmation
# NOTE(review): this disables host-key verification for every outgoing SSH
# connection from the container; only acceptable on a trusted cluster network.
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new && \
    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config

WORKDIR /src
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2019 Microsoft Corporation | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
# NSGDC | ||
|
||
Some code in this repo is copied/modified from open-source implementations made available by | ||
[UNITER](https://github.com/ChenRocks/UNITER), | ||
[PyTorch](https://github.com/pytorch/pytorch), | ||
[HuggingFace](https://github.com/huggingface/transformers), | ||
[OpenNMT](https://github.com/OpenNMT/OpenNMT-py), | ||
and [Nvidia](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch). | ||
The image features are extracted using [BUTD](https://github.com/peteanderson80/bottom-up-attention). | ||
|
||
|
||
## Requirements | ||
The requirements follow UNITER. We provide a Docker image for easier reproduction. Please install the following: | ||
- [nvidia driver](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#package-manager-installation) (418+), | ||
- [Docker](https://docs.docker.com/install/linux/docker-ce/ubuntu/) (19.03+), | ||
- [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-docker#quickstart). | ||
|
||
Our scripts require the user to have the [docker group membership](https://docs.docker.com/install/linux/linux-postinstall/) | ||
so that docker commands can be run without sudo. | ||
We only support Linux with NVIDIA GPUs. We test on Ubuntu 18.04 and V100 cards. | ||
We use mixed-precision training hence GPUs with Tensor Cores are recommended. | ||
|
||
## Image-Text Retrieval | ||
### Download Data | ||
``` | ||
bash scripts/download_itm.sh $PATH_TO_STORAGE | ||
``` | ||
|
||
### Launch the Docker Container | ||
```bash | ||
# docker image should be automatically pulled | ||
source launch_container.sh $PATH_TO_STORAGE/txt_db $PATH_TO_STORAGE/img_db \ | ||
$PATH_TO_STORAGE/finetune $PATH_TO_STORAGE/pretrained | ||
``` | ||
|
||
In case you would like to reproduce the whole preprocessing pipeline. | ||
|
||
The launch script respects the $CUDA_VISIBLE_DEVICES environment variable. | ||
Note that the source code is mounted into the container under `/src` instead | ||
of built into the image so that user modification will be reflected without | ||
re-building the image. (Data folders are mounted into the container separately | ||
for flexibility on folder structures.) | ||
|
||
|
||
### Image-Text Retrieval (Flickr30k) | ||
``` | ||
# Train with the base setting | ||
bash run_cmds/tran_pnsgd_base_flickr.sh | ||
bash run_cmds/tran_pnsgd2_base_flickr.sh | ||
# Train with the large setting | ||
bash run_cmds/tran_pnsgd_large_flickr.sh | ||
bash run_cmds/tran_pnsgd2_large_flickr.sh | ||
``` | ||
|
||
### Image-Text Retrieval (COCO) | ||
``` | ||
# Train with the base setting | ||
bash run_cmds/tran_pnsgd_base_coco.sh | ||
bash run_cmds/tran_pnsgd2_base_coco.sh | ||
# Train with the large setting | ||
bash run_cmds/tran_pnsgd_large_coco.sh | ||
bash run_cmds/tran_pnsgd2_large_coco.sh | ||
``` | ||
|
||
### Run Inference | ||
``` | ||
bash run_cmds/inf_nsgd.sh | ||
``` | ||
|
||
## Results | ||
|
||
Our models achieve the following performance. | ||
|
||
### MS-COCO | ||
<table> | ||
<tr> | ||
<th rowspan="2">Model</th> | ||
<th colspan="3">Image-to-Text</th> | ||
<th colspan="3">Text-to-Image</th> | ||
</tr > | ||
<tr> | ||
<td>R@1</td> | ||
<td>R@5</td> | ||
<td>R@10</td> | ||
<td>R@1</td> | ||
<td>R@5</td> | ||
<td>R@10</td> | ||
</tr> | ||
<tr> | ||
<td>NSGDC-Base</td> | ||
<td>66.6</td> | ||
<td>88.6</td> | ||
<td>94.0</td> | ||
<td>51.6</td> | ||
<td>79.1</td> | ||
<td>87.5</td> | ||
</tr> | ||
<tr> | ||
<td>NSGDC-Large</td> | ||
<td>67.8</td> | ||
<td>89.6</td> | ||
<td>94.2</td> | ||
<td>53.3</td> | ||
<td>80.0</td> | ||
<td>88.0</td> | ||
</tr> | ||
</table> | ||
|
||
### Flickr30K | ||
|
||
|
||
<table> | ||
<tr> | ||
<th rowspan="2">Model</th> | ||
<th colspan="3">Image-to-Text</th> | ||
<th colspan="3">Text-to-Image</th> | ||
</tr > | ||
<tr> | ||
<td>R@1</td> | ||
<td>R@5</td> | ||
<td>R@10</td> | ||
<td>R@1</td> | ||
<td>R@5</td> | ||
<td>R@10</td> | ||
</tr> | ||
<tr> | ||
<td>NSGDC-Base</td> | ||
<td>87.9</td> | ||
<td>98.1</td> | ||
<td>99.3</td> | ||
<td>74.5</td> | ||
<td>93.3</td> | ||
<td>96.3</td> | ||
</tr> | ||
<tr> | ||
<td>NSGDC-Large</td> | ||
<td>90.6</td> | ||
<td>98.8</td> | ||
<td>99.1</td> | ||
<td>77.3</td> | ||
<td>94.3</td> | ||
<td>97.3</td> | ||
</tr> | ||
</table> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
{ | ||
"compressed_db": false, | ||
"checkpoint": "log/pretrained/uniter-base.pt", | ||
"max_txt_len": 60, | ||
"conf_th": 0.2, | ||
"max_bb": 100, | ||
"min_bb": 10, | ||
"num_bb": 36, | ||
"train_batch_size": 32, | ||
"negative_size": 399, | ||
"hard_neg_size": 31, | ||
"inf_minibatch_size": 400, | ||
"margin": 0.2, | ||
"valid_steps": 500, | ||
"num_train_steps": 5000, | ||
"optim": "adamw", | ||
"betas": [ | ||
0.9, | ||
0.98 | ||
], | ||
"dropout": 0.1, | ||
"weight_decay": 0.01, | ||
"grad_norm": 2.0, | ||
"warmup_steps": 500, | ||
"seed": 42, | ||
"full_val": true, | ||
"fp16": true, | ||
"n_workers": 4, | ||
"pin_mem": true, | ||
"train_txt_dbs": [ | ||
"itm-data/txt_db3/itm_coco_train.db", | ||
"itm-data/txt_db3/itm_coco_restval.db" | ||
], | ||
"train_img_dbs": [ | ||
"itm-data/img_db/coco_train2014/", | ||
"itm-data/img_db/coco_val2014/" | ||
], | ||
"val_txt_db": "itm-data/txt_db3/itm_coco_val.db", | ||
"val_img_db": "itm-data/img_db/coco_val2014", | ||
"test_txt_db": "itm-data/txt_db3/itm_coco_test.db", | ||
"test_img_db": "itm-data/img_db/coco_val2014", | ||
"model_config": "config/uniter-base.json" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
{ | ||
"compressed_db": false, | ||
"checkpoint": "log/pretrained/uniter-base.pt", | ||
"max_txt_len": 60, | ||
"conf_th": 0.2, | ||
"max_bb": 100, | ||
"min_bb": 10, | ||
"num_bb": 36, | ||
"train_batch_size": 32, | ||
"negative_size": 399, | ||
"hard_neg_size": 31, | ||
"inf_minibatch_size": 400, | ||
"margin": 0.2, | ||
"valid_steps": 500, | ||
"num_train_steps": 5000, | ||
"optim": "adamw", | ||
"betas": [ | ||
0.9, | ||
0.98 | ||
], | ||
"dropout": 0.1, | ||
"weight_decay": 0.01, | ||
"grad_norm": 2.0, | ||
"warmup_steps": 500, | ||
"seed": 42, | ||
"full_val": true, | ||
"fp16": true, | ||
"n_workers": 4, | ||
"pin_mem": true, | ||
"train_txt_dbs": [ | ||
"itm-data/txt_db3/itm_flickr30k_train.db" | ||
], | ||
"train_img_dbs": [ | ||
"itm-data/img_db/flickr30k/" | ||
], | ||
"val_txt_db": "itm-data/txt_db3/itm_flickr30k_val.db", | ||
"val_img_db": "itm-data/img_db/flickr30k/", | ||
"test_txt_db": "itm-data/txt_db3/itm_flickr30k_test.db", | ||
"test_img_db": "itm-data/img_db/flickr30k/", | ||
"model_config": "config/uniter-base.json" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
{ | ||
"compressed_db": false, | ||
"checkpoint": "log/pretrained/uniter-large.pt", | ||
"max_txt_len": 60, | ||
"conf_th": 0.2, | ||
"max_bb": 100, | ||
"min_bb": 10, | ||
"num_bb": 36, | ||
"train_batch_size": 32, | ||
"negative_size": 399, | ||
"hard_neg_size": 31, | ||
"inf_minibatch_size": 400, | ||
"margin": 0.2, | ||
"valid_steps": 500, | ||
"num_train_steps": 5000, | ||
"optim": "adamw", | ||
"betas": [ | ||
0.9, | ||
0.98 | ||
], | ||
"dropout": 0.1, | ||
"weight_decay": 0.01, | ||
"grad_norm": 2.0, | ||
"warmup_steps": 500, | ||
"seed": 42, | ||
"full_val": true, | ||
"fp16": true, | ||
"n_workers": 4, | ||
"pin_mem": true, | ||
"train_txt_dbs": [ | ||
"itm-data/txt_db3/itm_coco_train.db", | ||
"itm-data/txt_db3/itm_coco_restval.db" | ||
], | ||
"train_img_dbs": [ | ||
"itm-data/img_db/coco_train2014/", | ||
"itm-data/img_db/coco_val2014/" | ||
], | ||
"val_txt_db": "itm-data/txt_db3/itm_coco_val.db", | ||
"val_img_db": "itm-data/img_db/coco_val2014", | ||
"test_txt_db": "itm-data/txt_db3/itm_coco_test.db", | ||
"test_img_db": "itm-data/img_db/coco_val2014", | ||
"model_config": "config/uniter-large.json" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
{ | ||
"compressed_db": false, | ||
"checkpoint": "log/pretrained/uniter-large.pt", | ||
"max_txt_len": 60, | ||
"conf_th": 0.2, | ||
"max_bb": 100, | ||
"min_bb": 10, | ||
"num_bb": 36, | ||
"train_batch_size": 16, | ||
"negative_size": 399, | ||
"hard_neg_size": 31, | ||
"inf_minibatch_size": 400, | ||
"margin": 0.2, | ||
"valid_steps": 500, | ||
"num_train_steps": 5000, | ||
"optim": "adamw", | ||
"betas": [ | ||
0.9, | ||
0.98 | ||
], | ||
"dropout": 0.1, | ||
"weight_decay": 0.01, | ||
"grad_norm": 2.0, | ||
"warmup_steps": 500, | ||
"seed": 42, | ||
"full_val": true, | ||
"fp16": true, | ||
"n_workers": 4, | ||
"pin_mem": true, | ||
"train_txt_dbs": [ | ||
"itm-data/txt_db3/itm_flickr30k_train.db" | ||
], | ||
"train_img_dbs": [ | ||
"itm-data/img_db/flickr30k/" | ||
], | ||
"val_txt_db": "itm-data/txt_db3/itm_flickr30k_val.db", | ||
"val_img_db": "itm-data/img_db/flickr30k/", | ||
"test_txt_db": "itm-data/txt_db3/itm_flickr30k_test.db", | ||
"test_img_db": "itm-data/img_db/flickr30k/", | ||
"model_config": "config/uniter-large.json" | ||
} |
Oops, something went wrong.