Skip to content

Commit

Permalink
update readme
Browse files Browse the repository at this point in the history
  • Loading branch information
ruili3 committed Apr 5, 2024
1 parent daa8197 commit 733f185
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 7 deletions.
17 changes: 10 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@

**CVPR 2024**

<a href=""><img src='https://img.shields.io/badge/arXiv-KYN-red' alt='Paper PDF'></a>
<a href="https://arxiv.org/abs/2404.03658"><img src='https://img.shields.io/badge/arXiv-KYN-red' alt='Paper PDF'></a>
<a href='https://ruili3.github.io/kyn/'><img src='https://img.shields.io/badge/Project_Page-KYN-green' alt='Project Page'></a>
<a href='https://huggingface.co/'><img src='https://img.shields.io/badge/Hugging_Face-KYN-yellow' alt='Hugging Face (TBA)'></a>
<a href='https://huggingface.co/'><img src='https://img.shields.io/badge/Hugging_Face-KYN_(coming_soon)-yellow' alt='Hugging Face'></a>
</div>

This work presents _Know-Your-Neighbors_ (KYN), a single-view 3D reconstruction method that disambiguates occluded scene geometry by utilizing Vision-Language semantics and spatial reasoning.
Expand All @@ -24,7 +24,6 @@ This work presents _Know-Your-Neighbors_ (KYN), a single-view 3D reconstruction
python -m venv kyn
source kyn/bin/activate
pip install -r requirements.txt

```

### 🚀 Quick Start
Expand Down Expand Up @@ -87,7 +86,11 @@ python scripts/gen_kitti360_voxel.py -cn gen_voxel
```

### 💻 Training
TBA
Download the [LSeg model](https://drive.google.com/file/d/1ayk6NXURI_vIPlym16f_RG3ffxBWHxvb/view?usp=sharing) and put it into `./checkpoints`. Then run:
```bash
torchrun --nproc_per_node=<num_of_gpus> train.py -cn train_kyn
```
where `<num_of_gpus>` denotes the number of available GPUs. Models will be saved in `./result` by default.


### 📰 Citation
Expand All @@ -101,13 +104,13 @@ Please cite our paper if you use the code in this repository:
}
```

### 🌟 Star History
<!-- ### 🌟 Star History
<div style="text-align: center;">
<a href="https://star-history.com/#ruili3/Know-Your-Neighbors&Date">
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=ruili3/Know-Your-Neighbors&type=Date&theme=dark" />
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=ruili3/Know-Your-Neighbors&type=Date" />
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=ruili3/Know-Your-Neighbors&type=Date" width="400"/>
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=ruili3/Know-Your-Neighbors&type=Date" width="600"/>
</picture>
</a>
</div>
</div> -->
104 changes: 104 additions & 0 deletions configs/train_kyn.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Training configuration for the "kyn" model on the KITTI-360 data config.
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped, which left every key at one level (with
# duplicate `type`, `n_blocks` and `d_hidden` keys). Key names and values are
# unchanged; confirm the parent of each key against the code that loads it.

# Config composition list; `_self_` comes last, so the values in this file
# are merged after the `default` and `data: kitti_360` entries.
defaults:
  - default
  - data: kitti_360
  - _self_

name: "kitti_360"
model: "kyn"
# Directory that training output is written to.
output_path: "./result"

backend: 'nccl'
num_epochs: 25
batch_size: 8

save_best:
  metric: abs_rel
  sign: -1
  # NOTE(review): presumably a child of save_best (it directly followed `sign`
  # with no blank line) -- confirm it is not read as a top-level key.
  per_epoch_save: false

data:
  data_fc: 2
  image_size: [192, 640]
  color_aug: true
  is_preprocessed: true
  fisheye_rotation: [0, -15]
  # NOTE(review): both paths are assumed to belong to `data` -- confirm.
  data_path: "./KITTI-360"
  pose_path: "./KITTI-360/data_poses"

model_conf:
  arch: "KYN"
  use_code: true
  prediction_mode: default

  code:
    num_freqs: 6
    freq_factor: 1.5
    include_input: true

  encoder:
    type: "vl_encoder"
    use_vision_model: true
    use_vlseg_model: true
    freeze_vision_model: false
    freeze_vlseg_model: true
    resnet_layers: 50
    num_ch_dec: [32, 32, 64, 128, 256]
    vision_d_out: 64
    # language settings
    ov_label_path: "datasets/ov_labels/outdoor_labels.txt"
    # Relative path to the LSeg checkpoint file under checkpoints/.
    lseg_model_path: "checkpoints/demo_e200.ckpt"
    fuse_type: "concat_reg"
    return_only_feat: false

  mlp_coarse:
    type: "vl_modulation_attention"
    d_hidden: 128
    attn_head: 4
    attn_kv_ch: 32
    lin_attn_type: "img_softmax_q"
    use_q_residual: true
    use_valid_pts_mask: true
    n_blocks: 4
    skip_layers: [2]
  mlp_fine:
    type: "empty"
    n_blocks: 1
    d_hidden: 128

  # NOTE(review): the remaining scalars up to `code_mode` are assumed to be
  # model-level settings under `model_conf` -- confirm against the model code.
  z_near: 3
  z_far: 80
  inv_z: true

  n_frames_encoder: 1
  n_frames_render: 2
  frame_sample_mode: kitti360-mono

  sample_mode: patch
  patch_size: 8
  ray_batch_size: 4096

  flip_augmentation: true

  learn_empty: false
  code_mode: z

loss:
  criterion: "l1+ssim"
  invalid_policy: weight_guided
  lambda_edge_aware_smoothness: 0.001

scheduler:
  type: step
  step_size: 120000
  gamma: 0.1

renderer:
  n_coarse: 64
  n_fine: 0
  n_fine_depth: 0
  depth_std: 1.0
  sched: []
  white_bkgd: false
  lindisp: true
  hard_alpha_cap: true

0 comments on commit 733f185

Please sign in to comment.