update readme

ruili3 · Apr 5, 2024 · 733f185 · 733f185
1 parent daa8197
commit 733f185
Show file tree

Hide file tree

Showing 2 changed files with 114 additions and 7 deletions.
diff --git a/README.md b/README.md
@@ -8,9 +8,9 @@
 
 **CVPR 2024**
 
-<a href=""><img src='https://img.shields.io/badge/arXiv-KYN-red' alt='Paper PDF'></a>
+<a href="https://arxiv.org/abs/2404.03658"><img src='https://img.shields.io/badge/arXiv-KYN-red' alt='Paper PDF'></a>
 <a href='https://ruili3.github.io/kyn/'><img src='https://img.shields.io/badge/Project_Page-KYN-green' alt='Project Page'></a>
-<a href='https://huggingface.co/'><img src='https://img.shields.io/badge/Hugging_Face-KYN-yellow' alt='Hugging Face (TBA)'></a>
+<a href='https://huggingface.co/'><img src='https://img.shields.io/badge/Hugging_Face-KYN_(coming_soon)-yellow' alt='Hugging Face'></a>
 </div>
 
 This work presents _Know-Your-Neighbors_ (KYN), a single-view 3D reconstruction method that disambiguates occluded scene geometry by utilizing Vision-Language semantics and spatial reasoning.
@@ -24,7 +24,6 @@ This work presents _Know-Your-Neighbors_ (KYN), a single-view 3D reconstruction
 python -m venv kyn
 source kyn/bin/activate
 pip install -r requirements.txt
-
 ```
 
 ### 🚀 Quick Start
@@ -87,7 +86,11 @@ python scripts/gen_kitti360_voxel.py -cn gen_voxel
 ```
 
 ### 💻 Training
-TBA
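+Download the [LSeg model](https://drive.google.com/file/d/1ayk6NXURI_vIPlym16f_RG3ffxBWHxvb/view?usp=sharing) and put it into `./checkpoints` (the training config reads it from `checkpoints/demo_e200.ckpt`, set by `lseg_model_path` in `configs/train_kyn.yaml`). Then run: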
+```bash
+torchrun --nproc_per_node=<num_of_gpus> train.py -cn train_kyn
+```
+where `<num_of_gpus>` denotes the number of available GPUs. Models will be saved in `./result` by default.
 
 
 ### 📰 Citation
@@ -101,13 +104,13 @@ Please cite our paper if you use the code in this repository:
 }
 ``` 
 
-### 🌟 Star History
+<!-- ### 🌟 Star History
 <div style="text-align: center;">
 <a href="https://star-history.com/#ruili3/Know-Your-Neighbors&Date">
   <picture>
     <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=ruili3/Know-Your-Neighbors&type=Date&theme=dark" />
     <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=ruili3/Know-Your-Neighbors&type=Date" />
-    <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=ruili3/Know-Your-Neighbors&type=Date" width="400"/>
+    <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=ruili3/Know-Your-Neighbors&type=Date" width="600"/>
   </picture>
 </a>
-</div>
+</div> -->
diff --git a/configs/train_kyn.yaml b/configs/train_kyn.yaml
@@ -0,0 +1,104 @@
+defaults:
+    - default
+    - data: kitti_360
+    - _self_
+
+name: "kitti_360"
+model: "kyn"
+output_path: "./result"
+
+
+backend: 'nccl'
+num_epochs: 25
+batch_size: 8
+
+save_best:
+    metric: abs_rel
+    sign: -1
+    per_epoch_save: false
+
+data:
+    data_fc: 2
+    image_size: [192, 640]
+    color_aug: true
+    is_preprocessed: true
+    fisheye_rotation: [0, -15]
+    data_path: "./KITTI-360"
+    pose_path: "./KITTI-360/data_poses"
+
+model_conf:
+    arch: "KYN"
+    use_code: true
+    prediction_mode: default
+
+    code:
+        num_freqs: 6
+        freq_factor: 1.5
+        include_input: true
+
+    encoder:
+        type: "vl_encoder"
+        use_vision_model: true
+        use_vlseg_model: true
+        freeze_vision_model: false
+        freeze_vlseg_model: true
+        resnet_layers: 50
+        num_ch_dec: [32,32,64,128,256]
+        vision_d_out: 64
+        # language settings
+        ov_label_path: "datasets/ov_labels/outdoor_labels.txt"
+        lseg_model_path: "checkpoints/demo_e200.ckpt"
+        fuse_type: "concat_reg"
+        return_only_feat: false
+
+    mlp_coarse:
+        type : "vl_modulation_attention"
+        d_hidden : 128
+        attn_head: 4
+        attn_kv_ch: 32
+        lin_attn_type: "img_softmax_q"
+        use_q_residual: true
+        use_valid_pts_mask: true
+        n_blocks : 4
+        skip_layers: [2]
+    mlp_fine:
+        type : "empty"
+        n_blocks : 1
+        d_hidden : 128
+
+    z_near: 3
+    z_far: 80
+    inv_z: true
+
+    n_frames_encoder: 1
+    n_frames_render: 2
+    frame_sample_mode: kitti360-mono
+
+    sample_mode: patch
+    patch_size: 8
+    ray_batch_size: 4096
+
+    flip_augmentation: true
+
+    learn_empty: false
+    code_mode: z
+
+loss:
+    criterion: "l1+ssim"
+    invalid_policy: weight_guided
+    lambda_edge_aware_smoothness: 0.001
+
+scheduler:
+    type: step
+    step_size: 120000
+    gamma: 0.1
+
+renderer:
+    n_coarse : 64
+    n_fine : 0
+    n_fine_depth : 0
+    depth_std : 1.0
+    sched : []
+    white_bkgd : false
+    lindisp: true
+    hard_alpha_cap: true