-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Feature] Support PGD and multi-view FCOS3D++ on Waymo (#2835)
Co-authored-by: JingweiZhang12 <[email protected]> Co-authored-by: sjh <sunjiahao1999>
- Loading branch information
1 parent
2dad86c
commit 88b8694
Showing
16 changed files
with
909 additions
and
127 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
# dataset settings | ||
# D3 in the config name means the whole dataset is divided into 3 folds | ||
# We only use one fold for efficient experiments | ||
dataset_type = 'WaymoDataset' | ||
data_root = 'data/waymo/kitti_format/' | ||
class_names = ['Pedestrian', 'Cyclist', 'Car'] | ||
metainfo = dict(classes=class_names) | ||
input_modality = dict(use_lidar=False, use_camera=True) | ||
|
||
# Example to use different file client | ||
# Method 1: simply set the data root and let the file I/O module | ||
# automatically infer from prefix (not support LMDB and Memcache yet) | ||
|
||
# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/' | ||
|
||
# Method 2: Use backend_args, file_client_args in versions before 1.1.0 | ||
# backend_args = dict( | ||
# backend='petrel', | ||
# path_mapping=dict({ | ||
# './data/': 's3://openmmlab/datasets/detection3d/', | ||
# 'data/': 's3://openmmlab/datasets/detection3d/' | ||
# })) | ||
backend_args = None | ||
|
||
train_pipeline = [ | ||
dict(type='LoadImageFromFileMono3D', backend_args=backend_args), | ||
dict( | ||
type='LoadAnnotations3D', | ||
with_bbox=True, | ||
with_label=True, | ||
with_attr_label=False, | ||
with_bbox_3d=True, | ||
with_label_3d=True, | ||
with_bbox_depth=True), | ||
# base shape (1248, 832), scale (0.95, 1.05) | ||
dict( | ||
type='RandomResize3D', | ||
scale=(1248, 832), | ||
ratio_range=(0.95, 1.05), | ||
# ratio_range=(1., 1.), | ||
interpolation='nearest', | ||
keep_ratio=True, | ||
), | ||
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), | ||
dict( | ||
type='Pack3DDetInputs', | ||
keys=[ | ||
'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d', | ||
'gt_labels_3d', 'centers_2d', 'depths' | ||
]), | ||
] | ||
|
||
test_pipeline = [ | ||
dict(type='LoadImageFromFileMono3D', backend_args=backend_args), | ||
dict( | ||
type='RandomResize3D', | ||
scale=(1248, 832), | ||
ratio_range=(1., 1.), | ||
interpolation='nearest', | ||
keep_ratio=True), | ||
dict( | ||
type='Pack3DDetInputs', | ||
keys=['img'], | ||
meta_keys=[ | ||
'box_type_3d', 'img_shape', 'cam2img', 'scale_factor', | ||
'sample_idx', 'context_name', 'timestamp', 'lidar2cam' | ||
]), | ||
] | ||
# construct a pipeline for data and gt loading in show function | ||
# please keep its loading function consistent with test_pipeline (e.g. client) | ||
eval_pipeline = [ | ||
dict(type='LoadImageFromFileMono3D', backend_args=backend_args), | ||
dict( | ||
type='RandomResize3D', | ||
scale=(1248, 832), | ||
ratio_range=(1., 1.), | ||
interpolation='nearest', | ||
keep_ratio=True), | ||
dict( | ||
type='Pack3DDetInputs', | ||
keys=['img'], | ||
meta_keys=[ | ||
'box_type_3d', 'img_shape', 'cam2img', 'scale_factor', | ||
'sample_idx', 'context_name', 'timestamp', 'lidar2cam' | ||
]), | ||
] | ||
|
||
train_dataloader = dict( | ||
batch_size=3, | ||
num_workers=3, | ||
persistent_workers=True, | ||
sampler=dict(type='DefaultSampler', shuffle=True), | ||
dataset=dict( | ||
type=dataset_type, | ||
data_root=data_root, | ||
ann_file='waymo_infos_train.pkl', | ||
data_prefix=dict( | ||
pts='training/velodyne', | ||
CAM_FRONT='training/image_0', | ||
CAM_FRONT_LEFT='training/image_1', | ||
CAM_FRONT_RIGHT='training/image_2', | ||
CAM_SIDE_LEFT='training/image_3', | ||
CAM_SIDE_RIGHT='training/image_4'), | ||
pipeline=train_pipeline, | ||
modality=input_modality, | ||
test_mode=False, | ||
metainfo=metainfo, | ||
cam_sync_instances=True, | ||
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset | ||
# and box_type_3d='Depth' in sunrgbd and scannet dataset. | ||
box_type_3d='Camera', | ||
load_type='fov_image_based', | ||
# load one frame every three frames | ||
load_interval=3, | ||
backend_args=backend_args)) | ||
|
||
val_dataloader = dict( | ||
batch_size=1, | ||
num_workers=1, | ||
persistent_workers=True, | ||
drop_last=False, | ||
sampler=dict(type='DefaultSampler', shuffle=False), | ||
dataset=dict( | ||
type=dataset_type, | ||
data_root=data_root, | ||
data_prefix=dict( | ||
pts='training/velodyne', | ||
CAM_FRONT='training/image_0', | ||
CAM_FRONT_LEFT='training/image_1', | ||
CAM_FRONT_RIGHT='training/image_2', | ||
CAM_SIDE_LEFT='training/image_3', | ||
CAM_SIDE_RIGHT='training/image_4'), | ||
ann_file='waymo_infos_val.pkl', | ||
pipeline=eval_pipeline, | ||
modality=input_modality, | ||
test_mode=True, | ||
metainfo=metainfo, | ||
cam_sync_instances=True, | ||
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset | ||
# and box_type_3d='Depth' in sunrgbd and scannet dataset. | ||
box_type_3d='Camera', | ||
load_type='fov_image_based', | ||
load_eval_anns=False, | ||
backend_args=backend_args)) | ||
|
||
test_dataloader = dict( | ||
batch_size=1, | ||
num_workers=1, | ||
persistent_workers=True, | ||
drop_last=False, | ||
sampler=dict(type='DefaultSampler', shuffle=False), | ||
dataset=dict( | ||
type=dataset_type, | ||
data_root=data_root, | ||
data_prefix=dict( | ||
pts='training/velodyne', | ||
CAM_FRONT='training/image_0', | ||
CAM_FRONT_LEFT='training/image_1', | ||
CAM_FRONT_RIGHT='training/image_2', | ||
CAM_SIDE_LEFT='training/image_3', | ||
CAM_SIDE_RIGHT='training/image_4'), | ||
ann_file='waymo_infos_val.pkl', | ||
pipeline=eval_pipeline, | ||
modality=input_modality, | ||
test_mode=True, | ||
metainfo=metainfo, | ||
cam_sync_instances=True, | ||
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset | ||
# and box_type_3d='Depth' in sunrgbd and scannet dataset. | ||
box_type_3d='Camera', | ||
load_type='fov_image_based', | ||
backend_args=backend_args)) | ||
|
||
val_evaluator = dict( | ||
type='WaymoMetric', | ||
waymo_bin_file='./data/waymo/waymo_format/fov_gt.bin', | ||
metric='LET_mAP', | ||
load_type='fov_image_based', | ||
result_prefix='./pgd_fov_pred') | ||
test_evaluator = val_evaluator | ||
|
||
vis_backends = [dict(type='LocalVisBackend')] | ||
visualizer = dict( | ||
type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
# dataset settings | ||
# D3 in the config name means the whole dataset is divided into 3 folds | ||
# We only use one fold for efficient experiments | ||
dataset_type = 'WaymoDataset' | ||
data_root = 'data/waymo/kitti_format/' | ||
class_names = ['Pedestrian', 'Cyclist', 'Car'] | ||
metainfo = dict(classes=class_names) | ||
input_modality = dict(use_lidar=False, use_camera=True) | ||
|
||
# Example to use different file client | ||
# Method 1: simply set the data root and let the file I/O module | ||
# automatically infer from prefix (not support LMDB and Memcache yet) | ||
|
||
# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/' | ||
|
||
# Method 2: Use backend_args, file_client_args in versions before 1.1.0 | ||
# backend_args = dict( | ||
# backend='petrel', | ||
# path_mapping=dict({ | ||
# './data/': 's3://openmmlab/datasets/detection3d/', | ||
# 'data/': 's3://openmmlab/datasets/detection3d/' | ||
# })) | ||
backend_args = None | ||
|
||
train_pipeline = [ | ||
dict(type='LoadImageFromFileMono3D', backend_args=backend_args), | ||
dict( | ||
type='LoadAnnotations3D', | ||
with_bbox=True, | ||
with_label=True, | ||
with_attr_label=False, | ||
with_bbox_3d=True, | ||
with_label_3d=True, | ||
with_bbox_depth=True), | ||
# base shape (1248, 832), scale (0.95, 1.05) | ||
dict( | ||
type='RandomResize3D', | ||
scale=(1248, 832), | ||
# ratio_range=(1., 1.), | ||
ratio_range=(0.95, 1.05), | ||
interpolation='nearest', | ||
keep_ratio=True, | ||
), | ||
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), | ||
dict( | ||
type='Pack3DDetInputs', | ||
keys=[ | ||
'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d', | ||
'gt_labels_3d', 'centers_2d', 'depths' | ||
]), | ||
] | ||
|
||
test_pipeline = [ | ||
dict(type='LoadImageFromFileMono3D', backend_args=backend_args), | ||
dict( | ||
type='Resize3D', | ||
scale_factor=0.65, | ||
interpolation='nearest', | ||
keep_ratio=True), | ||
dict( | ||
type='Pack3DDetInputs', | ||
keys=['img'], | ||
meta_keys=[ | ||
'box_type_3d', 'img_shape', 'cam2img', 'scale_factor', | ||
'sample_idx', 'context_name', 'timestamp', 'lidar2cam' | ||
]), | ||
] | ||
# construct a pipeline for data and gt loading in show function | ||
# please keep its loading function consistent with test_pipeline (e.g. client) | ||
eval_pipeline = [ | ||
dict(type='LoadImageFromFileMono3D', backend_args=backend_args), | ||
dict( | ||
type='Resize3D', | ||
scale_factor=0.65, | ||
interpolation='nearest', | ||
keep_ratio=True), | ||
dict( | ||
type='Pack3DDetInputs', | ||
keys=['img'], | ||
meta_keys=[ | ||
'box_type_3d', 'img_shape', 'cam2img', 'scale_factor', | ||
'sample_idx', 'context_name', 'timestamp', 'lidar2cam' | ||
]), | ||
] | ||
|
||
train_dataloader = dict( | ||
batch_size=3, | ||
num_workers=3, | ||
persistent_workers=True, | ||
sampler=dict(type='DefaultSampler', shuffle=True), | ||
dataset=dict( | ||
type=dataset_type, | ||
data_root=data_root, | ||
ann_file='waymo_infos_train.pkl', | ||
data_prefix=dict( | ||
pts='training/velodyne', | ||
CAM_FRONT='training/image_0', | ||
CAM_FRONT_LEFT='training/image_1', | ||
CAM_FRONT_RIGHT='training/image_2', | ||
CAM_SIDE_LEFT='training/image_3', | ||
CAM_SIDE_RIGHT='training/image_4'), | ||
pipeline=train_pipeline, | ||
modality=input_modality, | ||
test_mode=False, | ||
metainfo=metainfo, | ||
cam_sync_instances=True, | ||
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset | ||
# and box_type_3d='Depth' in sunrgbd and scannet dataset. | ||
box_type_3d='Camera', | ||
load_type='mv_image_based', | ||
# load one frame every three frames | ||
load_interval=3, | ||
backend_args=backend_args)) | ||
|
||
val_dataloader = dict( | ||
batch_size=1, | ||
num_workers=0, | ||
persistent_workers=False, | ||
drop_last=False, | ||
sampler=dict(type='DefaultSampler', shuffle=False), | ||
dataset=dict( | ||
type=dataset_type, | ||
data_root=data_root, | ||
data_prefix=dict( | ||
pts='training/velodyne', | ||
CAM_FRONT='training/image_0', | ||
CAM_FRONT_LEFT='training/image_1', | ||
CAM_FRONT_RIGHT='training/image_2', | ||
CAM_SIDE_LEFT='training/image_3', | ||
CAM_SIDE_RIGHT='training/image_4'), | ||
ann_file='waymo_infos_val.pkl', | ||
pipeline=eval_pipeline, | ||
modality=input_modality, | ||
test_mode=True, | ||
metainfo=metainfo, | ||
cam_sync_instances=True, | ||
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset | ||
# and box_type_3d='Depth' in sunrgbd and scannet dataset. | ||
box_type_3d='Camera', | ||
load_type='mv_image_based', | ||
# load_eval_anns=False, | ||
backend_args=backend_args)) | ||
|
||
test_dataloader = dict( | ||
batch_size=1, | ||
num_workers=0, | ||
persistent_workers=False, | ||
drop_last=False, | ||
sampler=dict(type='DefaultSampler', shuffle=False), | ||
dataset=dict( | ||
type=dataset_type, | ||
data_root=data_root, | ||
data_prefix=dict( | ||
pts='training/velodyne', | ||
CAM_FRONT='training/image_0', | ||
CAM_FRONT_LEFT='training/image_1', | ||
CAM_FRONT_RIGHT='training/image_2', | ||
CAM_SIDE_LEFT='training/image_3', | ||
CAM_SIDE_RIGHT='training/image_4'), | ||
ann_file='waymo_infos_val.pkl', | ||
pipeline=eval_pipeline, | ||
modality=input_modality, | ||
test_mode=True, | ||
metainfo=metainfo, | ||
cam_sync_instances=True, | ||
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset | ||
# and box_type_3d='Depth' in sunrgbd and scannet dataset. | ||
box_type_3d='Camera', | ||
load_type='mv_image_based', | ||
load_eval_anns=False, | ||
backend_args=backend_args)) | ||
|
||
val_evaluator = dict( | ||
type='WaymoMetric', | ||
waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin', | ||
metric='LET_mAP', | ||
load_type='mv_image_based', | ||
result_prefix='./pgd_mv_pred', | ||
nms_cfg=dict( | ||
use_rotate_nms=True, | ||
nms_across_levels=False, | ||
nms_pre=500, | ||
nms_thr=0.05, | ||
score_thr=0.001, | ||
min_bbox_size=0, | ||
max_per_frame=100)) | ||
test_evaluator = val_evaluator | ||
|
||
vis_backends = [dict(type='LocalVisBackend')] | ||
visualizer = dict( | ||
type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer') |
Oops, something went wrong.