diff --git a/hrdae/conf b/hrdae/conf index 0e066ce..3a69a73 160000 --- a/hrdae/conf +++ b/hrdae/conf @@ -1 +1 @@ -Subproject commit 0e066ce5b7bb03b9fd351c55bad58a5249d8a7e9 +Subproject commit 3a69a73ebed2e5eda896362b9abf71c954c6c75f diff --git a/tuning/ct.py b/tuning/ct.py index 6175ce1..d75fa42 100644 --- a/tuning/ct.py +++ b/tuning/ct.py @@ -20,7 +20,6 @@ from hrdae.models.networks.motion_encoder import ( MotionRNNEncoder2dOption, MotionConv3dEncoder2dOption, - MotionGuidedEncoder2dOption, MotionNormalEncoder2dOption, ) @@ -170,29 +169,33 @@ def objective(trial): raise RuntimeError("unreachable") motion_encoder_option = MotionRNNEncoder2dOption( in_channels=2, + hidden_channels=64, conv_params=interleave_arrays( [{"kernel_size": [3], "stride": [2], "padding": [1]}] * num_reducible_layers, [{"kernel_size": [3], "stride": [1], "padding": [1]}] * motion_encoder_num_layers, ), + deconv_params=interleave_arrays( + [{"kernel_size": [3], "stride": [1, 1, 2], "padding": [1]}] + * num_reducible_layers, + [{"kernel_size": [3], "stride": [1], "padding": [1]}] + * motion_encoder_num_layers, + ), rnn=rnn_option, ) elif motion_encoder_name == "conv3d": motion_encoder_option = MotionConv3dEncoder2dOption( in_channels=2, + hidden_channels=64, conv_params=interleave_arrays( [{"kernel_size": [3], "stride": [1, 2, 2], "padding": [1]}] * num_reducible_layers, [{"kernel_size": [3], "stride": [1], "padding": [1]}] * motion_encoder_num_layers, ), - ) - elif motion_encoder_name == "guided2d": - motion_encoder_option = MotionGuidedEncoder2dOption( - in_channels=2, - conv_params=interleave_arrays( - [{"kernel_size": [3], "stride": [2], "padding": [1]}] + deconv_params=interleave_arrays( + [{"kernel_size": [3], "stride": [1, 1, 2], "padding": [1]}] * num_reducible_layers, [{"kernel_size": [3], "stride": [1], "padding": [1]}] * motion_encoder_num_layers, @@ -201,12 +204,19 @@ def objective(trial): elif motion_encoder_name == "normal2d": motion_encoder_option = MotionNormalEncoder2dOption( in_channels=2, + hidden_channels=64, conv_params=interleave_arrays( [{"kernel_size": [3], "stride": [2], "padding": [1]}] * num_reducible_layers, [{"kernel_size": [3], "stride": [1], "padding": [1]}] * motion_encoder_num_layers, ), + deconv_params=interleave_arrays( + [{"kernel_size": [3], "stride": [1, 1, 2], "padding": [1]}] + * num_reducible_layers, + [{"kernel_size": [3], "stride": [1], "padding": [1]}] + * motion_encoder_num_layers, + ), ) else: raise RuntimeError("unreachable") @@ -243,10 +253,6 @@ def objective(trial): ), motion_encoder=motion_encoder_option, upsample_size=[d_, h_, w_], - aggregation_method=aggregation_method, - # aggregation_method=trial.suggest_categorical( - # "aggregation_method", ["concat", "sum"] - # ), ) elif network_name == "hrdae3d": network_option = HRDAE3dOption( @@ -260,10 +266,6 @@ def objective(trial): ), motion_encoder=motion_encoder_option, upsample_size=[d_, h_, w_], - aggregation_method=aggregation_method, - # aggregation_method=trial.suggest_categorical( - # "aggregation_method", ["concat", "sum"] - # ), ) else: raise RuntimeError("unreachable") @@ -274,14 +276,12 @@ def objective(trial): ) scheduler_option = OneCycleLRSchedulerOption( - lr=0.05, + max_lr=0.05, # max_lr=trial.suggest_float("max_lr", 1e-3, 1e-2, log=True), ) model_option = VRModelOption( loss_coef={"wmse": 1.0}, - phase=phase, - pred_diff=args.pred_diff, loss=loss_option, network=network_option, optimizer=optimizer_option, diff --git a/tuning/mmnist.py b/tuning/mmnist.py index db87a7a..3c65911 100644 --- a/tuning/mmnist.py +++ b/tuning/mmnist.py @@ -17,7 +17,6 @@ from hrdae.models.networks.motion_encoder import ( MotionRNNEncoder1dOption, MotionConv2dEncoder1dOption, - MotionGuidedEncoder1dOption, MotionNormalEncoder1dOption, ) @@ -146,29 +145,33 @@ def objective(trial): raise RuntimeError("unreachable") motion_encoder_option = MotionRNNEncoder1dOption( in_channels=3, + hidden_channels=64, conv_params=interleave_arrays( [{"kernel_size": [3], "stride": [2], "padding": [1]}] * num_reducible_layers, [{"kernel_size": [3], "stride": [1], "padding": [1]}] * motion_encoder_num_layers, ), + deconv_params=interleave_arrays( + [{"kernel_size": [3], "stride": [1, 2], "padding": [1]}] + * num_reducible_layers, + [{"kernel_size": [3], "stride": [1], "padding": [1]}] + * motion_encoder_num_layers, + ), rnn=rnn_option, ) elif motion_encoder_name == "conv2d": motion_encoder_option = MotionConv2dEncoder1dOption( in_channels=3, + hidden_channels=64, conv_params=interleave_arrays( [{"kernel_size": [3], "stride": [1, 2], "padding": [1]}] * num_reducible_layers, [{"kernel_size": [3], "stride": [1], "padding": [1]}] * motion_encoder_num_layers, ), - ) - elif motion_encoder_name == "guided1d": - motion_encoder_option = MotionGuidedEncoder1dOption( - in_channels=3, - conv_params=interleave_arrays( - [{"kernel_size": [3], "stride": [2], "padding": [1]}] + deconv_params=interleave_arrays( + [{"kernel_size": [3], "stride": [1, 2], "padding": [1]}] * num_reducible_layers, [{"kernel_size": [3], "stride": [1], "padding": [1]}] * motion_encoder_num_layers, @@ -177,12 +180,19 @@ def objective(trial): elif motion_encoder_name == "normal1d": motion_encoder_option = MotionNormalEncoder1dOption( in_channels=3, + hidden_channels=64, conv_params=interleave_arrays( [{"kernel_size": [3], "stride": [2], "padding": [1]}] * num_reducible_layers, [{"kernel_size": [3], "stride": [1], "padding": [1]}] * motion_encoder_num_layers, ), + deconv_params=interleave_arrays( + [{"kernel_size": [3], "stride": [1, 2], "padding": [1]}] + * num_reducible_layers, + [{"kernel_size": [3], "stride": [1], "padding": [1]}] + * motion_encoder_num_layers, + ), ) else: raise RuntimeError("unreachable") @@ -221,10 +231,6 @@ def objective(trial): 64 // 2**num_reducible_layers, 64 // 2**num_reducible_layers, ], - # aggregation_method=trial.suggest_categorical( - # "aggregation_method", ["concat", "sum"] - # ), - aggregation_method="concat", ) elif network_name == "hrdae2d": network_option = HRDAE2dOption( @@ -240,10 +246,6 @@ def objective(trial): 64 // 2**num_reducible_layers, 64 // 2**num_reducible_layers, ], - # aggregation_method=trial.suggest_categorical( - # "aggregation_method", ["concat", "sum"] - # ), - aggregation_method="concat", ) else: raise RuntimeError("unreachable") @@ -259,8 +261,6 @@ def objective(trial): model_option = VRModelOption( loss_coef={"wmse": 1.0}, - phase=phase, - pred_diff=False, loss=loss_option, network=network_option, optimizer=optimizer_option, diff --git a/tuning/pvr_mmnist.py b/tuning/pvr_mmnist.py index 901a339..61775a9 100644 --- a/tuning/pvr_mmnist.py +++ b/tuning/pvr_mmnist.py @@ -18,7 +18,6 @@ from hrdae.models.networks.motion_encoder import ( MotionRNNEncoder1dOption, MotionConv2dEncoder1dOption, - MotionGuidedEncoder1dOption, MotionNormalEncoder1dOption, ) @@ -162,29 +161,33 @@ def objective(trial): raise RuntimeError("unreachable") motion_encoder_option = MotionRNNEncoder1dOption( in_channels=motion_encoder_in_channels, + hidden_channels=64, conv_params=interleave_arrays( [{"kernel_size": [3], "stride": [2], "padding": [1]}] * num_reducible_layers, [{"kernel_size": [3], "stride": [1], "padding": [1]}] * motion_encoder_num_layers, ), + deconv_params=interleave_arrays( + [{"kernel_size": [3], "stride": [1, 2], "padding": [1]}] + * num_reducible_layers, + [{"kernel_size": [3], "stride": [1], "padding": [1]}] + * motion_encoder_num_layers, + ), rnn=rnn_option, ) elif motion_encoder_name == "conv2d": motion_encoder_option = MotionConv2dEncoder1dOption( in_channels=motion_encoder_in_channels, + hidden_channels=64, conv_params=interleave_arrays( [{"kernel_size": [3], "stride": [1, 2], "padding": [1]}] * num_reducible_layers, [{"kernel_size": [3], "stride": [1], "padding": [1]}] * motion_encoder_num_layers, ), - ) - elif motion_encoder_name == "guided1d": - motion_encoder_option = MotionGuidedEncoder1dOption( - in_channels=motion_encoder_in_channels, - conv_params=interleave_arrays( - [{"kernel_size": [3], "stride": [2], "padding": [1]}] + deconv_params=interleave_arrays( + [{"kernel_size": [3], "stride": [1, 2], "padding": [1]}] * num_reducible_layers, [{"kernel_size": [3], "stride": [1], "padding": [1]}] * motion_encoder_num_layers, @@ -193,22 +196,29 @@ def objective(trial): elif motion_encoder_name == "normal1d": motion_encoder_option = MotionNormalEncoder1dOption( in_channels=motion_encoder_in_channels, + hidden_channels=64, conv_params=interleave_arrays( [{"kernel_size": [3], "stride": [2], "padding": [1]}] * num_reducible_layers, [{"kernel_size": [3], "stride": [1], "padding": [1]}] * motion_encoder_num_layers, ), + deconv_params=interleave_arrays( + [{"kernel_size": [3], "stride": [1, 2], "padding": [1]}] + * num_reducible_layers, + [{"kernel_size": [3], "stride": [1], "padding": [1]}] + * motion_encoder_num_layers, + ), ) else: raise RuntimeError("unreachable") latent_dim = 8 content_encoder_num_layers = 0 - aggregation_method = "sum" network_option = RDAE2dOption( in_channels=1, + hidden_channels=64, latent_dim=latent_dim, conv_params=interleave_arrays( [{"kernel_size": [3], "stride": [2], "padding": [1]}] @@ -221,7 +231,6 @@ def objective(trial): 64 // 2**num_reducible_layers, 64 // 2**num_reducible_layers, ], - aggregation_method=aggregation_method, ) lr = 0.005