@@ -26,15 +26,21 @@ def __init__(self, test_mode, h5_file, step, group_size=256):
        self.h5_file = h5_file
        self.step = step
        self.group_size = group_size
-        with h5py.File(self.h5_file, 'r') as f:
-            num_groups = (f['Time (s)'].shape[1] - group_size) // step + 1
-            if test_mode:
-                self.length = len(f['Time (s)'])  # in test_mode, length of dataset = num shots
-            else:
-                self.length = len(f['Time (s)']) * num_groups
+        self.length = self.get_length(h5_file, step, group_size, test_mode)
        print(self.h5_file)
        self.opened_flag = False
        self.test_mode = test_mode
+
+    def get_length(self, h5_file, step, group_size, test_mode):
+        with h5py.File(self.h5_file, 'r') as f:
+            num_groups = (f['signal'].shape[1] - group_size) // step + 1
+            if test_mode:
+                length = len(f['signal'])
+                # in test_mode, length of dataset = num shots
+            else:
+                length = len(f['signal']) * num_groups
+
+        return length

    def open_hdf5(self, rolling=True, step=256, group_size=256):
        """Set up inputs and targets. For each shot, buffer is split into groups of sequences.
@@ -58,44 +64,60 @@ def open_hdf5(self, rolling=True, step=256, group_size=256):
        signal = torch.Tensor(np.array(self.file['signal']))
        # [num_shots, buffer_size, num_channels]
        velocity = torch.Tensor(np.array(self.file['velocity']))
-        # [num_shots, buffer_size]
+        # [num_shots, buffer_size, 1]

        num_channels = signal.shape[-1]
-        if num_channels == 1:
-            signal = torch.squeeze(signal, dim=-1)
-        else:
-            raise ValueError('num_channels must be 1')
-            pass
+        velocity = velocity.squeeze(dim=-1)

        if rolling:
            # ROLLING INPUT INDICES
            num_groups = (signal.shape[1] - group_size) // step + 1
-            start_idxs = torch.arange(num_groups) * step  # starting indices for each group
+            start_idxs = torch.arange(num_groups) * step
+            # starting indices for each group
            idxs = torch.arange(group_size)[:, None] + start_idxs
-            idxs = torch.transpose(idxs, dim0=0, dim1=1)  # indices in shape [num_groups, group_size]
+            idxs = torch.transpose(idxs, dim0=0, dim1=1)
+            # indices in shape [num_groups, group_size]
            if self.test_mode:
-                self.inputs = signal  # [num_shots, buffer_size]
-                grouped_vels = velocity[:, idxs]  # [num_shots, num_groups, group_size]
-                self.targets = torch.mean(grouped_vels, dim=2)  # [num_shots, num_groups]
+                self.inputs = signal  # [num_shots, buffer_size, num_channels]
+                grouped_vels = velocity[:, idxs]
+                # [num_shots, num_groups, group_size]
+                self.targets = torch.mean(grouped_vels, dim=2)
+                # [num_shots, num_groups]
            else:
-                self.inputs = signal[:, idxs].reshape(-1, group_size)  # [num_shots * num_groups, group_size]
-                grouped_vels = velocity[:, idxs].reshape(-1, group_size)  # [num_shots * num_groups, group_size]
-                self.targets = torch.unsqueeze(torch.mean(grouped_vels, dim=1), dim=1)  # [num_shots * num_groups, 1]
+                self.inputs = signal[:, idxs, :].reshape(-1, group_size,
+                                                         num_channels)
+                # [num_shots * num_groups, group_size, num_channels]
+                grouped_vels = velocity[:, idxs].reshape(-1, group_size)
+                # [num_shots * num_groups, group_size]
+                self.targets = torch.unsqueeze(torch.mean(grouped_vels, dim=1),
+                                               dim=1)
+                # [num_shots * num_groups, 1]
        else:
            # STEP INPUT
            if self.test_mode:
-                assert False, 'test_mode not implemented for step input. use rolling step=256'
+                raise NotImplementedError("test_mode not implemented for step "
+                                          "input. use rolling step=256")
            else:
+                self.inputs = torch.cat(torch.split(signal, group_size,
+                                                    dim=1), dim=0)
+                # [num_shots * num_groups, group_size, num_channels]
+                grouped_vels = torch.cat(torch.split(velocity, group_size,
+                                                     dim=1), dim=0)
                # [num_shots * num_groups, group_size]
-                self.inputs = torch.cat(torch.split(signal, group_size, dim=1), dim=0)
-                grouped_vels = torch.cat(torch.split(velocity, group_size, dim=1), dim=0)
-                self.targets = torch.unsqueeze(torch.mean(grouped_vels, dim=1), dim=1)  # [num_shots * num_groups, 1]
+                self.targets = torch.unsqueeze(torch.mean(grouped_vels,
+                                                          dim=1), dim=1)
+                # [num_shots * num_groups, 1]

        if num_channels == 1:
-            self.inputs = torch.unsqueeze(self.inputs, dim=1)
-            self.targets = torch.unsqueeze(self.targets, dim=1)
+            # self.inputs = torch.unsqueeze(self.inputs, dim=1)
+            # self.targets = torch.unsqueeze(self.targets, dim=1)
+            self.inputs = torch.reshape(self.inputs, (-1, 1, group_size))
+            self.targets = torch.reshape(self.targets, (-1, 1, 1))
        else:
-            assert False, 'ch > 1 not implemented'
+            self.inputs = torch.reshape(self.inputs, (-1, num_channels, group_size))
+            self.targets = torch.reshape(self.targets, (-1, 1, 1))
+            print(self.inputs.shape)
+            print(self.targets.shape)

        # total number of group_size length sequences = num_shots * num_groups
        # print("open_hdf5 input size", self.inputs.size())  # [self.length, 256]
@@ -148,14 +170,13 @@ def __init__(self, training_h5, validation_h5, testing_h5, step=256,
        self.checkpoint_dir = "./checkpoints"
        print('TrainingRunner initialized', datetime.datetime.now())

-    def get_custom_dataloader(self, test_mode, h5_file, batch_size=128, shuffle=True,
-                              velocity_only=True):
-        # if velocity_only:
+    def get_custom_dataloader(self, test_mode, h5_file, batch_size=128, shuffle=True):
+
        dataset = VelocityDataset(test_mode, h5_file, self.step)
        print("dataset initialized")
        # We can use DataLoader to get batches of data
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
-                                num_workers=16, persistent_workers=True,
+                                num_workers=1, persistent_workers=True,
                                pin_memory=True)
        print("dataloader initialized")
        return dataloader
@@ -197,8 +218,8 @@ def train_model(self, model_name, save_name=None, **kwargs):
        # Create a PyTorch Lightning trainer with the generation callback
        trainer = L.Trainer(
            default_root_dir=os.path.join(self.checkpoint_dir, save_name),
-            accelerator="gpu",
-            devices=[0],
+            accelerator="cpu",
+            # devices=[0],
            max_epochs=800,
            callbacks=[early_stop_callback, checkpoint_callback],
            check_val_every_n_epoch=5,
@@ -229,10 +250,11 @@ def scan_hyperparams(self):
        lr_list = [1e-3, 1e-4]  # [1e-3, 1e-4, 1e-5]
        act_list = ['LeakyReLU']  # , 'ReLU']
        optim_list = ['Adam']  # , 'SGD']
-        for lr, activation, optim in product(lr_list, act_list, optim_list):  # , 1e-2, 3e-2]:
+        for lr, activation, optim in product(lr_list, act_list, optim_list):
            model_config = {"input_size": self.input_size,
                            "output_size": self.output_size,
-                            "activation": activation}
+                            "activation": activation,
+                            "in_channels": 2}
            optimizer_config = {"lr": lr}
                                # "momentum": 0.9,}
            misc_config = {"batch_size": self.batch_size, "step": self.step}