diff --git a/decoder.py b/decoder.py
index b4404c7..7dfac85 100644
--- a/decoder.py
+++ b/decoder.py
@@ -70,7 +70,7 @@ def training_step(self, batch, batch_idx):
         loss = self.loss_function(preds, y)
         acc = (preds == y).float().mean()
         self.log("train_acc", acc, on_step=False, on_epoch=True)
-        self.log("train_loss", loss, prog_bar=True)
+        self.log("train_loss", loss, on_step=True, prog_bar=True)
         return loss
 
     def validation_step(self, batch, batch_idx):
diff --git a/main.py b/main.py
index dc6a1df..c5462c7 100644
--- a/main.py
+++ b/main.py
@@ -18,7 +18,7 @@
 test_file = 'C:\\Users\\aj14\\Desktop\\SMI\\data\\test_30to1kHz_2kshots_dec=256_randampl.h5py'
 
 print('begin main', datetime.datetime.now())
-step_list = [256, 128, 64]
+step_list = [256, 128, 64]  # step sizes for rolling input
 for step in step_list:
     runner = train.TrainingRunner(train_file, valid_file, test_file, step)
     runner.scan_hyperparams()
diff --git a/models.py b/models.py
index cf8e745..4a0b8c8 100644
--- a/models.py
+++ b/models.py
@@ -6,10 +6,10 @@
 act_fn_by_name = {'LeakyReLU': nn.LeakyReLU(), 'ReLU': nn.ReLU()}
 
 class CNN(nn.Module):
-    def __init__(self, input_size, output_size, activation='LeakyReLU'):
+    def __init__(self, input_size, output_size, ch_in=1, activation='LeakyReLU'):
         super(CNN, self).__init__()
         self.conv_layers = nn.Sequential(
-            nn.Conv1d(input_size, 16, kernel_size=7),  # Lout = 250, given L = 256
+            nn.Conv1d(ch_in, 16, kernel_size=7),  # Lout = 250, given L = 256
             act_fn_by_name[activation],
             nn.MaxPool1d(2),  # Lout = 125, given L = 250
             nn.Conv1d(16, 32, kernel_size=7),  # Lout = 119, given L = 125
@@ -26,11 +26,11 @@ def __init__(self, input_size, output_size, activation='LeakyReLU'):
 
         self.fc_layers = nn.Sequential(
             nn.Linear(10, 16),
             nn.ReLU(),
-            nn.Linear(16, 1)
+            nn.Linear(16, output_size)
         )
 
     def forward(self, x):
-        out = self.conv_layers(x)  # expect out [128, num_groups, 10]
-        out = self.fc_layers(out)  # expect out [128, num_groups, 1]
+        out = self.conv_layers(x)  # expect out [128*num_groups, 10]
+        out = self.fc_layers(out)  # expect out [128*num_groups, 1]
         return out
\ No newline at end of file
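Note: the Lout comments in the models.py hunks follow the standard length arithmetic for stride-1 convolutions and pooling (Lout = L - kernel_size + 1 for Conv1d, Lout = floor(L / 2) for MaxPool1d(2)). A minimal sketch checking those numbers, reproducing only the layers visible in the hunk (the batch size of 8 is an arbitrary assumption):

```python
import torch
import torch.nn as nn

# Only the layers visible in the diff hunk, not the full CNN.
probe = nn.Sequential(
    nn.Conv1d(1, 16, kernel_size=7),   # Lout = 256 - 7 + 1 = 250
    nn.LeakyReLU(),
    nn.MaxPool1d(2),                   # Lout = 250 // 2 = 125
    nn.Conv1d(16, 32, kernel_size=7),  # Lout = 125 - 7 + 1 = 119
)

x = torch.randn(8, 1, 256)  # [batch, ch_in=1, group_size]
print(probe(x).shape)       # torch.Size([8, 32, 119])
```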
diff --git a/train.py b/train.py
index 50c007d..a6dd57c 100644
--- a/train.py
+++ b/train.py
@@ -21,49 +21,46 @@ class VelocityDataset(Dataset):
     def __init__(self, h5_file, step):
         self.h5_file = h5_file
         self.step = step
-        with h5py.File(self.h5_file, 'r') as f:
-            self.length = len(f['Time (s)'])  # num shots
         print(self.h5_file)
         self.opened_flag = False
 
-    def open_hdf5(self, rolling=True, group_size=256, step=128):
-        """Set up inputs and targets. For each shot, buffer is split into rolling data.
-        Inputs include grouped photodiode trace of 'group_size', spaced interval 'step' apart.
-        Targets include average velocity of each group.
-        Input shape is [num_shots, num_groups, group_size] and target shape is [num_shots, num_groups, 1],
+    def open_hdf5(self, step, rolling=True, group_size=256):
+        """Set up inputs and targets. For each shot, the buffer is split into rolling groups.
+        Inputs are windows of the photodiode trace, each of length 'group_size', with starts spaced 'step' apart within each buffer.
+        Targets are the average velocity over each group.
+        Input shape is [num_shots * num_groups, group_size] and target shape is [num_shots * num_groups, 1],
         where num_groups = (buffer_len - group_size)/step + 1, given that buffer_len - group_size is a multiple
         of step. If the given 'group_size' and 'step' do not satisfy the above requirement, the data will not be
         cleanly grouped.
 
         Args:
+            step (int): Size of step between group starts. buffer_len - group_size = 0 (mod step).
             group_size (int, optional): Size of each group. buffer_len - group_size = 0 (mod step). Defaults to 256.
-            step (int, optional): Size of step between group starts. buffer_len - grou_size = 0 (mod step). Defaults to 1.
         """
         # solves issue where hdf5 file opened in __init__ prevents multiple
         # workers: https://github.com/pytorch/pytorch/issues/11929
         self.file = h5py.File(self.h5_file, 'r')
-        # print(torch.cuda.get_device_name(0))
-        pds = torch.Tensor(np.array(self.file['PD (V)']))  # [num_shots, buffer_size]
-        vels = torch.Tensor(np.array(self.file['Speaker (Microns/s)']))  # [num_shots, buffer_size]
+        pds = torch.Tensor(np.array(self.file['PD (V)']))  # [num_shots, buffer_size]
+        vels = torch.Tensor(np.array(self.file['Speaker (Microns/s)']))  # [num_shots, buffer_size]
         if rolling:
-            # ROLLING INPUT INDICES
+            # ROLLING INPUT INDICES
             num_groups = (pds.shape[1] - group_size) // step + 1
             start_idxs = torch.arange(num_groups) * step  # starting indices for each group
             idxs = torch.arange(group_size)[:, None] + start_idxs
-            idxs = torch.transpose(idxs, dim0=0, dim1=1)
-            self.inputs = pds[:, idxs]
-            self.targets = torch.unsqueeze(torch.mean(vels[:, idxs], dim=2), dim=2)
+            idxs = torch.transpose(idxs, dim0=0, dim1=1)  # indices in shape [num_groups, group_size]
+            self.inputs = torch.cat(list(pds[:, idxs]), dim=0)  # [num_shots * num_groups, group_size]
+            grouped_vels = torch.cat(list(vels[:, idxs]), dim=0)  # [num_shots * num_groups, group_size]
+            self.targets = torch.unsqueeze(torch.mean(grouped_vels, dim=1), dim=1)  # [num_shots * num_groups, 1]
         else:  # STEP INPUT
-            grouped_pds = torch.stack(torch.split(pds, group_size, dim=1))
-            self.inputs = torch.transpose(grouped_pds, dim0=0, dim1=1)
-            grouped_vels = torch.stack(torch.split(vels, group_size, dim=1))
-            grouped_vels = torch.transpose(grouped_vels, dim0=0, dim1=1)
-            self.targets = torch.unsqueeze(torch.mean(grouped_vels, dim=2), dim=2)
+            self.inputs = torch.cat(torch.split(pds, group_size, dim=1), dim=0)  # [num_shots * num_groups, group_size]
+            grouped_vels = torch.cat(torch.split(vels, group_size, dim=1), dim=0)
+            self.targets = torch.unsqueeze(torch.mean(grouped_vels, dim=1), dim=1)  # [num_shots * num_groups, 1]
 
-        # print(self.inputs.size())  # [2k, 64, 256]
-        # print(self.targets.size())  # [2k, 64, 1]
+        self.length = self.inputs.shape[0]  # total number of group_size-length sequences = num_shots * num_groups
+        # print(self.inputs.size())  # [10k*64, 256]
+        # print(self.targets.size())  # [10k*64, 1]
 
     def __len__(self):
         return self.length
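Note: the rolling indexing in the hunk above can be sanity-checked with small numbers. The sketch below uses hypothetical sizes (buffer_size=8, group_size=4, step=2, so num_groups = (8 - 4) // 2 + 1 = 3), not the real data dimensions:

```python
import torch

# Hypothetical toy data: 2 shots, buffer of 8 samples each.
pds = torch.arange(2 * 8, dtype=torch.float32).reshape(2, 8)  # [num_shots=2, buffer_size=8]
group_size, step = 4, 2

num_groups = (pds.shape[1] - group_size) // step + 1          # 3
start_idxs = torch.arange(num_groups) * step                  # tensor([0, 2, 4])
idxs = torch.arange(group_size)[:, None] + start_idxs         # [group_size, num_groups]
idxs = torch.transpose(idxs, dim0=0, dim1=1)                  # [num_groups, group_size]
inputs = torch.cat(list(pds[:, idxs]), dim=0)                 # [num_shots * num_groups, group_size]

print(idxs)          # rows: [0,1,2,3], [2,3,4,5], [4,5,6,7]
print(inputs.shape)  # torch.Size([6, 4])
```

One consequence of this hunk: self.length is now set inside open_hdf5 rather than __init__, so __len__ is only meaningful after the first open (presumably guaranteed by the opened_flag path, which this diff does not show).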
initialized") # We can use DataLoader to get batches of data dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, - num_workers=8, persistent_workers=True, + num_workers=16, persistent_workers=True, pin_memory=True) print("dataloader initialized") return dataloader @@ -186,22 +182,23 @@ def train_model(self, model_name, save_name=None, **kwargs): return model, result def scan_hyperparams(self): - lr_list = [1e-3, 1e-4] - act_list = ['LeakyReLU', 'ReLU'] - optim_list = ['Adam', 'SGD'] - for lr, activation, step in product(lr_list, act_list, step_list): #, 1e-2, 3e-2]: - - model_config = {"input_size": self.input_size, - "output_size": self.output_size} - optimizer_config = {"lr": lr} - #"momentum": 0.9,} - misc_config = {"batch_size": self.batch_size, "step": self.step} - - self.train_model(model_name="CNN", - model_hparams=model_config, - optimizer_name="Adam", - optimizer_hparams=optimizer_config, - misc_hparams=misc_config) + lr_list = [1e-3, 1e-4] + act_list = ['LeakyReLU', 'ReLU'] + optim_list = ['Adam', 'SGD'] + for lr, activation, optim in product(lr_list, act_list, optim_list): #, 1e-2, 3e-2]: + model_config = {"input_size": self.input_size, + "output_size": self.output_size, + "activation": activation} + optimizer_config = {"lr": lr} + #"momentum": 0.9,} + misc_config = {"batch_size": self.batch_size, "step": self.step} + + self.train_model(model_name="CNN", + model_hparams=model_config, + optimizer_name=optim, + optimizer_hparams=optimizer_config, + misc_hparams=misc_config) + def load_model(self, model_tag, model_name='CNN'): # Check whether pretrained model exists. If yes, load it and skip training