From a23080a220936020df2809bc9f25a461354618a2 Mon Sep 17 00:00:00 2001
From: Angela Yueran Jia
Date: Sun, 4 Aug 2024 20:26:28 -0700
Subject: [PATCH] Run model with no error

---
 decoder.py |   9 +++--
 main.py    |  22 +++++++++++
 models.py  |   7 ++--
 train.py   | 112 ++++++++++++++++++++++++++++------------------------
 4 files changed, 91 insertions(+), 59 deletions(-)
 create mode 100644 main.py

diff --git a/decoder.py b/decoder.py
index e284cfd..d17bacc 100644
--- a/decoder.py
+++ b/decoder.py
@@ -5,6 +5,8 @@ import lightning as L
 
 from models import CNN
 
+model_dict = {"CNN": CNN}
+
 class VelocityDecoder(L.LightningModule):
     def __init__(self, model_name, model_hparams, optimizer_name,
                  optimizer_hparams, misc_hparams):
@@ -64,7 +66,6 @@ def training_step(self, batch, batch_idx):
         # training_step defines the train loop.
         # it is independent of forward
         x, y = batch
-        x = x.view(-1, x.size(1)**2)
         preds = self.model(x)
         loss = self.loss_function(preds, y)
         acc = (preds == y).float().mean()
@@ -75,7 +76,9 @@ def validation_step(self, batch, batch_idx):
         # validation_step defines the validation loop.
         x, y = batch
-        x = x.view(-1, x.size(1)**2)
+        print(type(x))
+        print(f"x size {x.size()}")
+        print(f"y size {y.size()}")
         preds = self.model(x)
         loss = self.loss_function(preds, y)
         acc = (preds == y).float().mean()
@@ -85,7 +88,6 @@ def test_step(self, batch, batch_idx):
         x, y = batch
-        x = x.view(-1, x.size(1)**2)
         preds = self.model(x)
         loss = self.loss_function(preds, y)
         acc = (preds == y).float().mean()
@@ -95,6 +97,5 @@ def predict_step(self, batch, batch_idx, dataloader_idx=0):
         x, y = batch
-        x = x.view(-1, x.size(1)**2)
         y_hat = self.model(x)
         return y_hat, y
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..c573c09
--- /dev/null
+++ b/main.py
@@ -0,0 +1,22 @@
+import numpy as np
+import sys
+import torch
+
+import train
+import decoder
+import models
+
+if __name__ == '__main__':
+    np.random.seed(0x5EED+3)
+    if len(sys.argv) == 1:
+        """Run functions in this scratch area.
+ """ + valid_file = 'C:\\Users\\aj14\\Desktop\\SMI\\data\\valid_data.h5py' + train_file = 'C:\\Users\\aj14\\Desktop\\SMI\\data\\training_data.h5py' + test_file = 'C:\\Users\\aj14\\Desktop\\SMI\\data\\test_data.h5py' + + runner = train.TrainingRunner(train_file, valid_file, test_file) + runner.scan_hyperparams() + + else: + print("Error: Unsupported number of command-line arguments") \ No newline at end of file diff --git a/models.py b/models.py index 0237dec..91c5713 100644 --- a/models.py +++ b/models.py @@ -11,20 +11,21 @@ def __init__(self, input_size, output_size): nn.MaxPool1d(2), # Lout = 125, given L = 250 nn.Conv1d(16, 32, kernel_size=7), # Lout = 119, given L = 125 nn.MaxPool1d(2), # Lout = 59, given L = 119 - nn.Conv1d(32, 64, kernel_size=7) # Lout = 53, given L = 59 + nn.Conv1d(32, 64, kernel_size=7), # Lout = 53, given L = 59 nn.MaxPool1d(2), # Lout = 26, given L = 53 nn.Dropout(0.1), nn.Conv1d(64, 64, kernel_size=7), # Lout = 20, given L = 26 nn.MaxPool1d(2) # Lout = 10, given L = 20 ) self.fc_layers = nn.Sequential( - nn.Linear(640, 16), + nn.Linear(10, 16), nn.Linear(16, 1) ) def forward(self, x): out = self.conv_layers(x) - out = self.view(640) + print(f"out size after conv: {out.size()}") # expect [8192, 10] out = self.fc_layers(out) + print(f"out size after fc: {out.size()}") # expect [8192, 1] return out \ No newline at end of file diff --git a/train.py b/train.py index 58a04ee..7cebe86 100644 --- a/train.py +++ b/train.py @@ -19,16 +19,18 @@ class VelocityDataset(Dataset): def __init__(self, h5_file): self.h5_file = h5_file with h5py.File(self.h5_file, 'r') as f: - self.length = len(f['time_data']) # num shots + self.length = len(f['Time (s)']) # num shots - def open_hdf5(self, group_size=64, num_groups=256): + def open_hdf5(self, num_groups=64, group_size=256): # solves issue where hdf5 file opened in __init__ prevents multiple # workers: https://github.com/pytorch/pytorch/issues/11929 self.file = h5py.File(self.h5_file, 'r') - self.inputs = self.file['PD (V)'][:, ::group_size] # take num_groups evenly spaced points, [num_shots, num_groups] + grouped_pd = np.array(np.hsplit(self.file['PD (V)'], num_groups)) # [num_groups, num_shots, group_size] + self.inputs = np.transpose(grouped_pd, [1, 0, 2]) # [num_shots, num_groups, group_size] grouped_velocities = np.array(np.hsplit(self.file['Speaker (Microns/s)'], num_groups)) # [num_groups, num_shots, group_size] grouped_velocities = np.transpose(grouped_velocities, [1, 0, 2]) # [num_shots, num_groups, group_size] - self.targets = np.average(grouped_velocities, axis=3) # store average velocity per group per shot: [num_shots, num_groups] + grouped_velocities = np.average(grouped_velocities, axis=2) # store average velocity per group per shot: [num_shots, num_groups] + self.targets = np.expand_dims(grouped_velocities, axis=2) # [num_shots, num_groups, 1] def __len__(self): return self.length @@ -40,7 +42,7 @@ def __getitem__(self, idx): class TrainingRunner: def __init__(self, training_h5, validation_h5, testing_h5, - velocity_only=False): + velocity_only=False, num_groups=64): self.training_h5 = training_h5 self.validation_h5 = validation_h5 self.testing_h5 = testing_h5 @@ -50,16 +52,22 @@ def __init__(self, training_h5, validation_h5, testing_h5, self.set_dataloaders() # dimensions - self.input_size = next(iter(self.train_loader))[0].size(-1) ** 2 - self.output_size = next(iter(self.train_loader))[1].size(-1) + input_ref = next(iter(self.train_loader)) + output_ref = next(iter(self.train_loader)) + self.input_size = 
+        self.input_size = num_groups  #input_ref[0].size(-1) #** 2
+        self.output_size = num_groups  # output_ref[1].size(-1)
+        print(f"input ref {len(input_ref)} , {input_ref[0].size()}")
+        print(f"output ref {len(output_ref)} , {output_ref[1].size()}")
+        print(f"train.py input_size {self.input_size}")
+        print(f"train.py output_size {self.output_size}")
 
         # directories
         self.checkpoint_dir = "./checkpoints"
 
     def get_custom_dataloader(self, h5_file, batch_size=128, shuffle=True,
                               velocity_only=True):
-        if velocity_only:
-            dataset = VelocityDataset(h5_file)
+        # if velocity_only:
+        dataset = VelocityDataset(h5_file)
 
         # We can use DataLoader to get batches of data
         dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
@@ -109,7 +117,7 @@ def train_model(self, model_name, save_name=None, **kwargs):
             devices=[0],
             max_epochs=180,
             callbacks=[early_stop_callback, checkpoint_callback],
-            check_val_every_n_epoch=10,
+            check_val_every_n_epoch=1,  #10,
             logger=logger
         )
@@ -133,45 +141,45 @@ def train_model(self, model_name, save_name=None, **kwargs):
         return model, result
 
-def scan_hyperparams(self):
-    for lr in [1e-3, 1e-2, 3e-2]:
-
-        model_config = {"input_size": self.input_size,
-                        "output_size": self.output_size}
-        optimizer_config = {"lr": lr}
-                            #"momentum": 0.9,}
-        misc_config = {"batch_size": self.batch_size}
-
-        self.train_model(model_name="CNN",
-                         model_hparams=model_config,
-                         optimizer_name="Adam",
-                         optimizer_hparams=optimizer_config,
-                         misc_hparams=misc_config)
-
-def load_model(self):
-    Check whether pretrained model exists. If yes, load it and skip training
-    pretrained_filename = os.path.join(self.checkpoint_dir, "SMI", "f63rieqp",
-                                       "checkpoints", "*" + ".ckpt")
-    print(pretrained_filename)
-    if os.path.isfile(glob.glob(pretrained_filename)[0]):
-        pretrained_filename = glob.glob(pretrained_filename)[0]
-        print(
-            f"Found pretrained model at {pretrained_filename}, loading...")
-        # Automatically loads the model with the saved hyperparameters
-        model = VelocityDecoder.load_from_checkpoint(pretrained_filename)
-
-        # Create a PyTorch Lightning trainer with the generation callback
-        trainer = L.Trainer(
-            accelerator="gpu",
-            devices=[0]
-        )
-
-        # Test best model on validation and test set
-        val_result = trainer.test(model, dataloaders=self.valid_loader,
-                                  verbose=False)
-        test_result = trainer.test(model, dataloaders=self.test_loader,
-                                   verbose=False)
-        result = {"test": test_result[0]["test_acc"],
-                  "val": val_result[0]["test_acc"]}
-
-        return model, result
\ No newline at end of file
+    def scan_hyperparams(self):
+        for lr in [1e-3]:  #, 1e-2, 3e-2]:
+
+            model_config = {"input_size": self.input_size,
+                            "output_size": self.output_size}
+            optimizer_config = {"lr": lr}
+                                #"momentum": 0.9,}
+            misc_config = {"batch_size": self.batch_size}
+
+            self.train_model(model_name="CNN",
+                             model_hparams=model_config,
+                             optimizer_name="Adam",
+                             optimizer_hparams=optimizer_config,
+                             misc_hparams=misc_config)
+
+    def load_model(self):
+        # Check whether pretrained model exists. If yes, load it and skip training
+        pretrained_filename = os.path.join(self.checkpoint_dir, "SMI", "f63rieqp",
+                                           "checkpoints", "*" + ".ckpt")
+        print(pretrained_filename)
+        if os.path.isfile(glob.glob(pretrained_filename)[0]):
+            pretrained_filename = glob.glob(pretrained_filename)[0]
+            print(
+                f"Found pretrained model at {pretrained_filename}, loading...")
+            # Automatically loads the model with the saved hyperparameters
+            model = VelocityDecoder.load_from_checkpoint(pretrained_filename)
+
+            # Create a PyTorch Lightning trainer with the generation callback
+            trainer = L.Trainer(
+                accelerator="gpu",
+                devices=[0]
+            )
+
+            # Test best model on validation and test set
+            val_result = trainer.test(model, dataloaders=self.valid_loader,
+                                      verbose=False)
+            test_result = trainer.test(model, dataloaders=self.test_loader,
+                                       verbose=False)
+            result = {"test": test_result[0]["test_acc"],
+                      "val": val_result[0]["test_acc"]}
+
+            return model, result
\ No newline at end of file
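
For reference, a minimal numpy sketch of the regrouping that the new open_hdf5
performs. The toy dimensions (num_shots=2, num_groups=4, group_size=8) and the
in-memory arrays standing in for the 'PD (V)' and 'Speaker (Microns/s)'
datasets are hypothetical; the hsplit/transpose/average calls mirror the patch:

    import numpy as np

    # Hypothetical toy dimensions; the patch itself uses num_groups=64,
    # group_size=256.
    num_shots, num_groups, group_size = 2, 4, 8

    # Stand-ins for the 'PD (V)' and 'Speaker (Microns/s)' datasets:
    # each is [num_shots, num_groups * group_size].
    pd = np.arange(num_shots * num_groups * group_size,
                   dtype=float).reshape(num_shots, -1)
    speaker = np.ones_like(pd)

    # np.hsplit cuts axis 1 into num_groups blocks, giving
    # [num_groups, num_shots, group_size]; the transpose restores
    # shot-major order: [num_shots, num_groups, group_size].
    inputs = np.transpose(np.array(np.hsplit(pd, num_groups)), [1, 0, 2])

    grouped = np.transpose(np.array(np.hsplit(speaker, num_groups)), [1, 0, 2])
    # Average the velocity samples within each group, then add a trailing
    # feature axis so each group carries a single scalar target.
    targets = np.expand_dims(np.average(grouped, axis=2), axis=2)

    print(inputs.shape)   # (2, 4, 8)  -> [num_shots, num_groups, group_size]
    print(targets.shape)  # (2, 4, 1)  -> [num_shots, num_groups, 1]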