Commit: Run model with no error

ange1a-j14 committed Aug 5, 2024
1 parent 43e5a20 commit a23080a

Showing 4 changed files with 91 additions and 59 deletions.
9 changes: 5 additions & 4 deletions decoder.py
@@ -5,6 +5,8 @@
 import lightning as L
 from models import CNN
 
+model_dict = {"CNN": CNN}
+
 class VelocityDecoder(L.LightningModule):
     def __init__(self, model_name, model_hparams, optimizer_name,
                  optimizer_hparams, misc_hparams):
@@ -64,7 +66,6 @@ def training_step(self, batch, batch_idx):
         # training_step defines the train loop.
         # it is independent of forward
         x, y = batch
-        x = x.view(-1, x.size(1)**2)
         preds = self.model(x)
         loss = self.loss_function(preds, y)
         acc = (preds == y).float().mean()
@@ -75,7 +76,9 @@ def training_step(self, batch, batch_idx):
     def validation_step(self, batch, batch_idx):
         # validation_step defines the validation loop.
         x, y = batch
-        x = x.view(-1, x.size(1)**2)
+        print(type(x))
+        print(f"x size {x.size()}")
+        print(f"y size {y.size()}")
         preds = self.model(x)
         loss = self.loss_function(preds, y)
         acc = (preds == y).float().mean()
@@ -85,7 +88,6 @@ def validation_step(self, batch, batch_idx):

     def test_step(self, batch, batch_idx):
         x, y = batch
-        x = x.view(-1, x.size(1)**2)
         preds = self.model(x)
         loss = self.loss_function(preds, y)
         acc = (preds == y).float().mean()
@@ -95,6 +97,5 @@ def test_step(self, batch, batch_idx):

     def predict_step(self, batch, batch_idx, dataloader_idx=0):
         x, y = batch
-        x = x.view(-1, x.size(1)**2)
         y_hat = self.model(x)
         return y_hat, y
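
Note: the new model_dict maps model names to model classes; the part of __init__ that consumes it sits outside this diff. A minimal sketch of the usual pattern, assuming a create_model helper (the helper name is an assumption, not part of this commit):

    # Sketch only: how model_dict is typically consumed in __init__.
    # create_model is an assumed helper, not shown in this diff.
    def create_model(model_name, model_hparams):
        if model_name in model_dict:
            return model_dict[model_name](**model_hparams)
        raise ValueError(f'Unknown model name "{model_name}"')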
22 changes: 22 additions & 0 deletions main.py
@@ -0,0 +1,22 @@
+import numpy as np
+import sys
+import torch
+
+import train
+import decoder
+import models
+
+if __name__ == '__main__':
+    np.random.seed(0x5EED+3)
+    if len(sys.argv) == 1:
+        """Run functions in this scratch area.
+        """
+        valid_file = 'C:\\Users\\aj14\\Desktop\\SMI\\data\\valid_data.h5py'
+        train_file = 'C:\\Users\\aj14\\Desktop\\SMI\\data\\training_data.h5py'
+        test_file = 'C:\\Users\\aj14\\Desktop\\SMI\\data\\test_data.h5py'
+
+        runner = train.TrainingRunner(train_file, valid_file, test_file)
+        runner.scan_hyperparams()
+
+    else:
+        print("Error: Unsupported number of command-line arguments")
7 changes: 4 additions & 3 deletions models.py
@@ -11,20 +11,21 @@ def __init__(self, input_size, output_size):
             nn.MaxPool1d(2), # Lout = 125, given L = 250
             nn.Conv1d(16, 32, kernel_size=7), # Lout = 119, given L = 125
             nn.MaxPool1d(2), # Lout = 59, given L = 119
-            nn.Conv1d(32, 64, kernel_size=7) # Lout = 53, given L = 59
+            nn.Conv1d(32, 64, kernel_size=7), # Lout = 53, given L = 59
             nn.MaxPool1d(2), # Lout = 26, given L = 53
             nn.Dropout(0.1),
             nn.Conv1d(64, 64, kernel_size=7), # Lout = 20, given L = 26
             nn.MaxPool1d(2) # Lout = 10, given L = 20
         )
         self.fc_layers = nn.Sequential(
-            nn.Linear(640, 16),
+            nn.Linear(10, 16),
             nn.Linear(16, 1)
         )
 
     def forward(self, x):
         out = self.conv_layers(x)
-        out = self.view(640)
+        print(f"out size after conv: {out.size()}") # expect [8192, 10]
         out = self.fc_layers(out)
+        print(f"out size after fc: {out.size()}") # expect [8192, 1]
         return out

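The Lout comments follow the stride-1, no-padding formulas: Conv1d gives Lout = L - (kernel_size - 1), and MaxPool1d(2) gives floor(L/2). A quick shape check (a sketch — the first conv layer sits above this hunk, so nn.Conv1d(1, 16, kernel_size=7) and the 256-sample input length are assumptions):

    # Sketch: verify the Lout comments. The first layer and input length
    # are assumptions; everything after mirrors the diff above.
    import torch
    import torch.nn as nn

    conv_layers = nn.Sequential(
        nn.Conv1d(1, 16, kernel_size=7),  # 256 -> 250
        nn.MaxPool1d(2),                  # 250 -> 125
        nn.Conv1d(16, 32, kernel_size=7), # 125 -> 119
        nn.MaxPool1d(2),                  # 119 -> 59
        nn.Conv1d(32, 64, kernel_size=7), # 59 -> 53
        nn.MaxPool1d(2),                  # 53 -> 26
        nn.Dropout(0.1),
        nn.Conv1d(64, 64, kernel_size=7), # 26 -> 20
        nn.MaxPool1d(2),                  # 20 -> 10
    )
    x = torch.randn(8, 1, 256)
    print(conv_layers(x).shape)  # torch.Size([8, 64, 10])

Note the stack emits [N, 64, 10] (channels x length), so nn.Linear(10, 16) acts on the length dimension and forward would print [N, 64, 10] and [N, 64, 1] rather than the [8192, 10] and [8192, 1] the comments expect, unless the channel dimension is collapsed elsewhere.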
112 changes: 60 additions & 52 deletions train.py
@@ -19,16 +19,18 @@ class VelocityDataset(Dataset):
     def __init__(self, h5_file):
         self.h5_file = h5_file
         with h5py.File(self.h5_file, 'r') as f:
-            self.length = len(f['time_data']) # num shots
+            self.length = len(f['Time (s)']) # num shots
 
-    def open_hdf5(self, group_size=64, num_groups=256):
+    def open_hdf5(self, num_groups=64, group_size=256):
         # solves issue where hdf5 file opened in __init__ prevents multiple
         # workers: https://github.com/pytorch/pytorch/issues/11929
         self.file = h5py.File(self.h5_file, 'r')
-        self.inputs = self.file['PD (V)'][:, ::group_size] # take num_groups evenly spaced points, [num_shots, num_groups]
+        grouped_pd = np.array(np.hsplit(self.file['PD (V)'], num_groups)) # [num_groups, num_shots, group_size]
+        self.inputs = np.transpose(grouped_pd, [1, 0, 2]) # [num_shots, num_groups, group_size]
         grouped_velocities = np.array(np.hsplit(self.file['Speaker (Microns/s)'], num_groups)) # [num_groups, num_shots, group_size]
         grouped_velocities = np.transpose(grouped_velocities, [1, 0, 2]) # [num_shots, num_groups, group_size]
-        self.targets = np.average(grouped_velocities, axis=3) # store average velocity per group per shot: [num_shots, num_groups]
+        grouped_velocities = np.average(grouped_velocities, axis=2) # store average velocity per group per shot: [num_shots, num_groups]
+        self.targets = np.expand_dims(grouped_velocities, axis=2) # [num_shots, num_groups, 1]
 
     def __len__(self):
         return self.length
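
The hsplit/transpose pipeline is easiest to see with toy sizes (a sketch; the shapes mirror the comments above):

    # Sketch: what open_hdf5 does to a raw [num_shots, num_samples] array.
    import numpy as np

    num_shots, num_groups, group_size = 4, 8, 32  # toy sizes
    pd = np.random.rand(num_shots, num_groups * group_size)

    grouped = np.array(np.hsplit(pd, num_groups))         # [num_groups, num_shots, group_size]
    inputs = np.transpose(grouped, [1, 0, 2])             # [num_shots, num_groups, group_size]
    targets = np.expand_dims(inputs.mean(axis=2), axis=2) # [num_shots, num_groups, 1]
    print(inputs.shape, targets.shape)  # (4, 8, 32) (4, 8, 1)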
@@ -40,7 +42,7 @@ def __getitem__(self, idx):

 class TrainingRunner:
     def __init__(self, training_h5, validation_h5, testing_h5,
-                 velocity_only=False):
+                 velocity_only=False, num_groups=64):
         self.training_h5 = training_h5
         self.validation_h5 = validation_h5
         self.testing_h5 = testing_h5
@@ -50,16 +52,22 @@ def __init__(self, training_h5, validation_h5, testing_h5,
         self.set_dataloaders()
 
         # dimensions
-        self.input_size = next(iter(self.train_loader))[0].size(-1) ** 2
-        self.output_size = next(iter(self.train_loader))[1].size(-1)
+        input_ref = next(iter(self.train_loader))
+        output_ref = next(iter(self.train_loader))
+        self.input_size = num_groups #input_ref[0].size(-1) #** 2
+        self.output_size = num_groups # output_ref[1].size(-1)
+        print(f"input ref {len(input_ref)} , {input_ref[0].size()}")
+        print(f"output ref {len(output_ref)} , {output_ref[1].size()}")
+        print(f"train.py input_size {self.input_size}")
+        print(f"train.py output_size {self.output_size}")
 
         # directories
         self.checkpoint_dir = "./checkpoints"
 
     def get_custom_dataloader(self, h5_file, batch_size=128, shuffle=True,
                               velocity_only=True):
-        if velocity_only:
-            dataset = VelocityDataset(h5_file)
+        # if velocity_only:
+        dataset = VelocityDataset(h5_file)
 
         # We can use DataLoader to get batches of data
         dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
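
Deferring the h5py.File open to open_hdf5 rather than __init__ is the standard workaround for the multi-worker issue linked above: each DataLoader worker must hold its own file handle. The __getitem__ body is outside this diff; the usual shape of the lazy-open pattern, as a sketch:

    # Sketch of the lazy-open pattern; the real __getitem__ is not shown here.
    def __getitem__(self, idx):
        if not hasattr(self, 'file'):  # first access in this worker process
            self.open_hdf5()
        return (torch.from_numpy(self.inputs[idx]).float(),
                torch.from_numpy(self.targets[idx]).float())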
@@ -109,7 +117,7 @@ def train_model(self, model_name, save_name=None, **kwargs):
             devices=[0],
             max_epochs=180,
             callbacks=[early_stop_callback, checkpoint_callback],
-            check_val_every_n_epoch=10,
+            check_val_every_n_epoch=1, #10,
             logger=logger
         )

@@ -133,45 +141,45 @@ def train_model(self, model_name, save_name=None, **kwargs):

return model, result

-    def scan_hyperparams(self):
-        for lr in [1e-3, 1e-2, 3e-2]:
-
-            model_config = {"input_size": self.input_size,
-                            "output_size": self.output_size}
-            optimizer_config = {"lr": lr}
-                               #"momentum": 0.9,}
-            misc_config = {"batch_size": self.batch_size}
-
-            self.train_model(model_name="CNN",
-                             model_hparams=model_config,
-                             optimizer_name="Adam",
-                             optimizer_hparams=optimizer_config,
-                             misc_hparams=misc_config)
-
-    def load_model(self):
-        Check whether pretrained model exists. If yes, load it and skip training
-        pretrained_filename = os.path.join(self.checkpoint_dir, "SMI", "f63rieqp",
-                                           "checkpoints", "*" + ".ckpt")
-        print(pretrained_filename)
-        if os.path.isfile(glob.glob(pretrained_filename)[0]):
-            pretrained_filename = glob.glob(pretrained_filename)[0]
-            print(
-                f"Found pretrained model at {pretrained_filename}, loading...")
-            # Automatically loads the model with the saved hyperparameters
-            model = VelocityDecoder.load_from_checkpoint(pretrained_filename)
-
-            # Create a PyTorch Lightning trainer with the generation callback
-            trainer = L.Trainer(
-                accelerator="gpu",
-                devices=[0]
-            )
-
-            # Test best model on validation and test set
-            val_result = trainer.test(model, dataloaders=self.valid_loader,
-                                      verbose=False)
-            test_result = trainer.test(model, dataloaders=self.test_loader,
-                                       verbose=False)
-            result = {"test": test_result[0]["test_acc"],
-                      "val": val_result[0]["test_acc"]}
-
-            return model, result
+    def scan_hyperparams(self):
+        for lr in [1e-3]:#, 1e-2, 3e-2]:
+
+            model_config = {"input_size": self.input_size,
+                            "output_size": self.output_size}
+            optimizer_config = {"lr": lr}
+                               #"momentum": 0.9,}
+            misc_config = {"batch_size": self.batch_size}
+
+            self.train_model(model_name="CNN",
+                             model_hparams=model_config,
+                             optimizer_name="Adam",
+                             optimizer_hparams=optimizer_config,
+                             misc_hparams=misc_config)
+
+    def load_model(self):
+        # Check whether pretrained model exists. If yes, load it and skip training
+        pretrained_filename = os.path.join(self.checkpoint_dir, "SMI", "f63rieqp",
+                                           "checkpoints", "*" + ".ckpt")
+        print(pretrained_filename)
+        if os.path.isfile(glob.glob(pretrained_filename)[0]):
+            pretrained_filename = glob.glob(pretrained_filename)[0]
+            print(
+                f"Found pretrained model at {pretrained_filename}, loading...")
+            # Automatically loads the model with the saved hyperparameters
+            model = VelocityDecoder.load_from_checkpoint(pretrained_filename)
+
+            # Create a PyTorch Lightning trainer with the generation callback
+            trainer = L.Trainer(
+                accelerator="gpu",
+                devices=[0]
+            )
+
+            # Test best model on validation and test set
+            val_result = trainer.test(model, dataloaders=self.valid_loader,
+                                      verbose=False)
+            test_result = trainer.test(model, dataloaders=self.test_loader,
+                                       verbose=False)
+            result = {"test": test_result[0]["test_acc"],
+                      "val": val_result[0]["test_acc"]}
+
+            return model, result
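
One caveat worth noting: glob.glob(pretrained_filename)[0] raises IndexError when no checkpoint matches, so the "if yes" check can never fall through cleanly. A more defensive variant (a sketch, not part of this commit):

    # Sketch: guard against an empty glob before indexing.
    matches = glob.glob(pretrained_filename)
    if matches and os.path.isfile(matches[0]):
        pretrained_filename = matches[0]
        ...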
