diff --git a/decoder.py b/decoder.py
index b4404c7..7dfac85 100644
--- a/decoder.py
+++ b/decoder.py
@@ -70,7 +70,7 @@ def training_step(self, batch, batch_idx):
         loss = self.loss_function(preds, y)
         acc = (preds == y).float().mean()
         self.log("train_acc", acc, on_step=False, on_epoch=True)
-        self.log("train_loss", loss, prog_bar=True)
+        self.log("train_loss", loss, on_step=True, prog_bar=True)
         return loss
 
     def validation_step(self, batch, batch_idx):
diff --git a/main.py b/main.py
index dc6a1df..c5462c7 100644
--- a/main.py
+++ b/main.py
@@ -18,7 +18,7 @@
 test_file = 'C:\\Users\\aj14\\Desktop\\SMI\\data\\test_30to1kHz_2kshots_dec=256_randampl.h5py'
 
 print('begin main', datetime.datetime.now())
-step_list = [256, 128, 64]
+step_list = [256, 128, 64]  # step sizes for rolling input
 for step in step_list:
     runner = train.TrainingRunner(train_file, valid_file, test_file, step)
     runner.scan_hyperparams()
diff --git a/models.py b/models.py
index cf8e745..4a0b8c8 100644
--- a/models.py
+++ b/models.py
@@ -6,10 +6,10 @@
 act_fn_by_name = {'LeakyReLU': nn.LeakyReLU(), 'ReLU': nn.ReLU()}
 
 class CNN(nn.Module):
-    def __init__(self, input_size, output_size, activation='LeakyReLU'):
+    def __init__(self, input_size, output_size, ch_in=1, activation='LeakyReLU'):
         super(CNN, self).__init__()
         self.conv_layers = nn.Sequential(
-            nn.Conv1d(input_size, 16, kernel_size=7),  # Lout = 250, given L = 256
+            nn.Conv1d(ch_in, 16, kernel_size=7),  # Lout = 250, given L = 256
             act_fn_by_name[activation],
             nn.MaxPool1d(2),  # Lout = 125, given L = 250
             nn.Conv1d(16, 32, kernel_size=7),  # Lout = 119, given L = 125
@@ -26,11 +26,11 @@ def __init__(self, input_size, output_size, activation='LeakyReLU'):
 
         self.fc_layers = nn.Sequential(
             nn.Linear(10, 16),
             nn.ReLU(),
-            nn.Linear(16, 1)
+            nn.Linear(16, output_size)
         )
 
     def forward(self, x):
-        out = self.conv_layers(x)  # expect out [128, num_groups, 10]
-        out = self.fc_layers(out)  # expect out [128, num_groups, 1]
+        out = self.conv_layers(x)  # expect out [128*num_groups, 10]
+        out = self.fc_layers(out)  # expect out [128*num_groups, 1]
         return out
\ No newline at end of file
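Note: the Lout comments in the models.py hunks follow the standard length arithmetic for stride-1 convolutions and pooling (Lout = L - kernel_size + 1 for Conv1d, Lout = floor(L / 2) for MaxPool1d(2)). A minimal sketch checking those numbers, reproducing only the layers visible in the hunk (the batch size of 8 is an arbitrary assumption):

```python
import torch
import torch.nn as nn

# Only the layers visible in the diff hunk, not the full CNN.
probe = nn.Sequential(
    nn.Conv1d(1, 16, kernel_size=7),   # Lout = 256 - 7 + 1 = 250
    nn.LeakyReLU(),
    nn.MaxPool1d(2),                   # Lout = 250 // 2 = 125
    nn.Conv1d(16, 32, kernel_size=7),  # Lout = 125 - 7 + 1 = 119
)

x = torch.randn(8, 1, 256)  # [batch, ch_in=1, group_size]
print(probe(x).shape)       # torch.Size([8, 32, 119])
```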
diff --git a/train.py b/train.py
index 50c007d..a6dd57c 100644
--- a/train.py
+++ b/train.py
@@ -21,49 +21,46 @@ class VelocityDataset(Dataset):
     def __init__(self, h5_file, step):
         self.h5_file = h5_file
         self.step = step
-        with h5py.File(self.h5_file, 'r') as f:
-            self.length = len(f['Time (s)'])  # num shots
         print(self.h5_file)
         self.opened_flag = False
 
-    def open_hdf5(self, rolling=True, group_size=256, step=128):
-        """Set up inputs and targets. For each shot, buffer is split into rolling data.
-        Inputs include grouped photodiode trace of 'group_size', spaced interval 'step' apart.
-        Targets include average velocity of each group.
-        Input shape is [num_shots, num_groups, group_size] and target shape is [num_shots, num_groups, 1],
+    def open_hdf5(self, step, rolling=True, group_size=256):
+        """Set up inputs and targets. For each shot, the buffer is split into rolling groups.
+        Inputs are windows of the photodiode trace, each of length 'group_size', with starts spaced 'step' apart within each buffer.
+        Targets are the average velocity over each group.
+        Input shape is [num_shots * num_groups, group_size] and target shape is [num_shots * num_groups, 1],
         where num_groups = (buffer_len - group_size)/step + 1, given that buffer_len - group_size is a multiple
         of step. If the given 'group_size' and 'step' do not satisfy the above requirement, the data will not be
         cleanly grouped.
 
         Args:
+            step (int): Size of step between group starts. buffer_len - group_size = 0 (mod step).
             group_size (int, optional): Size of each group. buffer_len - group_size = 0 (mod step). Defaults to 256.
-            step (int, optional): Size of step between group starts. buffer_len - grou_size = 0 (mod step). Defaults to 1.
         """
         # solves issue where hdf5 file opened in __init__ prevents multiple
         # workers: https://github.com/pytorch/pytorch/issues/11929
         self.file = h5py.File(self.h5_file, 'r')
-        # print(torch.cuda.get_device_name(0))
-        pds = torch.Tensor(np.array(self.file['PD (V)']))  # [num_shots, buffer_size]
-        vels = torch.Tensor(np.array(self.file['Speaker (Microns/s)']))  # [num_shots, buffer_size]
+        pds = torch.Tensor(np.array(self.file['PD (V)']))  # [num_shots, buffer_size]
+        vels = torch.Tensor(np.array(self.file['Speaker (Microns/s)']))  # [num_shots, buffer_size]
         if rolling:
-            # ROLLING INPUT INDICES
+            # ROLLING INPUT INDICES
             num_groups = (pds.shape[1] - group_size) // step + 1
             start_idxs = torch.arange(num_groups) * step  # starting indices for each group
             idxs = torch.arange(group_size)[:, None] + start_idxs
-            idxs = torch.transpose(idxs, dim0=0, dim1=1)
-            self.inputs = pds[:, idxs]
-            self.targets = torch.unsqueeze(torch.mean(vels[:, idxs], dim=2), dim=2)
+            idxs = torch.transpose(idxs, dim0=0, dim1=1)  # indices in shape [num_groups, group_size]
+            self.inputs = torch.cat(list(pds[:, idxs]), dim=0)  # [num_shots * num_groups, group_size]
+            grouped_vels = torch.cat(list(vels[:, idxs]), dim=0)  # [num_shots * num_groups, group_size]
+            self.targets = torch.unsqueeze(torch.mean(grouped_vels, dim=1), dim=1)  # [num_shots * num_groups, 1]
         else:  # STEP INPUT
-            grouped_pds = torch.stack(torch.split(pds, group_size, dim=1))
-            self.inputs = torch.transpose(grouped_pds, dim0=0, dim1=1)
-            grouped_vels = torch.stack(torch.split(vels, group_size, dim=1))
-            grouped_vels = torch.transpose(grouped_vels, dim0=0, dim1=1)
-            self.targets = torch.unsqueeze(torch.mean(grouped_vels, dim=2), dim=2)
+            self.inputs = torch.cat(torch.split(pds, group_size, dim=1), dim=0)  # [num_shots * num_groups, group_size]
+            grouped_vels = torch.cat(torch.split(vels, group_size, dim=1), dim=0)
+            self.targets = torch.unsqueeze(torch.mean(grouped_vels, dim=1), dim=1)  # [num_shots * num_groups, 1]
 
-        # print(self.inputs.size())  # [2k, 64, 256]
-        # print(self.targets.size())  # [2k, 64, 1]
+        self.length = self.inputs.shape[0]  # total number of group_size-length sequences = num_shots * num_groups
+        # print(self.inputs.size())  # [10k*64, 256]
+        # print(self.targets.size())  # [10k*64, 1]
 
     def __len__(self):
         return self.length
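Note: the rolling indexing in the hunk above can be sanity-checked with small numbers. The sketch below uses hypothetical sizes (buffer_size=8, group_size=4, step=2, so num_groups = (8 - 4) // 2 + 1 = 3), not the real data dimensions:

```python
import torch

# Hypothetical toy data: 2 shots, buffer of 8 samples each.
pds = torch.arange(2 * 8, dtype=torch.float32).reshape(2, 8)  # [num_shots=2, buffer_size=8]
group_size, step = 4, 2

num_groups = (pds.shape[1] - group_size) // step + 1          # 3
start_idxs = torch.arange(num_groups) * step                  # tensor([0, 2, 4])
idxs = torch.arange(group_size)[:, None] + start_idxs         # [group_size, num_groups]
idxs = torch.transpose(idxs, dim0=0, dim1=1)                  # [num_groups, group_size]
inputs = torch.cat(list(pds[:, idxs]), dim=0)                 # [num_shots * num_groups, group_size]

print(idxs)          # rows: [0,1,2,3], [2,3,4,5], [4,5,6,7]
print(inputs.shape)  # torch.Size([6, 4])
```

One consequence of this hunk: self.length is now set inside open_hdf5 rather than __init__, so __len__ is only meaningful after the first open (presumably guaranteed by the opened_flag path, which this diff does not show).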
initialized") # We can use DataLoader to get batches of data dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, - num_workers=8, persistent_workers=True, + num_workers=16, persistent_workers=True, pin_memory=True) print("dataloader initialized") return dataloader @@ -186,22 +182,23 @@ def train_model(self, model_name, save_name=None, **kwargs): return model, result def scan_hyperparams(self): - lr_list = [1e-3, 1e-4] - act_list = ['LeakyReLU', 'ReLU'] - optim_list = ['Adam', 'SGD'] - for lr, activation, step in product(lr_list, act_list, step_list): #, 1e-2, 3e-2]: - - model_config = {"input_size": self.input_size, - "output_size": self.output_size} - optimizer_config = {"lr": lr} - #"momentum": 0.9,} - misc_config = {"batch_size": self.batch_size, "step": self.step} - - self.train_model(model_name="CNN", - model_hparams=model_config, - optimizer_name="Adam", - optimizer_hparams=optimizer_config, - misc_hparams=misc_config) + lr_list = [1e-3, 1e-4] + act_list = ['LeakyReLU', 'ReLU'] + optim_list = ['Adam', 'SGD'] + for lr, activation, optim in product(lr_list, act_list, optim_list): #, 1e-2, 3e-2]: + model_config = {"input_size": self.input_size, + "output_size": self.output_size, + "activation": activation} + optimizer_config = {"lr": lr} + #"momentum": 0.9,} + misc_config = {"batch_size": self.batch_size, "step": self.step} + + self.train_model(model_name="CNN", + model_hparams=model_config, + optimizer_name=optim, + optimizer_hparams=optimizer_config, + misc_hparams=misc_config) + def load_model(self, model_tag, model_name='CNN'): # Check whether pretrained model exists. If yes, load it and skip training