Commit

Add ML code
ange1a-j14 committed Aug 1, 2024
1 parent 8408e9f commit 43e5a20
Showing 4 changed files with 311 additions and 5 deletions.
9 changes: 4 additions & 5 deletions acquire_automatic.py
@@ -64,10 +64,8 @@ def run_one_shot(start_freq=1, end_freq=1000, decimation=8192, store_data=False,
# Wait for trigger
while 1:
rp_s.tx_txt('ACQ:TRig:STAT?') # Get Trigger Status
-        print('got status')
if rp_s.rx_txt() == 'TD': # Triggered?
break
-    print('triggered')
## ! OS 2.00 or higher only ! ##
while 1:
rp_s.tx_txt('ACQ:TRig:FILL?')
@@ -130,7 +128,8 @@ def run_one_shot(start_freq=1, end_freq=1000, decimation=8192, store_data=False,
rp_s.tx_txt('GEN:RST')
rp_s.tx_txt('ACQ:RST')


-num_shots = 1
+num_shots = 200
for i in range(num_shots):
run_one_shot(store_data=False, plot_data=True)
if i % 100 == 0:
print(i)
run_one_shot(1, 1000, decimation=256, store_data=True, plot_data=False)
100 changes: 100 additions & 0 deletions decoder.py
@@ -0,0 +1,100 @@
#!/usr/bin/env python3

from torch import optim, nn
import torch
import lightning as L
from models import CNN

# Registry mapping model names to classes; create_model below looks models up here.
model_dict = {'CNN': CNN}

class VelocityDecoder(L.LightningModule):
def __init__(self, model_name, model_hparams, optimizer_name,
optimizer_hparams, misc_hparams):
"""Decoder for the target's velocity
Args:
model_name: Name of the model/CNN to run. Used for creating the model (see function below)
model_hparams: Hyperparameters for the model, as dictionary.
optimizer_name: Name of the optimizer to use. Currently supported: Adam, SGD
            optimizer_hparams: Hyperparameters for the optimizer, as dictionary. This includes learning rate, weight decay, etc.
            misc_hparams: Miscellaneous hyperparameters (e.g. batch size), as dictionary.
        """
super().__init__()
        # Exports the hyperparameters to a YAML file, and creates the "self.hparams" namespace
self.save_hyperparameters()
# Create model
self.model = self.create_model(model_name, model_hparams)
# Create loss module
self.loss_function = nn.MSELoss()

torch.set_float32_matmul_precision('medium')

def create_model(self, model_name, model_hparams):
if model_name in model_dict:
return model_dict[model_name](**model_hparams)
else:
assert False, f'Unknown model name "{model_name}". Available models are: {str(model_dict.keys())}'

def forward(self, x):
return self.model(x)

def configure_optimizers(self):
# We will support Adam or SGD as optimizers.
if self.hparams.optimizer_name == "Adam":
# AdamW is Adam with a correct implementation of weight decay (see here
# for details: https://arxiv.org/pdf/1711.05101.pdf)
optimizer = optim.AdamW(self.parameters(),
**self.hparams.optimizer_hparams)
elif self.hparams.optimizer_name == "SGD":
optimizer = optim.SGD(self.parameters(),
**self.hparams.optimizer_hparams)
else:
assert False, f'Unknown optimizer: "{self.hparams.optimizer_name}"'

        # Reduce the learning rate by a factor of 10 after 100 and 150 epochs
scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
milestones=[100, 150],
gamma=0.1)
return [optimizer], [scheduler]

def get_loss_function(self, loss_hparams):
if loss_hparams["loss_name"] == "mse":
self.loss_function = nn.MSELoss()
else:
assert False, f'Unknown loss: "{loss_hparams["loss_name"]}"'

def training_step(self, batch, batch_idx):
        # training_step defines the training loop;
        # it is independent of forward.
x, y = batch
x = x.view(-1, x.size(1)**2)
preds = self.model(x)
loss = self.loss_function(preds, y)
acc = (preds == y).float().mean()
self.log("train_acc", acc, on_step=False, on_epoch=True)
self.log("train_loss", loss, prog_bar=True)
return loss

def validation_step(self, batch, batch_idx):
# validation_step defines the validation loop.
x, y = batch
x = x.view(-1, x.size(1)**2)
preds = self.model(x)
loss = self.loss_function(preds, y)
acc = (preds == y).float().mean()
self.log("val_acc", acc, on_step=False, on_epoch=True)
self.log("val_loss", loss, prog_bar=True)
return loss

def test_step(self, batch, batch_idx):
x, y = batch
x = x.view(-1, x.size(1)**2)
preds = self.model(x)
loss = self.loss_function(preds, y)
acc = (preds == y).float().mean()
self.log("test_acc", acc, on_step=False, on_epoch=True)
self.log("test_loss", loss, prog_bar=True)
return loss

def predict_step(self, batch, batch_idx, dataloader_idx=0):
x, y = batch
x = x.view(-1, x.size(1)**2)
y_hat = self.model(x)
return y_hat, y
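
For reference, a minimal sketch of how this module would be constructed; the hyperparameter values here are illustrative assumptions, not values from this commit:

# Hypothetical instantiation of VelocityDecoder; all values are placeholders.
decoder = VelocityDecoder(model_name='CNN',
                          model_hparams={'input_size': 1, 'output_size': 1},
                          optimizer_name='Adam',
                          optimizer_hparams={'lr': 1e-3},
                          misc_hparams={'batch_size': 128})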
30 changes: 30 additions & 0 deletions models.py
@@ -0,0 +1,30 @@
#!/usr/bin/env python3

from torch import nn
import torch

class CNN(nn.Module):
def __init__(self, input_size, output_size):
super(CNN, self).__init__()
self.conv_layers = nn.Sequential(
nn.Conv1d(input_size, 16, kernel_size=7), # Lout = 250, given L = 256
nn.MaxPool1d(2), # Lout = 125, given L = 250
nn.Conv1d(16, 32, kernel_size=7), # Lout = 119, given L = 125
nn.MaxPool1d(2), # Lout = 59, given L = 119
            nn.Conv1d(32, 64, kernel_size=7),  # Lout = 53, given L = 59
nn.MaxPool1d(2), # Lout = 26, given L = 53
nn.Dropout(0.1),
nn.Conv1d(64, 64, kernel_size=7), # Lout = 20, given L = 26
nn.MaxPool1d(2) # Lout = 10, given L = 20
)
self.fc_layers = nn.Sequential(
nn.Linear(640, 16),
nn.Linear(16, 1)
)

def forward(self, x):
out = self.conv_layers(x)
        out = out.view(out.size(0), -1)  # flatten [batch, 64, 10] to [batch, 640]
out = self.fc_layers(out)
return out
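
The Lout comments in conv_layers follow the standard stride-1, no-padding length rules: Lout = L - kernel_size + 1 for Conv1d, and Lout = floor(L / 2) for MaxPool1d(2). A quick sketch to verify the chain for L = 256 and the 640 = 64 channels x 10 samples expected by the first Linear layer:

# Sanity check of the length arithmetic annotated above, assuming L = 256.
def conv_out(L, k):
    return L - k + 1          # stride-1, unpadded Conv1d

def pool_out(L):
    return L // 2             # MaxPool1d(2)

L = 256
for k in (7, 7, 7, 7):        # the four Conv1d kernels above
    L = pool_out(conv_out(L, k))
print(L)                      # 10, so 64 * 10 = 640 inputs to nn.Linear(640, 16)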

177 changes: 177 additions & 0 deletions train.py
@@ -0,0 +1,177 @@
#!/usr/bin/env python3

import os, sys, glob
import torch
from torch.utils.data import Dataset, DataLoader
import h5py
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelCheckpoint
from decoder import VelocityDecoder
from torch import FloatTensor
import numpy as np
import matplotlib.pyplot as plt
import lightning as L
from lightning.pytorch.loggers import WandbLogger
from itertools import product

# Define a custom Dataset class
class VelocityDataset(Dataset):
def __init__(self, h5_file):
self.h5_file = h5_file
with h5py.File(self.h5_file, 'r') as f:
self.length = len(f['time_data']) # num shots

def open_hdf5(self, group_size=64, num_groups=256):
# solves issue where hdf5 file opened in __init__ prevents multiple
# workers: https://github.com/pytorch/pytorch/issues/11929
self.file = h5py.File(self.h5_file, 'r')
self.inputs = self.file['PD (V)'][:, ::group_size] # take num_groups evenly spaced points, [num_shots, num_groups]
grouped_velocities = np.array(np.hsplit(self.file['Speaker (Microns/s)'], num_groups)) # [num_groups, num_shots, group_size]
grouped_velocities = np.transpose(grouped_velocities, [1, 0, 2]) # [num_shots, num_groups, group_size]
        self.targets = np.average(grouped_velocities, axis=2) # store average velocity per group per shot: [num_shots, num_groups]

def __len__(self):
return self.length

def __getitem__(self, idx):
        if not hasattr(self, 'file'):  # open lazily, once per worker
self.open_hdf5()
return FloatTensor(self.inputs[idx]), FloatTensor(self.targets[idx])
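
To make the grouping arithmetic in open_hdf5 concrete, here is a small sketch of the hsplit/transpose/average pipeline on synthetic data; the 16384-sample shot length is an assumption implied by group_size * num_groups = 64 * 256:

# Shape walk-through of open_hdf5 on fake data (shot length is assumed).
import numpy as np
num_shots, group_size, num_groups = 8, 64, 256
speaker = np.random.rand(num_shots, group_size * num_groups)  # [8, 16384]
grouped = np.array(np.hsplit(speaker, num_groups))            # [256, 8, 64]
grouped = np.transpose(grouped, [1, 0, 2])                    # [8, 256, 64]
targets = np.average(grouped, axis=2)                         # [8, 256]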

class TrainingRunner:
def __init__(self, training_h5, validation_h5, testing_h5,
velocity_only=False):
self.training_h5 = training_h5
self.validation_h5 = validation_h5
self.testing_h5 = testing_h5
self.velocity_only = velocity_only

# get dataloaders
self.set_dataloaders()

# dimensions
self.input_size = next(iter(self.train_loader))[0].size(-1) ** 2
self.output_size = next(iter(self.train_loader))[1].size(-1)

# directories
self.checkpoint_dir = "./checkpoints"

def get_custom_dataloader(self, h5_file, batch_size=128, shuffle=True,
velocity_only=True):
        if velocity_only:
            dataset = VelocityDataset(h5_file)
        else:
            raise NotImplementedError('Only velocity datasets are supported so far.')

# We can use DataLoader to get batches of data
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
num_workers=16, persistent_workers=True,
pin_memory=True)

return dataloader

def set_dataloaders(self, batch_size=128):
self.batch_size = batch_size
self.train_loader = self.get_custom_dataloader(self.training_h5, velocity_only=self.velocity_only, batch_size=self.batch_size)
self.valid_loader = self.get_custom_dataloader(self.validation_h5, velocity_only=self.velocity_only, batch_size=self.batch_size, shuffle=False)
self.test_loader = self.get_custom_dataloader(self.testing_h5, velocity_only=self.velocity_only, batch_size=self.batch_size, shuffle=False)


def train_model(self, model_name, save_name=None, **kwargs):
"""Train model.
Args:
model_name: Name of the model you want to run. Is used to look up the class in "model_dict"
save_name (optional): If specified, this name will be used for creating the checkpoint and logging directory.
"""
if save_name is None:
save_name = model_name

# logger
logger = WandbLogger(project='SMI',
group=model_name, log_model=True,
save_dir=os.path.join(self.checkpoint_dir, save_name))

# callbacks
# early stopping
early_stop_callback = EarlyStopping(monitor="val_loss",
min_delta=0.00,
patience=5,
verbose=True,
mode="min")
checkpoint_callback = ModelCheckpoint(save_weights_only=True,
mode="max", monitor="val_acc")
# Save the best checkpoint based on the maximum val_acc recorded.
# Saves only weights and not optimizer

# Create a PyTorch Lightning trainer with the generation callback
trainer = L.Trainer(
default_root_dir=os.path.join(self.checkpoint_dir, save_name),
accelerator="gpu",
devices=[0],
max_epochs=180,
callbacks=[early_stop_callback, checkpoint_callback],
check_val_every_n_epoch=10,
logger=logger
)

# L.seed_everything(42) # To be reproducible
model = VelocityDecoder(model_name=model_name, **kwargs)
trainer.fit(model, self.train_loader, self.valid_loader)

# Load best checkpoint after training
model = VelocityDecoder.load_from_checkpoint(
trainer.checkpoint_callback.best_model_path)

# Test best model on validation and test set
val_result = trainer.test(model, dataloaders=self.valid_loader,
verbose=False)
test_result = trainer.test(model, dataloaders=self.test_loader,
verbose=False)
result = {"test": test_result[0]["test_acc"],
"val": val_result[0]["test_acc"]}

logger.experiment.finish()

return model, result

def scan_hyperparams(self):
for lr in [1e-3, 1e-2, 3e-2]:

model_config = {"input_size": self.input_size,
"output_size": self.output_size}
optimizer_config = {"lr": lr}
#"momentum": 0.9,}
misc_config = {"batch_size": self.batch_size}

self.train_model(model_name="CNN",
model_hparams=model_config,
optimizer_name="Adam",
optimizer_hparams=optimizer_config,
misc_hparams=misc_config)
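
train.py imports itertools.product, but the scan above only sweeps the learning rate. A sketch of how product could drive a wider grid; the extra candidate values are illustrative assumptions:

# Hypothetical grid scan using the already-imported itertools.product.
for lr, batch_size in product([1e-3, 1e-2, 3e-2], [64, 128]):
    self.set_dataloaders(batch_size=batch_size)
    self.train_model(model_name="CNN",
                     model_hparams={"input_size": self.input_size,
                                    "output_size": self.output_size},
                     optimizer_name="Adam",
                     optimizer_hparams={"lr": lr},
                     misc_hparams={"batch_size": batch_size})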

def load_model(self):
        # Check whether a pretrained model exists; if yes, load it and skip training.
pretrained_filename = os.path.join(self.checkpoint_dir, "SMI", "f63rieqp",
"checkpoints", "*" + ".ckpt")
print(pretrained_filename)
        matches = glob.glob(pretrained_filename)
        if matches and os.path.isfile(matches[0]):
            pretrained_filename = matches[0]
print(
f"Found pretrained model at {pretrained_filename}, loading...")
# Automatically loads the model with the saved hyperparameters
model = VelocityDecoder.load_from_checkpoint(pretrained_filename)

# Create a PyTorch Lightning trainer with the generation callback
trainer = L.Trainer(
accelerator="gpu",
devices=[0]
)

# Test best model on validation and test set
val_result = trainer.test(model, dataloaders=self.valid_loader,
verbose=False)
test_result = trainer.test(model, dataloaders=self.test_loader,
verbose=False)
result = {"test": test_result[0]["test_acc"],
"val": val_result[0]["test_acc"]}

return model, result
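
A minimal driver for this class might look like the following; the HDF5 paths are placeholders, not files from this commit:

# Hypothetical entry point; replace the paths with real dataset files.
if __name__ == '__main__':
    runner = TrainingRunner('train.h5', 'valid.h5', 'test.h5',
                            velocity_only=True)
    runner.scan_hyperparams()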
