Skip to content

Commit 7f0771c

Browse files
author
Katya Govorkova
committed
Move to PyTorch from TF!!
1 parent 67ffe98 commit 7f0771c

File tree

6 files changed

+242
-164
lines changed

6 files changed

+242
-164
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,4 +159,6 @@ cython_debug/
159159
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
160160
#.idea/
161161

162-
output/
162+
*.DS_Store
163+
output/
164+
.snakemake/

Snakefile

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
# Directory holding the LIGO challenge datasets. The default preserves the
# original hard-coded location; override it without editing this file via
#   snakemake --config data_path=/path/to/ligo_datasets/output
DATA_PATH = config.get(
    'data_path',
    '/home/katya.govorkova/challenge_datasets/ligo_datasets/output',
)


# Train the classifier and write its weights to a single .pth file.
rule train_model:
    params:
        data_path = DATA_PATH
    output:
        model = 'output/model.pth'
    shell:
        'python3 scripts/train.py {params.data_path} {output.model}'


# Score the blackbox dataset with the trained model and write the submission.
rule evaluate_on_blackbox:
    input:
        model = rules.train_model.output.model
    params:
        data_path = DATA_PATH
    output:
        submission = 'output/submission.npy'
    shell:
        'python3 scripts/evaluate.py {params.data_path} {input.model} {output.submission}'

scripts/dataset.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import torch
2+
from torch.utils.data import Dataset
3+
4+
5+
class TorchADDataset(Dataset):
    """In-memory PyTorch dataset pairing feature samples with labels.

    Both inputs are converted once, at construction time, to ``float32``
    tensors placed on ``device``; indexing afterwards is a plain tensor lookup.

    Args:
        features: Array-like of samples, indexed along the first axis
            (NumPy array, tensor, or nested sequence).
        labels: Array-like of targets, one entry per sample.
        device: Torch device (or device string) the tensors are moved to.

    Raises:
        ValueError: If ``features`` and ``labels`` differ in length.
    """

    def __init__(self, features, labels, device):
        """Convert ``features`` and ``labels`` to float32 tensors on ``device``."""
        self.device = device
        # as_tensor accepts ndarrays like from_numpy did, but also lists and
        # tensors, and does the dtype/device conversion in a single step.
        self.features = torch.as_tensor(features, dtype=torch.float32, device=device)
        self.labels = torch.as_tensor(labels, dtype=torch.float32, device=device)

        # __len__ reports the feature count only, so a shorter/longer label
        # array would otherwise surface late as an index error or go unnoticed.
        if self.features.shape[:1] != self.labels.shape[:1]:
            raise ValueError(
                'features and labels must have the same length, got '
                f'{tuple(self.features.shape)} and {tuple(self.labels.shape)}'
            )

    def __len__(self):
        """Return the total number of samples."""
        return len(self.features)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.features[index], self.labels[index]

scripts/evaluate.py

Lines changed: 56 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,59 @@
1-
pretrained_model = tf.keras.models.load_model(os.path.join(data_path, 'saved_model/my_model'))
1+
import os
2+
import argparse
3+
import numpy as np
4+
from sklearn import metrics
25

3-
# Check its architecture
4-
pretrained_model.summary()
6+
import torch
7+
import torch.nn as nn
8+
import torch.nn.functional as F
59

6-
# load challenge test data
7-
blackbox = np.load(os.path.join(data_path, 'ligo_blackbox.npz'))['data'].reshape((-1,200,2))
8-
print('Blackbox shape:', blackbox.shape)
910

10-
blackbox_prediction = model.predict(blackbox)
11-
np.save('submission.npy', blackbox_prediction)
11+
from model import ADClassifier
12+
13+
14+
def main(args):
15+
16+
# Model class must be defined somewhere
17+
18+
pretrained_model = ADClassifier()
19+
pretrained_model.load_state_dict(torch.load(args.modeldir))
20+
pretrained_model.eval()
21+
22+
# Check its architecture
23+
print(pretrained_model)
24+
25+
# load challenge test data
26+
blackbox = np.load(os.path.join(args.data_path, 'ligo_blackbox.npz'))['data'].reshape((-1,200,2))
27+
print('Blackbox shape:', blackbox.shape)
28+
29+
# transform to float64
30+
x = torch.from_numpy(blackbox)
31+
x = x.to(torch.float32)
32+
33+
blackbox_prediction = pretrained_model(x)
34+
np.save(args.submission_path, blackbox_prediction.detach().numpy())
35+
36+
# scores = pretrained_model(x_val)
37+
# fpr, tpr, thresholds = metrics.roc_curve(y_val, scores)
38+
# auc = metrics.roc_auc_score(y_val, scores)
39+
# print(f'The total AUC is {auc*100:.1f} %')
40+
# plt.plot(fpr, tpr)
41+
# plt.xlabel('FPR')
42+
# plt.ylabel('TPR')
43+
# plt.savefig('output/ROC.pdf')
44+
45+
46+
if __name__ == '__main__':
47+
48+
parser = argparse.ArgumentParser()
49+
50+
# Required arguments
51+
parser.add_argument('data_path', type=str,
52+
help='Path to the input dataset')
53+
parser.add_argument('modeldir', type=str,
54+
help='Where to save the model')
55+
parser.add_argument('submission_path', type=str,
56+
help='Where to save the model')
57+
58+
args = parser.parse_args()
59+
main(args)

scripts/model.py

Lines changed: 19 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,72 +1,26 @@
1-
import os
2-
import numpy as np
1+
import torch
2+
import torch.nn as nn
3+
import torch.nn.functional as F
34

4-
import tensorflow as tf
5-
from tensorflow import keras
6-
from tensorflow.keras import layers
75

8-
from matplotlib import pyplot as plt
9-
from sklearn import metrics
10-
from sklearn.model_selection import train_test_split
6+
class ADClassifier(nn.Module):
    """Binary classifier: flatten -> transformer encoder layer -> MLP head.

    Each sample is flattened to a ``d_model``-sized vector, passed through one
    ``nn.TransformerEncoderLayer``, then through a ReLU-activated linear layer
    and a final sigmoid unit producing one score per sample.

    Args:
        d_model: Size of the flattened sample and of the encoder features.
            The default (400) matches the original hard-coded value.
        n_head: Number of attention heads; must divide ``d_model``.

    NOTE(review): the encoder receives a 2-D ``(N, d_model)`` tensor and
    ``nn.TransformerEncoderLayer`` defaults to ``batch_first=False``, so the
    input is presumably treated as one unbatched sequence of N tokens, i.e.
    samples of a batch may attend to each other. Confirm this is intended
    before changing it, since trained weights depend on the current behavior.
    """

    def __init__(self, d_model=400, n_head=1):
        super().__init__()

        self.n_head = n_head
        self.flat1 = nn.Flatten()
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=n_head)
        # The encoder emits d_model features whatever the head count is, so
        # fc1 is sized by d_model (identical to the old 400*n_head for n_head=1).
        self.fc1 = nn.Linear(d_model, d_model)
        self.flat2 = nn.Flatten()
        self.fc2 = nn.Linear(d_model, 1)

    def forward(self, x):
        """Return one sigmoid score per sample, shape ``(N, 1)``.

        Args:
            x: Batch of samples whose non-batch dimensions flatten to
                ``d_model`` values.
        """
        x = self.flat1(x)
        x = self.encoder_layer(x)
        x = F.relu(self.fc1(x))
        x = self.flat2(x)
        # torch.sigmoid replaces the deprecated F.sigmoid; same values.
        return torch.sigmoid(self.fc2(x))

0 commit comments

Comments
 (0)