From 6d8249ffc1608556bae9f809f95a80230551efbc Mon Sep 17 00:00:00 2001
From: Olga Lyashevska
Date: Mon, 10 Jul 2023 09:38:09 +0000
Subject: [PATCH 1/6] experimenting 3 layers

---
 bird_cloud_gnn/gnn_model.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/bird_cloud_gnn/gnn_model.py b/bird_cloud_gnn/gnn_model.py
index e25ab2b..9d02eea 100644
--- a/bird_cloud_gnn/gnn_model.py
+++ b/bird_cloud_gnn/gnn_model.py
@@ -40,8 +40,12 @@ def __init__(self, in_feats: int, h_feats: int, num_classes: int):
         self.in_feats = in_feats
         self.h_feats = h_feats
         self.num_classes = num_classes
+        # self.conv1 = GraphConv(in_feats, h_feats)
+        # self.conv2 = GraphConv(h_feats, h_feats)
+        # self.conv3 = GraphConv(h_feats, num_classes)
         self.conv1 = GraphConv(in_feats, h_feats)
         self.conv2 = GraphConv(h_feats, num_classes)
+        # self.dropout = nn.Dropout(0.2)
 
     def oneline_description(self):
         """Description of the model to uniquely identify it in logs"""
@@ -70,6 +74,9 @@ def forward(self, g, in_feat):
         h = self.conv1(g, in_feat)
         h = F.relu(h)
         h = self.conv2(g, h)
+        # h = F.relu(h)
+        # h = self.dropout(h)
+        # h = self.conv3(g, h)
         g.ndata["h"] = h
         return dgl.mean_nodes(g, "h")
 
@@ -222,8 +229,12 @@ def fit_and_evaluate(
             epoch_values["Accuracy/test"] = num_correct / num_total
             epoch_values["Layer/conv1"] = self.conv1.weight.detach()
             epoch_values["Layer/conv2"] = self.conv2.weight.detach()
+<<<<<<< HEAD
             for i, pg in enumerate(optimizer.param_groups):
                 epoch_values[f"LearningRate/ParGrp{i}"] = pg["lr"]
+=======
+            # epoch_values["Layer/conv3"] = self.conv3.weight.detach()
+>>>>>>> experimenting 3 layers
             if self.num_classes == 2:
                 epoch_values["FalseNegativeRate/test"] = num_false_negative / num_total
                 epoch_values["FalsePositiveRate/test"] = num_false_positive / num_total

From 541c5b54db92230e8a3de1c7da11529c4570cfd7 Mon Sep 17 00:00:00 2001
From: Olga Lyashevska
Date: Tue, 11 Jul 2023 14:12:49 +0000
Subject: [PATCH 2/6] Added dynamic layers

---
 bird_cloud_gnn/gnn_model.py | 63 +++++++++++++++++++------------------
 tests/test_gnn_model.py     | 11 ++++---
 2 files changed, 38 insertions(+), 36 deletions(-)

diff --git a/bird_cloud_gnn/gnn_model.py b/bird_cloud_gnn/gnn_model.py
index 9d02eea..efe76da 100644
--- a/bird_cloud_gnn/gnn_model.py
+++ b/bird_cloud_gnn/gnn_model.py
@@ -3,12 +3,13 @@
 import os
 import dgl
 import numpy as np
-import torch.nn.functional as F
+
 from dgl.dataloading import GraphDataLoader
 from dgl.nn.pytorch.conv import GraphConv
 from torch import nn
 from torch import optim
 from tqdm import tqdm
+from torch.nn.modules import Module
 
 os.environ["DGLBACKEND"] = "pytorch"
 
@@ -17,11 +18,11 @@ class GCN(nn.Module):
     """Graph Convolutional Network construction module
 
-    A two-layer GCN is constructed from input dimension, hidden dimensions and number of classes.
-    Each layer computes new node representations by aggregating neighbor information.
+    An n-layer GCN is constructed from input features and a list of layers.
+    Each layer computes new node representations by aggregating neighbour information.
     """
 
-    def __init__(self, in_feats: int, h_feats: int, num_classes: int):
+    def __init__(self, in_feats: int, layers_data: list):
         """
         The __init__ function is the constructor for a class. It is called when an object of
         that class is instantiated. It can have multiple arguments and it will always be called
         before __new__().
@@ -30,36 +31,38 @@ def __init__(self, in_feats: int, h_feats: int, num_classes: int):
 
         Args:
             self: Access variables that belong to the class object
             in_feats: the number of input features
-            h_feats: the number of hidden features that we want to use for our first graph convolutional layer
-            num_classes: the number of classes that we want to predict
+            layers_data: a list of (hidden layer size, activation function) tuples
 
         Returns:
             The self object
         """
         super().__init__()
         self.in_feats = in_feats
-        self.h_feats = h_feats
-        self.num_classes = num_classes
-        # self.conv1 = GraphConv(in_feats, h_feats)
-        # self.conv2 = GraphConv(h_feats, h_feats)
-        # self.conv3 = GraphConv(h_feats, num_classes)
-        self.conv1 = GraphConv(in_feats, h_feats)
-        self.conv2 = GraphConv(h_feats, num_classes)
-        # self.dropout = nn.Dropout(0.2)
+        self.layers = nn.ModuleList()
+        self.name = ""
+        for size, activation in layers_data:
+            self.layers.append(GraphConv(in_feats, size))
+            self.name = self.name + f"{in_feats}-{size}_"
+            in_feats = size  # For the next layer
+            if activation is not None:
+                assert isinstance(
+                    activation, Module
+                ), "Each tuple should contain a size (int) and a torch.nn.modules.Module."
+                self.layers.append(activation)
+                self.name = self.name + "ReLU_"
+        self.num_classes = size  # the last size should correspond to the number of classes we're predicting
 
     def oneline_description(self):
         """Description of the model to uniquely identify it in logs"""
         return "-".join(
             [
-                "in",
-                f"GC_{self.h_feats}",
-                "RELU",
-                f"GC_{self.num_classes}",
+                "in_",
+                f"{self.name}",
                 "mean-out",
             ]
         )
 
-    def forward(self, g, in_feat):
+    def forward(self, g, in_feats):
         """
         The forward function computes the output of the model.
 
@@ -71,13 +74,13 @@ def forward(self, g, in_feats):
         Returns:
             The output of the second convolutional layer
         """
-        h = self.conv1(g, in_feat)
-        h = F.relu(h)
-        h = self.conv2(g, h)
-        # h = F.relu(h)
-        # h = self.dropout(h)
-        # h = self.conv3(g, h)
-        g.ndata["h"] = h
+        for layer in self.layers:
+            if isinstance(layer, nn.ReLU):
+                in_feats = layer(in_feats)
+            else:
+                in_feats = layer(g, in_feats)
+
+        g.ndata["h"] = in_feats
         return dgl.mean_nodes(g, "h")
 
     def fit(self, train_dataloader, learning_rate=0.01, num_epochs=20):
@@ -227,14 +230,12 @@ def fit_and_evaluate(
             epoch_values["Loss/test"] = test_loss
             epoch_values["Accuracy/test"] = num_correct / num_total
-            epoch_values["Layer/conv1"] = self.conv1.weight.detach()
-            epoch_values["Layer/conv2"] = self.conv2.weight.detach()
-<<<<<<< HEAD
+            # to visualise distribution of tensors
+            # epoch_values["Layer/conv1"] = self.conv1.weight.detach()
+
             for i, pg in enumerate(optimizer.param_groups):
                 epoch_values[f"LearningRate/ParGrp{i}"] = pg["lr"]
-=======
             # epoch_values["Layer/conv3"] = self.conv3.weight.detach()
->>>>>>> experimenting 3 layers
             if self.num_classes == 2:
                 epoch_values["FalseNegativeRate/test"] = num_false_negative / num_total
                 epoch_values["FalsePositiveRate/test"] = num_false_positive / num_total
diff --git a/tests/test_gnn_model.py b/tests/test_gnn_model.py
index b3509c0..f73e4ee 100644
--- a/tests/test_gnn_model.py
+++ b/tests/test_gnn_model.py
@@ -6,6 +6,7 @@
 from bird_cloud_gnn.callback import EarlyStopperCallback
 from bird_cloud_gnn.callback import TensorboardCallback
 from bird_cloud_gnn.gnn_model import GCN
+from torch import nn
 
 
 def test_gnn_model(dataset_fixture):
@@ -30,7 +31,7 @@ def test_gnn_model(dataset_fixture):
         drop_last=False,
     )
 
-    model = GCN(len(dataset_fixture.features), 16, 2)
+    model = GCN(len(dataset_fixture.features), [(16, nn.ReLU()), (2, None)])
 
     model.fit(train_dataloader)
     model.evaluate(test_dataloader)
@@ -53,13 +54,13 @@ class TestBasicBehaviour:
 
     def test_field_access(self):
         """Test field access"""
-        model = GCN(in_feats=10, h_feats=16, num_classes=2)
+        model = GCN(in_feats=10, layers_data=[(16, nn.ReLU()), (2, None)])
         assert model.in_feats == 10
-        assert model.h_feats == 16
+        assert model.name == "10-16_ReLU_16-2_"
         assert model.num_classes == 2
 
     def test_inequality(self):
         """Test inequality of created GCN classes"""
-        model1 = GCN(in_feats=10, h_feats=16, num_classes=2)
-        model2 = GCN(in_feats=15, h_feats=16, num_classes=5)
+        model1 = GCN(in_feats=10, layers_data=[(16, nn.ReLU()), (2, None)])
+        model2 = GCN(in_feats=15, layers_data=[(16, nn.ReLU()), (2, None)])
         assert model1 != model2

From 3ee23ce8df2801318e73a57a96631f022980f2dc Mon Sep 17 00:00:00 2001
From: Olga Lyashevska
Date: Tue, 11 Jul 2023 14:30:32 +0000
Subject: [PATCH 3/6] testing dynamic layers

---
 bird_cloud_gnn/gnn_model.py | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/bird_cloud_gnn/gnn_model.py b/bird_cloud_gnn/gnn_model.py
index efe76da..75cd684 100644
--- a/bird_cloud_gnn/gnn_model.py
+++ b/bird_cloud_gnn/gnn_model.py
@@ -8,8 +8,8 @@
 from dgl.nn.pytorch.conv import GraphConv
 from torch import nn
 from torch import optim
-from tqdm import tqdm
 from torch.nn.modules import Module
+from tqdm import tqdm
 
 os.environ["DGLBACKEND"] = "pytorch"
 
@@ -50,17 +50,13 @@ def __init__(self, in_feats: int, layers_data: list):
             ), "Each tuple should contain a size (int) and a torch.nn.modules.Module."
             self.layers.append(activation)
             self.name = self.name + "ReLU_"
-        self.num_classes = size  # the last size should correspond to the number of classes we're predicting
+        self.num_classes = (
+            size
+        )  # the last size should correspond to the number of classes we're predicting
 
     def oneline_description(self):
         """Description of the model to uniquely identify it in logs"""
-        return "-".join(
-            [
-                "in_",
-                f"{self.name}",
-                "mean-out",
-            ]
-        )
+        return "-".join(["in_", f"{self.name}", "mean-out"])
 
     def forward(self, g, in_feats):
         """
@@ -263,10 +259,7 @@ def infer(self, dataset, batch_size=1024):
         """
         self.eval()
         dataloader = GraphDataLoader(
-            shuffle=False,
-            dataset=dataset,
-            batch_size=batch_size,
-            drop_last=False,
+            shuffle=False, dataset=dataset, batch_size=batch_size, drop_last=False
         )
         labels = np.array([])
         for batched_graph, _ in dataloader:

From 2564fbaca043f18c05430342dd7069fb48e4a6be Mon Sep 17 00:00:00 2001
From: Olga Lyashevska
Date: Tue, 11 Jul 2023 16:45:40 +0200
Subject: [PATCH 4/6] Cross validation tests

---
 bird_cloud_gnn/cross_validation.py | 16 +++++++---------
 bird_cloud_gnn/gnn_model.py        | 10 +++-------
 tests/test_cross_validation.py     | 11 +++++++----
 tests/test_gnn_model.py            | 12 +++++++-----
 4 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/bird_cloud_gnn/cross_validation.py b/bird_cloud_gnn/cross_validation.py
index b7bf797..91c1277 100644
--- a/bird_cloud_gnn/cross_validation.py
+++ b/bird_cloud_gnn/cross_validation.py
@@ -2,11 +2,11 @@
 """
 import numpy as np
 import pandas as pd
+from bird_cloud_gnn.gnn_model import GCN
 from dgl.dataloading import GraphDataLoader
 from sklearn.model_selection import StratifiedKFold
 from torch.utils.data.sampler import SubsetRandomSampler
 from tqdm import tqdm
-from bird_cloud_gnn.gnn_model import GCN
 
 
 def get_dataloaders(dataset, train_idx, test_idx, batch_size):
@@ -32,7 +32,7 @@ def get_dataloaders(dataset, train_idx, test_idx, batch_size):
 # pylint: disable=too-many-arguments, too-many-locals
 def kfold_evaluate(
     dataset,
-    h_feats=16,
+    layers_data,
     n_splits=5,
     learning_rate=0.01,
     num_epochs=100,
@@ -43,7 +43,7 @@ def kfold_evaluate(
     Args:
         dataset (RadarDataset): The dataset
-        h_feats (int, optional): The number of hidden features of the model
+        layers_data (list): The list of (layer size, activation) tuples
         n_splits (int, optional): Number of folds. Defaults to 5.
         learning_rate (float, optional): Learning rate. Defaults to 0.01.
         num_epochs (int, optional): Training epochs. Defaults to 20.
@@ -62,8 +62,7 @@ def kfold_evaluate(
         model = GCN(
             in_feats=len(dataset.features),
-            h_feats=h_feats,
-            num_classes=2,
+            layers_data=layers_data,
         )
         model.fit(train_dataloader, learning_rate=learning_rate, num_epochs=num_epochs)
@@ -76,7 +75,7 @@ def leave_one_origin_out_evaluate(
     dataset,
-    h_feats=16,
+    layers_data,
     learning_rate=0.01,
     num_epochs=100,
     batch_size=512,
@@ -88,7 +87,7 @@ def leave_one_origin_out_evaluate(
     Args:
         dataset (RadarDataset): The dataset.
-        h_feats (int, optional): The number of hidden features of the model
+        layers_data (list): The list of (layer size, activation) tuples
         n_splits (int, optional): Number of folds. Defaults to 5.
         learning_rate (float, optional): Learning rate. Defaults to 0.01.
         num_epochs (int, optional): Training epochs. Defaults to 20.
@@ -110,8 +109,7 @@ def leave_one_origin_out_evaluate(
         model = GCN(
             in_feats=len(dataset.features),
-            h_feats=h_feats,
-            num_classes=2,
+            layers_data=layers_data,
         )
         model.fit(train_dataloader, learning_rate=learning_rate, num_epochs=num_epochs)
diff --git a/bird_cloud_gnn/gnn_model.py b/bird_cloud_gnn/gnn_model.py
index 75cd684..96cab62 100644
--- a/bird_cloud_gnn/gnn_model.py
+++ b/bird_cloud_gnn/gnn_model.py
@@ -1,17 +1,15 @@
 """Module for creating GCN class"""
 import os
+
 import dgl
 import numpy as np
 from dgl.dataloading import GraphDataLoader
 from dgl.nn.pytorch.conv import GraphConv
-from torch import nn
-from torch import optim
+from torch import nn, optim
 from torch.nn.modules import Module
 from tqdm import tqdm
-
 
 os.environ["DGLBACKEND"] = "pytorch"
@@ -50,9 +48,7 @@ def __init__(self, in_feats: int, layers_data: list):
                 activation, Module
             ), "Each tuple should contain a size (int) and a torch.nn.modules.Module."
             self.layers.append(activation)
             self.name = self.name + "ReLU_"
-        self.num_classes = (
-            size
-        )  # the last size should correspond to the number of classes we're predicting
+        self.num_classes = size  # the last size should correspond to the number of classes we're predicting
 
     def oneline_description(self):
         """Description of the model to uniquely identify it in logs"""
diff --git a/tests/test_cross_validation.py b/tests/test_cross_validation.py
index 6062338..ea6fe9c 100644
--- a/tests/test_cross_validation.py
+++ b/tests/test_cross_validation.py
@@ -1,6 +1,9 @@
 """Tests for cross_validation"""
-from bird_cloud_gnn.cross_validation import kfold_evaluate
-from bird_cloud_gnn.cross_validation import leave_one_origin_out_evaluate
+from bird_cloud_gnn.cross_validation import (
+    kfold_evaluate,
+    leave_one_origin_out_evaluate,
+)
+from torch import nn
 
 
 def test_kfold_evaluate(dataset_fixture):
@@ -8,7 +11,7 @@ def test_kfold_evaluate(dataset_fixture):
 
     kfold_evaluate(
         dataset_fixture,
-        h_feats=32,
+        layers_data=[(32, nn.ReLU()), (2, None)],
     )
 
 
@@ -17,5 +20,5 @@ def test_leave_one_out_evaluate(dataset_fixture):
 
     leave_one_origin_out_evaluate(
         dataset_fixture,
-        h_feats=32,
+        layers_data=[(32, nn.ReLU()), (2, None)],
     )
diff --git a/tests/test_gnn_model.py b/tests/test_gnn_model.py
index f73e4ee..60c3a22 100644
--- a/tests/test_gnn_model.py
+++ b/tests/test_gnn_model.py
@@ -1,12 +1,14 @@
 """Tests for gnn_model module"""
 import torch
-from dgl.dataloading import GraphDataLoader
-from torch.utils.data.sampler import SubsetRandomSampler
-from bird_cloud_gnn.callback import CombinedCallback
-from bird_cloud_gnn.callback import EarlyStopperCallback
-from bird_cloud_gnn.callback import TensorboardCallback
+from bird_cloud_gnn.callback import (
+    CombinedCallback,
+    EarlyStopperCallback,
+    TensorboardCallback,
+)
 from bird_cloud_gnn.gnn_model import GCN
+from dgl.dataloading import GraphDataLoader
 from torch import nn
+from torch.utils.data.sampler import SubsetRandomSampler
 
 
 def test_gnn_model(dataset_fixture):

From c3c21edffd3e264048ebbbf5f845d7c20cb7afcc Mon Sep 17 00:00:00 2001
From: Bart
Date: Tue, 11 Jul 2023 18:43:21 +0200
Subject: [PATCH 5/6] optionally allow for different activations

---
 bird_cloud_gnn/cross_validation.py |  2 +-
 bird_cloud_gnn/gnn_model.py        |  9 +++++----
 tests/test_cross_validation.py     |  6 ++----
 tests/test_gnn_model.py            | 17 +++++++++++------
 4 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/bird_cloud_gnn/cross_validation.py b/bird_cloud_gnn/cross_validation.py
index 91c1277..6bdc7c9 100644
--- a/bird_cloud_gnn/cross_validation.py
+++ b/bird_cloud_gnn/cross_validation.py
@@ -2,11 +2,11 @@
 """
 import numpy as np
 import pandas as pd
-from bird_cloud_gnn.gnn_model import GCN
 from dgl.dataloading import GraphDataLoader
 from sklearn.model_selection import StratifiedKFold
 from torch.utils.data.sampler import SubsetRandomSampler
 from tqdm import tqdm
+from bird_cloud_gnn.gnn_model import GCN
 
 
 def get_dataloaders(dataset, train_idx, test_idx, batch_size):
diff --git a/bird_cloud_gnn/gnn_model.py b/bird_cloud_gnn/gnn_model.py
index 96cab62..401d9f3 100644
--- a/bird_cloud_gnn/gnn_model.py
+++ b/bird_cloud_gnn/gnn_model.py
@@ -1,15 +1,16 @@
 """Module for creating GCN class"""
 import os
-
 import dgl
 import numpy as np
 from dgl.dataloading import GraphDataLoader
 from dgl.nn.pytorch.conv import GraphConv
-from torch import nn, optim
+from torch import nn
+from torch import optim
 from torch.nn.modules import Module
 from tqdm import tqdm
+
 
 os.environ["DGLBACKEND"] = "pytorch"
@@ -47,7 +48,7 @@ def __init__(self, in_feats: int, layers_data: list):
                 activation, Module
             ), "Each tuple should contain a size (int) and a torch.nn.modules.Module."
             self.layers.append(activation)
-            self.name = self.name + "ReLU_"
+            self.name = self.name + repr(activation).split("(", 1)[0] + "_"
         self.num_classes = size  # the last size should correspond to the number of classes we're predicting
 
     def oneline_description(self):
@@ -67,7 +68,7 @@ def forward(self, g, in_feats):
             The output of the second convolutional layer
         """
         for layer in self.layers:
-            if isinstance(layer, nn.ReLU):
+            if isinstance(layer, (nn.ReLU, nn.LeakyReLU, nn.ELU)):
                 in_feats = layer(in_feats)
             else:
                 in_feats = layer(g, in_feats)
diff --git a/tests/test_cross_validation.py b/tests/test_cross_validation.py
index ea6fe9c..954e664 100644
--- a/tests/test_cross_validation.py
+++ b/tests/test_cross_validation.py
@@ -1,9 +1,7 @@
 """Tests for cross_validation"""
-from bird_cloud_gnn.cross_validation import (
-    kfold_evaluate,
-    leave_one_origin_out_evaluate,
-)
 from torch import nn
+from bird_cloud_gnn.cross_validation import kfold_evaluate
+from bird_cloud_gnn.cross_validation import leave_one_origin_out_evaluate
 
 
 def test_kfold_evaluate(dataset_fixture):
diff --git a/tests/test_gnn_model.py b/tests/test_gnn_model.py
index 60c3a22..aa95440 100644
--- a/tests/test_gnn_model.py
+++ b/tests/test_gnn_model.py
@@ -1,14 +1,12 @@
 """Tests for gnn_model module"""
 import torch
-from bird_cloud_gnn.callback import (
-    CombinedCallback,
-    EarlyStopperCallback,
-    TensorboardCallback,
-)
-from bird_cloud_gnn.gnn_model import GCN
 from dgl.dataloading import GraphDataLoader
 from torch import nn
 from torch.utils.data.sampler import SubsetRandomSampler
+from bird_cloud_gnn.callback import CombinedCallback
+from bird_cloud_gnn.callback import EarlyStopperCallback
+from bird_cloud_gnn.callback import TensorboardCallback
+from bird_cloud_gnn.gnn_model import GCN
 
 
 def test_gnn_model(dataset_fixture):
@@ -66,3 +64,10 @@ def test_inequality(self):
         model1 = GCN(in_feats=10, layers_data=[(16, nn.ReLU()), (2, None)])
         model2 = GCN(in_feats=15, layers_data=[(16, nn.ReLU()), (2, None)])
         assert model1 != model2
+
+    def test_inequality_activation(self):
+        """Test inequality of created GCN classes with different activation"""
+        model1 = GCN(in_feats=10, layers_data=[(16, nn.ReLU()), (2, None)])
+        model2 = GCN(in_feats=10, layers_data=[(16, nn.ELU()), (2, None)])
+        assert model1 != model2
+        assert model2.name == "10-16_ELU_16-2_"

From ed18edbe232d3fdef95602f150752683fe24c2c6 Mon Sep 17 00:00:00 2001
From: Bart
Date: Tue, 11 Jul 2023 19:08:08 +0200
Subject: [PATCH 6/6] restore layer info in tensor board

---
 bird_cloud_gnn/gnn_model.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bird_cloud_gnn/gnn_model.py b/bird_cloud_gnn/gnn_model.py
index 401d9f3..4698139 100644
--- a/bird_cloud_gnn/gnn_model.py
+++ b/bird_cloud_gnn/gnn_model.py
@@ -223,12 +223,13 @@ def fit_and_evaluate(
             epoch_values["Loss/test"] = test_loss
             epoch_values["Accuracy/test"] = num_correct / num_total
-            # to visualise distribution of tensors
-            # epoch_values["Layer/conv1"] = self.conv1.weight.detach()
 
             for i, pg in enumerate(optimizer.param_groups):
                 epoch_values[f"LearningRate/ParGrp{i}"] = pg["lr"]
-            # epoch_values["Layer/conv3"] = self.conv3.weight.detach()
+            # to visualise distribution of tensors
+            for i, layer in enumerate(self.layers):
+                if not isinstance(layer, (nn.ReLU, nn.LeakyReLU, nn.ELU)):
+                    epoch_values[f"Layer/conv{i}"] = layer.weight.detach()
             if self.num_classes == 2:
epoch_values["FalseNegativeRate/test"] = num_false_negative / num_total epoch_values["FalsePositiveRate/test"] = num_false_positive / num_total