diff --git a/examples/example_02.py b/examples/example_02.py index 2b9f10a..d789d0f 100644 --- a/examples/example_02.py +++ b/examples/example_02.py @@ -14,6 +14,7 @@ num_heads=4, num_transformer_blocks=4, optimizer=keras.optimizers.Adam(learning_rate=1e-4), + random_state=SEED, ) # Split into train and test sets diff --git a/examples/example_03.py b/examples/example_03.py index 3445e90..8eaae59 100644 --- a/examples/example_03.py +++ b/examples/example_03.py @@ -7,13 +7,14 @@ LSTMModel, RandomForestModel, SVMModel, - TransformerModel, # TODO: Falta XGBoost + TransformerModel, + XGBoostModel, ) -SEED = 0 # TODO: Use this for reproducibility +SEED = 0 # Random seed for reproducibility -dataset = Dataset.mnist_stroke() -train, test = dataset.cut(60_000) +dataset = Dataset.uci_characters() +train, test = dataset.split(.8, random_state=SEED) featurizer = featurizers.UniversalFeaturizer() vectorized_models = [ @@ -24,6 +25,7 @@ bootstrap=False, warm_start=True, n_jobs=6, + random_state=SEED, ), KNeighborsModel( featurizer=featurizer, @@ -32,9 +34,15 @@ DecisionTreeModel( featurizer=featurizer, max_depth=7, + random_state=SEED, ), SVMModel( featurizer=featurizer, + random_state=SEED, + ), + XGBoostModel( + featurizer=featurizer, + random_state=SEED, ), ] @@ -44,12 +52,14 @@ num_heads=4, num_transformer_blocks=4, optimizer=keras.optimizers.Adam(learning_rate=1e-4), + random_state=SEED, ) lstm = LSTMModel( loss="sparse_categorical_crossentropy", optimizer="rmsprop", metrics=["accuracy"], + random_state=SEED, ) # Train and evaluate vectorized models @@ -61,7 +71,7 @@ # Train and evaluate LSTM model checkpoint = keras.callbacks.ModelCheckpoint( - "partially_trained_model_lstm_mnist_stroke.h5", + f"partially_trained_model_lstm_{dataset.name}.h5", monitor="loss", verbose=1, save_best_only=True, @@ -73,7 +83,7 @@ # Train and evaluate Transformer model checkpoint = keras.callbacks.ModelCheckpoint( - "partially_trained_model_transformer_mnist_stroke.h5", + f"partially_trained_model_transformer_{dataset.name}.h5", monitor="loss", verbose=1, save_best_only=True, diff --git a/examples/example_04.py b/examples/example_04.py index c40b494..2c0c6d1 100644 --- a/examples/example_04.py +++ b/examples/example_04.py @@ -23,7 +23,10 @@ # Split the dataset into train and test train, test = dataset.filter( lambda traj, _: len(traj) >= 5 and traj.r.delta.norm.sum() > 0 - ).split(train_size=0.7, random_state=SEED) + ).split( + train_size=0.7, + random_state=SEED, + ) # Select the desired features to be extracted from the trajectories featurizer = featurizers.UniversalFeaturizer() diff --git a/pactus/models/lstm_model.py b/pactus/models/lstm_model.py index 4ff411a..63fd0f1 100644 --- a/pactus/models/lstm_model.py +++ b/pactus/models/lstm_model.py @@ -1,8 +1,11 @@ +import datetime import logging +import time from pathlib import Path from typing import Any, List, Tuple, Union import numpy as np +import tensorflow as tf from sklearn.preprocessing import LabelEncoder from tensorflow import keras from yupi import Trajectory @@ -30,6 +33,7 @@ def __init__( loss="sparse_categorical_crossentropy", optimizer="rmsprop", metrics=None, + random_state: Union[int, None] = None, **kwargs, ): super().__init__(NAME) @@ -38,6 +42,7 @@ def __init__( self.dataset: Union[Dataset, None] = None self.model: keras.Secuential self.max_len = 0 + self.random_state = random_state metrics = ["accuracy"] if metrics is None else metrics self.units = [128, 64] if units is None else units kwargs.update(dict(loss=loss, optimizer=optimizer, metrics=metrics)) @@ -111,6 +116,13 @@ def train( callbacks: Union[list, None] = None, checkpoint: Union[keras.callbacks.ModelCheckpoint, None] = None, ): + if self.random_state is not None: + tf.keras.utils.set_random_seed(self.random_state) + logging.warning( + f"Custom seed provided for {self.name} model. This " + "calls 'tf.keras.utils.set_random_seed' which sets a global " + "random state on python, numpy and tensorflow." + ) if cross_validation != 0: logging.warning("Cross validation is not supported yet for lstm") self.set_summary(epochs=epochs, validation_split=validation_split) diff --git a/pactus/models/transformer_model.py b/pactus/models/transformer_model.py index db62b6b..71342a7 100644 --- a/pactus/models/transformer_model.py +++ b/pactus/models/transformer_model.py @@ -3,6 +3,7 @@ from typing import Any, List, Tuple, Union import numpy as np +import tensorflow as tf from sklearn.model_selection import KFold from sklearn.preprocessing import LabelEncoder from tensorflow import keras @@ -39,9 +40,9 @@ def __init__( max_traj_len: int = -1, skip_long_trajs: bool = False, mask_value=cfg.MASK_VALUE, - name=NAME, + random_state: Union[int, None] = None, ): - super().__init__(name) + super().__init__(NAME) self.head_size = head_size self.num_heads = num_heads self.ff_dim = ff_dim @@ -59,6 +60,7 @@ def __init__( self.encoder: Union[LabelEncoder, None] = None self.labels: Union[List[Any], None] = None self.original_data: Union[Data, None] = None + self.random_state: Union[int, None] = random_state self.set_summary( head_size=self.head_size, num_heads=self.num_heads, @@ -85,6 +87,13 @@ def train( callbacks: Union[list, None] = None, checkpoint: Union[keras.callbacks.ModelCheckpoint, None] = None, ): + if self.random_state is not None: + tf.keras.utils.set_random_seed(self.random_state) + logging.warning( + f"Custom seed provided for {self.name} model. This " + "calls 'tf.keras.utils.set_random_seed' which sets a global " + "random state on python, numpy and tensorflow." + ) self.set_summary( cross_validation=cross_validation, epochs=epochs,