# experiments_digits.py
# Concept-drift experiment on the scikit-learn digits data set: a classifier,
# an autoencoder and a transfer function are evaluated on clean and on
# perturbed ("faulty") samples.
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from autoencoder import Autoencoder, AutoencoderModel
from transfer import Transfer, TransferModel
def load_digits_data(batch1_classes, batch2_classes):
    """Split the scikit-learn digits data set into two class-filtered batches.

    Parameters
    ----------
    batch1_classes : container of labels
        Samples whose label is contained in this collection form batch 1.
    batch2_classes : container of labels
        Samples whose label is contained in this collection form batch 2.

    Returns
    -------
    tuple
        ``(X_batch1, y_batch1, X_batch2, y_batch2)``: features and labels of
        the first batch, followed by features and labels of the second batch.

    Notes
    -----
    Each batch is selected with a boolean mask, which yields new arrays, so
    the two batches do not share memory even when the class lists overlap.
    """
    features, labels = load_digits(return_X_y=True)

    def _select(wanted_classes):
        # True for every sample whose label is one of the wanted classes.
        keep = [label in wanted_classes for label in labels]
        return features[keep, :], labels[keep]

    X_batch1, y_batch1 = _select(batch1_classes)
    X_batch2, y_batch2 = _select(batch2_classes)
    return X_batch1, y_batch1, X_batch2, y_batch2
def perturb_sample(x_img):
    """Simulate a fault by switching off the first half of a sample's pixels.

    The sample is modified **in place** and the very same object is returned,
    so callers may use the return value or rely on the mutation.

    Parameters
    ----------
    x_img : numpy.ndarray
        Sample to perturb. The first axis is the one that gets halved.

    Returns
    -------
    numpy.ndarray
        ``x_img`` itself, with its first ``x_img.shape[0] // 2`` entries along
        the first axis set to ``0.0``.
    """
    n_dim_perturb = x_img.shape[0] // 2  # Perturb the first half of pixels
    # A single slice assignment replaces the per-pixel Python loop.
    x_img[:n_dim_perturb] = 0.0  # Set pixels to zero (i.e. "turn them off")
    return x_img
if __name__ == "__main__":

    def reconstruction_error(samples, reconstruction):
        """Return the mean squared difference between samples and their reconstruction."""
        return np.mean(np.square(samples - reconstruction))

    # Build the two batches. Both use the same five digit classes, so they
    # start out with identical content and identical shape.
    X_batch1, y_batch1, X_batch2, y_batch2 = load_digits_data(
        [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]
    )
    input_dim = X_batch1.shape[1]

    # Simulate concept drift: corrupt every sample of the second batch.
    n_faulty = X_batch2.shape[0]
    for row in range(n_faulty):
        X_batch2[row, :] = perturb_sample(X_batch2[row, :])

    # 5-fold split into training and test sets. The fold indices are computed
    # on batch 1 and reused for batch 2, which works because both batches have
    # the same number of rows.
    folds = KFold(n_splits=5)
    for train_idx, test_idx in folds.split(X_batch1):
        X_batch1_train = X_batch1[train_idx, :]
        X_batch1_test = X_batch1[test_idx, :]
        y_batch1_train = y_batch1[train_idx]
        y_batch1_test = y_batch1[test_idx]

        X_batch2_train = X_batch2[train_idx, :]
        X_batch2_test = X_batch2[test_idx, :]
        y_batch2_train = y_batch2[train_idx]
        y_batch2_test = y_batch2[test_idx]

        # Class distribution of the clean training fold.
        print(np.unique(y_batch1_train, return_counts=True))

        # Downstream task: a classifier trained on clean data only.
        model = LogisticRegression()
        model.fit(X_batch1_train, y_batch1_train)
        clean_accuracy = model.score(X_batch1_test, y_batch1_test)
        print(f"Classifier score on clean test set: {clean_accuracy}")
        faulty_accuracy = model.score(X_batch2_test, y_batch2_test)
        print(f"Classifier score on faulty test set: {faulty_accuracy}")

        # Autoencoder for concept drift detection, fitted on clean data.
        ae_model = AutoencoderModel(
            features=[32, 16, 32, input_dim], input_dim=input_dim
        )
        ae = Autoencoder(ae_model, C=.1)

        # Reconstruction error of the not-yet-fitted model, as a baseline.
        X_batch1_test_pred_0 = ae_model(X_batch1_test)
        score_0 = reconstruction_error(X_batch1_test, X_batch1_test_pred_0)

        ae.fit(
            X_batch1_train,
            n_iter=1000,
            n_trials=5,
            step_size=1e-3,
            verbose=False,
        )
        X_batch1_test_pred = ae_model(X_batch1_test)
        score = reconstruction_error(X_batch1_test, X_batch1_test_pred)
        print(f"Autoencoder score before training the autoencoder: {score_0}")
        print(f"Autoencoder score after tranining the autoencoder: {score}")

        # Sanity check: the classifier should still work on reconstructions
        # of clean samples.
        clean_accuracy = model.score(X_batch1_test, y_batch1_test)
        print(f"Classifier score on clean test set: {clean_accuracy}")
        reconstructed_accuracy = model.score(X_batch1_test_pred, y_batch1_test)
        print(
            f"Classifier score on reconstructed clean test set: {reconstructed_accuracy}"
        )

        # Autoencoder on the drifted (faulty) data.
        X_batch2_test_pred = ae_model(X_batch2_test)
        score_untransformed = reconstruction_error(X_batch2_test, X_batch2_test_pred)
        print(f"Autoencoder score on fauly data: {score_untransformed}")
        faulty_reconstructed_accuracy = model.score(X_batch2_test_pred, y_batch2_test)
        print(
            f"Classifier score on faulty reconstructed test set: {faulty_reconstructed_accuracy}"
        )

        # Transfer function, fitted on the faulty training fold together with
        # the fitted autoencoder.
        transfer_model = TransferModel(features=[input_dim], input_dim=input_dim)
        transfer = Transfer(transfer_model, ae, C=.00001)
        transfer.fit(X_batch2_train, n_iter=500, step_size=None, verbose=False)

        X_batch2_test_transformed = transfer_model(X_batch2_test)
        X_batch2_test_transformed_pred = ae_model(X_batch2_test_transformed)
        score_transformed = reconstruction_error(
            X_batch2_test_transformed, X_batch2_test_transformed_pred
        )
        print(
            f"Autoencoder score on faulty AFTER transforming the data: {score_transformed}"
        )

        # Downstream task on the faulty data, before and after the transfer.
        faulty_accuracy = model.score(X_batch2_test, y_batch2_test)
        print(f"Classifier score on faulty test set: {faulty_accuracy}")
        transformed_accuracy = model.score(X_batch2_test_transformed, y_batch2_test)
        print(
            f"Classifier score on transformed faulty test set: {transformed_accuracy}"
        )

        # Per-class reports: clean, faulty, faulty reconstructed, faulty
        # transformed (in this order).
        print(classification_report(y_batch1_test, model.predict(X_batch1_test)))
        print(classification_report(y_batch2_test, model.predict(X_batch2_test)))
        print(classification_report(y_batch2_test, model.predict(X_batch2_test_pred)))
        print(
            classification_report(
                y_batch2_test, model.predict(X_batch2_test_transformed)
            )
        )

        # Blank line between folds.
        print()