forked from Jhy1993/HAN
-
Notifications
You must be signed in to change notification settings - Fork 0
/
jhyexp.py
86 lines (74 loc) · 2.87 KB
/
jhyexp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import os
import numpy as np
#import matplotlib
#matplotlib.use('Agg')
#import matplotlib.pyplot as plt
import pickle
from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn import linear_model
from sklearn.metrics import roc_curve, f1_score
from sklearn import manifold
from sklearn import linear_model
from sklearn.model_selection import train_test_split
def my_KNN(x, y, k=5, split_list=(0.2, 0.4, 0.6, 0.8), time=10, show_train=True, shuffle=True):
    """Evaluate features with a k-nearest-neighbours classifier and print F1.

    For every training fraction in ``split_list`` the samples are split
    ``time`` times into a leading training part and a trailing test part.
    A ``KNeighborsClassifier`` is fitted on the training part, the test part
    is predicted, and the macro-/micro-F1 averaged over the repetitions is
    printed.

    Args:
        x: Feature data, converted with ``np.array`` and squeezed. The result
            is treated as ``(n_samples, n_features)``.
        y: Labels. A 1-D array is used as-is; an array with more than one
            dimension is reduced with ``argmax`` over axis 1.
        k: ``n_neighbors`` passed to ``KNeighborsClassifier``.
        split_list: Iterable of training fractions of the sample count.
        time: Number of repetitions per fraction. A falsy value disables the
            evaluation entirely (nothing is fitted or printed).
        show_train: Unused; kept so existing callers keep working.
        shuffle: If true, samples are permuted with ``np.random.permutation``
            before each repetition.

    Returns:
        None. Results are only printed.
    """
    x = np.squeeze(np.array(x))
    y = np.array(y)
    if y.ndim > 1:
        y = np.argmax(y, axis=1)

    if x.ndim < 2:
        # squeeze() also drops a lone feature axis (N, 1) or a lone sample
        # axis (1, D); restore it so the 2-D row slicing below keeps working.
        label_count = y.shape[0] if y.ndim else 1
        x = x.reshape(1, -1) if label_count == 1 else x.reshape(-1, 1)

    n_samples = x.shape[0]
    for fraction in split_list:
        n_train = int(n_samples * fraction)
        if not time:
            continue

        macro_scores = []
        micro_scores = []
        for _ in range(time):
            if shuffle:
                # x and y are rebound, so permutations accumulate across
                # repetitions and across fractions.
                order = np.random.permutation(n_samples)
                x = x[order, :]
                y = y[order]

            train_x, test_x = x[:n_train, :], x[n_train:, :]
            train_y, test_y = y[:n_train], y[n_train:]

            estimator = KNeighborsClassifier(n_neighbors=k)
            estimator.fit(train_x, train_y)
            y_pred = estimator.predict(test_x)

            macro_scores.append(f1_score(test_y, y_pred, average='macro'))
            micro_scores.append(f1_score(test_y, y_pred, average='micro'))

        print('KNN({}avg, split:{}, k={}) f1_macro: {:.4f}, f1_micro: {:.4f}'.format(
            time, fraction, k,
            sum(macro_scores) / len(macro_scores),
            sum(micro_scores) / len(micro_scores)))
def my_Kmeans(x, y, k=4, time=10, return_NMI=False):
    """Cluster features with KMeans and score the clusters against labels.

    The clustering is scored with normalized mutual information (NMI) and
    the adjusted Rand index (ARI). With a truthy ``time`` the estimator is
    re-fitted ``time`` times and the averaged scores are printed; otherwise a
    single fit is done and only its NMI is printed.

    Args:
        x: Feature data, converted with ``np.array`` and squeezed. The result
            is treated as ``(n_samples, n_features)``.
        y: Ground-truth labels. A 1-D array is used as-is; an array with more
            than one dimension is reduced with ``argmax`` over axis 1.
        k: ``n_clusters`` passed to ``KMeans``.
        time: Number of fits to average over. A falsy value selects the
            single-fit branch.
        return_NMI: If true, return the scores in addition to printing them.

    Returns:
        ``(nmi, ari)`` when ``return_NMI`` is true (averages in the repeated
        branch, single-fit values otherwise); ``None`` when it is false.
    """
    x = np.squeeze(np.array(x))
    y = np.array(y)
    if y.ndim > 1:
        y = np.argmax(y, axis=1)

    if x.ndim < 2:
        # squeeze() also drops a lone feature axis (N, 1) or a lone sample
        # axis (1, D); restore it because the estimator needs 2-D input.
        label_count = y.shape[0] if y.ndim else 1
        x = x.reshape(1, -1) if label_count == 1 else x.reshape(-1, 1)

    estimator = KMeans(n_clusters=k)

    def _fit_and_score():
        # y is passed exactly as the original call did; NOTE(review): the
        # scikit-learn docs describe KMeans.fit's y as ignored - confirm.
        estimator.fit(x, y)
        y_pred = estimator.predict(x)
        nmi = normalized_mutual_info_score(y, y_pred)
        ari = adjusted_rand_score(y, y_pred)
        return nmi, ari

    if time:
        NMI_list = []
        ARI_list = []
        for _ in range(time):
            nmi, ari = _fit_and_score()
            NMI_list.append(nmi)
            ARI_list.append(ari)
        score = sum(NMI_list) / len(NMI_list)
        s2 = sum(ARI_list) / len(ARI_list)
        # Report the real number of runs instead of a hard-coded "10"; the
        # output is unchanged for the default time=10.
        runs = len(NMI_list)
        print('NMI ({} avg): {:.4f} , ARI ({}avg): {:.4f}'.format(runs, score, runs, s2))
    else:
        # ARI is computed here too so that return_NMI=True no longer hits an
        # unassigned s2 in the single-fit branch.
        score, s2 = _fit_and_score()
        print("NMI on all label data: {:.5f}".format(score))

    if return_NMI:
        return score, s2