Skip to content

Commit 17206b8

Browse files
authored
Add files via upload
1 parent 50890d0 commit 17206b8

File tree

3 files changed

+605
-0
lines changed

3 files changed

+605
-0
lines changed

02_emp_kmeans.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
# LEiDA(Cabral 2017. Sci Rep.)-PART2: K-means and centroids for each brain states
2+
# author: zhangjiaqi(Smile.Z), CASIA, Brainnetome
3+
import numpy as np
4+
from scipy.signal import hilbert
5+
from scipy.spatial.distance import cosine
6+
import math
7+
import matplotlib.pyplot as plt
8+
import seaborn as sns
9+
from sklearn.cluster import KMeans
10+
from sklearn.metrics import silhouette_score
11+
from sklearn.metrics import davies_bouldin_score
12+
import pandas as pd
13+
from sklearn.decomposition import PCA
14+
import os
15+
from validclust import ValidClust
16+
from mpl_toolkits.mplot3d import Axes3D
17+
18+
19+
# get Best K
20+
# V1: (nsubjects * T) * N
21+
# M: each k run M times for average score
22+
23+
def Decide_K(V1):
24+
X = []
25+
for i in range(V1.shape[0]):
26+
X.append(V1[i])
27+
vclust = ValidClust(k=list(range(2, 21)), methods = ['kmeans'])
28+
cvi_vals = vclust.fit_predict(X)
29+
cvi_vals.to_csv('DecideK/cluster.csv')
30+
vclust.plot()
31+
plt.savefig('DecideK/cluster.png')
32+
33+
34+
# get centroids for each brain states and sort by probability
35+
# V1: (nsubjects * T) * N
36+
# k: best cluster number
37+
38+
def EMP_BrainStates(V1, k):
39+
X = []
40+
for i in range(V1.shape[0]):
41+
X.append(V1[i])
42+
km = KMeans(n_clusters=k)
43+
km.fit(X)
44+
45+
count = pd.Series(km.labels_).value_counts()
46+
center = pd.DataFrame(km.cluster_centers_, dtype=np.float)
47+
r= pd.concat([count, center], axis=1)
48+
np.savetxt(str(k)+'/centroids_'+str(k)+'_count.txt', np.array(count), delimiter=' ')
49+
r.to_csv(str(k)+'/centroids_'+str(k)+'_cluster.csv')
50+
data = r.values[np.argsort(-r.values[:, 0])]
51+
centroids = data[:, 1:]
52+
53+
plt.clf()
54+
vec = PCA(n_components=2).fit_transform(X)
55+
df2 = pd.DataFrame(vec)
56+
df2['labels'] = km.labels_
57+
visual_vec = k*[0]
58+
for m in range(k):
59+
visual_vec[m] = df2[df2['labels'] == m]
60+
plt.scatter(visual_vec[m][0], visual_vec[m][1], s=5)
61+
plt.savefig(str(k)+'/kmeans_visualize_2d_'+str(k)+'_cluster.png')
62+
plt.clf()
63+
64+
fig = plt.figure()
65+
ax = Axes3D(fig)
66+
vec = PCA(n_components=3).fit_transform(X)
67+
df3 = pd.DataFrame(vec)
68+
df3['labels'] = km.labels_
69+
visual_vec = k*[0]
70+
for m in range(k):
71+
visual_vec[m] = df3[df3['labels'] == m]
72+
ax.scatter(visual_vec[m][0], visual_vec[m][1], visual_vec[m][2],s=5)
73+
plt.savefig(str(k)+'/kmeans_visualize_3d_'+str(k)+'_cluster.png')
74+
75+
76+
return centroids
77+
78+
79+
if __name__ == '__main__':
80+
path_mdd = '/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step1_get_dFC_V1/V1/MDD/'
81+
path_hc = '/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step1_get_dFC_V1/V1/HC/'
82+
mdd_file = os.listdir(path_mdd)
83+
hc_file = os.listdir(path_hc)
84+
85+
V1 = np.zeros((40*230, 246))
86+
87+
i = 0
88+
for file in hc_file:
89+
path = path_hc+file
90+
vec = np.loadtxt(path)
91+
for j in range(vec.shape[0]):
92+
V1[i, :] = vec[j]
93+
i = i+1
94+
95+
for file in mdd_file:
96+
path = path_mdd+file
97+
vec = np.loadtxt(path)
98+
for j in range(vec.shape[0]):
99+
V1[i, :] = vec[j]
100+
i = i+1
101+
102+
Decide_K(V1)
103+
104+
for k in range(2, 21):
105+
os.makedirs('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step2_emp_kmeans/'+str(k))
106+
center = EMP_BrainStates(V1, k)
107+
np.savetxt(str(k)+'/centroids_'+str(k)+'_cluster.txt', center, delimiter=' ')

03_index.py

Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
# LEiDA(Cabral 2017. Sci Rep.)-PART3: Index for each brain state
2+
# author: zhangjiaqi(Smile.Z), CASIA, Brainnetome
3+
import numpy as np
4+
from scipy.signal import hilbert
5+
from scipy.spatial.distance import cosine
6+
import math
7+
import matplotlib.pyplot as plt
8+
import seaborn as sns
9+
from sklearn.cluster import KMeans
10+
from sklearn.metrics import silhouette_score
11+
from sklearn.metrics import davies_bouldin_score
12+
import pandas as pd
13+
from sklearn.decomposition import PCA
14+
import os
15+
from validclust import ValidClust
16+
from mpl_toolkits.mplot3d import Axes3D
17+
import itertools
18+
from scipy import stats
19+
20+
21+
# Yeo7 Correlation with cluster
22+
def Yeo7Corr(K):
23+
centers = np.loadtxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step2_emp_kmeans/'+str(K)+'/centroids_'+str(K)+'_cluster.txt')
24+
yeo7 = np.loadtxt('/share/home/zhangjiaqi/2022Project/HOPF/00_Assign2Yeo7/output/DICE_Yeo-7_&_Brainnetome_res-1x1x1.txt')
25+
yeo7 = np.delete(yeo7, 0, axis=0)
26+
yeo7 = np.delete(yeo7, 0, axis=1)
27+
corr = np.zeros((K, 7))
28+
p_value = np.zeros((K, 7))
29+
for i in range(K):
30+
for j in range(7):
31+
corr[i][j] = stats.pearsonr(centers[i, :], yeo7[j, :])[0]
32+
p_value[i][j] = stats.pearsonr(centers[i, :], yeo7[j, :])[1]
33+
return corr, p_value
34+
35+
36+
# Yeo17 Correlation with cluster
37+
def Yeo17Corr(K):
38+
centers = np.loadtxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step2_emp_kmeans/'+str(K)+'/centroids_'+str(K)+'_cluster.txt')
39+
yeo17 = np.loadtxt('/share/home/zhangjiaqi/2022Project/HOPF/00_Assign2Yeo7/output/DICE_Yeo-17_&_Brainnetome_res-1x1x1.txt')
40+
yeo17 = np.delete(yeo17, 0, axis=0)
41+
yeo17 = np.delete(yeo17, 0, axis=1)
42+
corr = np.zeros((K, 17))
43+
p_value = np.zeros((K, 17))
44+
for i in range(K):
45+
for j in range(17):
46+
corr[i][j] = stats.pearsonr(centers[i, :], yeo17[j, :])[0]
47+
p_value[i][j] = stats.pearsonr(centers[i, :], yeo17[j, :])[1]
48+
return corr, p_value
49+
50+
51+
# Community for cluster
52+
# K: number of cluster
53+
def Community(K):
54+
f = open('brainnetome_subregions.txt', 'r')
55+
subregions = f.readlines()
56+
centers = np.loadtxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step2_emp_kmeans/'+str(K)+'/centroids_'+str(K)+'_cluster.txt')
57+
community_pname = {}
58+
community_pno = {}
59+
community_nname = {}
60+
community_nno = {}
61+
for i in range(K):
62+
pname = []
63+
pno = []
64+
nname = []
65+
nno = []
66+
for j in range(centers[i].shape[0]):
67+
if centers[i][j] >0:
68+
pname.append(subregions[j])
69+
pno.append(j)
70+
else:
71+
nname.append(subregions[j])
72+
nno.append(j)
73+
community_pname[i] = pname
74+
community_pno[i] = pno
75+
community_nname[i] = nname
76+
community_nno[i] = nno
77+
return community_pname, community_pno, community_nname, community_nno
78+
79+
# Sign for each subject
80+
# V1: ntp * nregions 230*246
81+
# K: number of cluster
82+
def Sign(V1, K):
83+
cluster = np.zeros((V1.shape[0]))
84+
centers = np.loadtxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step2_emp_kmeans/'+str(K)+'/centroids_'+str(K)+'_cluster.txt')
85+
for i in range(V1.shape[0]):
86+
dis = []
87+
for j in range(K):
88+
dis.append(np.linalg.norm(V1[i]-centers[j]))
89+
cluster[i] = dis.index(min(dis))
90+
return cluster
91+
92+
93+
# Fractional Occupancy for each subject
94+
# V1: ntp * nregions 230*246
95+
# cluster: sign for which cluster
96+
# K: number of cluster
97+
def FO(V1, cluster,K):
98+
fo = np.zeros((K))
99+
cluster = list(cluster)
100+
for i in range(K):
101+
fo[i] = cluster.count(i)/V1.shape[0]
102+
return fo
103+
104+
105+
# Dwell Time for each subject
106+
def DT(cluster, K):
107+
cluster = list(map(int, cluster))
108+
cnt = np.zeros((K))
109+
sl = np.zeros((K))
110+
dt = np.zeros((K))
111+
for key, group in itertools.groupby(cluster):
112+
cnt[key] += 1
113+
sl[key] += len(list(group))
114+
for i in range(K):
115+
dt[i] = 2*sl[i]/cnt[i]
116+
return dt
117+
118+
119+
# Markov Chain Transition Probabilities
120+
121+
def transition_matrix(transitions, K):
122+
n = 1+ max(transitions) #number of states
123+
124+
M = np.zeros((K, K))
125+
N = np.zeros((K, K))
126+
127+
for (i,j) in zip(transitions,transitions[1:]):
128+
M[int(i)][int(j)] += 1
129+
130+
#now convert to probabilities:
131+
for i in range(M.shape[0]):
132+
s = np.sum(M[i])
133+
if s>0:
134+
N[i, :] = M[i, :]/s
135+
return N
136+
137+
138+
if __name__ == "__main__":
139+
for i in range(2, 21):
140+
os.makedirs('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/cluster/'+str(i))
141+
corr, p_value = Yeo7Corr(i)
142+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/cluster/'+str(i)+'/yeo7corr.txt', corr, delimiter=' ')
143+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/cluster/'+str(i)+'/yeo7pvalue.txt', p_value, delimiter=' ')
144+
145+
corr, p_value = Yeo17Corr(i)
146+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/cluster/'+str(i)+'/yeo17corr.txt', corr, delimiter=' ')
147+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/cluster/'+str(i)+'/yeo17pvalue.txt', p_value, delimiter=' ')
148+
149+
community_pname, community_pno, community_nname, community_nno = Community(i)
150+
for j in range(i):
151+
if community_pname[j] != {}:
152+
f = open('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/cluster/'+str(i)+'/cluster_'+str(j)+'_positive_region_name.txt', 'a+')
153+
for name in community_pname[j]:
154+
f.writelines(name)
155+
f.close()
156+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/cluster/'+str(i)+'/cluster_'+str(j)+'_positive_region_no.txt', np.array(list(map(int, community_pno[j]))), delimiter=' ')
157+
if community_nname[j] != {}:
158+
f = open('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/cluster/'+str(i)+'/cluster_'+str(j)+'_negative_region_name.txt', 'a+')
159+
for name in community_nname[j]:
160+
f.writelines(name)
161+
f.close()
162+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/cluster/'+str(i)+'/cluster_'+str(j)+'_negative_region_no.txt', np.array(list(map(int, community_nno[j]))), delimiter=' ')
163+
164+
165+
mdd_path = '/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step1_get_dFC_V1/V1/MDD/'
166+
hc_path = '/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step1_get_dFC_V1/V1/HC/'
167+
mdd_file = os.listdir(mdd_path)
168+
hc_file = os.listdir(hc_path)
169+
170+
for sub in mdd_file:
171+
print(sub[:7]+' starting...')
172+
os.makedirs('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:7])
173+
V1 = np.loadtxt(mdd_path+sub)
174+
for K in range(2, 21):
175+
os.makedirs('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:7]+'/'+str(K))
176+
cluster = Sign(V1, K)
177+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:7]+'/'+str(K)+'/V1_cluster.txt', np.array(cluster), delimiter=' ')
178+
fo = FO(V1, cluster, K)
179+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:7]+'/'+str(K)+'/FO.txt', np.array(fo), delimiter=' ')
180+
dt = DT(cluster, K)
181+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:7]+'/'+str(K)+'/DT.txt', np.array(dt), delimiter=' ')
182+
markov_matrix = transition_matrix(cluster, K)
183+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:7]+'/'+str(K)+'/Markov_Matrix.txt', np.array(markov_matrix), delimiter=' ')
184+
print(sub[:7]+' finished.')
185+
186+
187+
188+
for sub in hc_file:
189+
print(sub[:10]+' starting...')
190+
os.makedirs('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:10])
191+
V1 = np.loadtxt(hc_path+sub)
192+
for K in range(2, 21):
193+
os.makedirs('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:10]+'/'+str(K))
194+
cluster = Sign(V1, K)
195+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:10]+'/'+str(K)+'/V1_cluster.txt', np.array(cluster), delimiter=' ')
196+
fo = FO(V1, cluster, K)
197+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:10]+'/'+str(K)+'/FO.txt', np.array(fo), delimiter=' ')
198+
dt = DT(cluster, K)
199+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:10]+'/'+str(K)+'/DT.txt', np.array(dt), delimiter=' ')
200+
markov_matrix = transition_matrix(cluster, K)
201+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:10]+'/'+str(K)+'/Markov_Matrix.txt', np.array(markov_matrix), delimiter=' ')
202+
print(sub[:10]+' finished.')
203+
204+
205+
print("MDD Group Starting...")
206+
V1 = np.zeros((20*230, 246))
207+
i = 0
208+
for file in mdd_file:
209+
path = mdd_path+file
210+
vec = np.loadtxt(path)
211+
for j in range(vec.shape[0]):
212+
V1[i, :] = vec[j]
213+
i = i+1
214+
215+
for K in range(2, 21):
216+
os.makedirs('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/MDDGroup/'+str(K))
217+
cluster = Sign(V1, K)
218+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/MDDGroup/'+str(K)+'/V1_cluster.txt', np.array(cluster), delimiter=' ')
219+
fo = FO(V1, cluster, K)
220+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/MDDGroup/'+str(K)+'/FO.txt', np.array(fo), delimiter=' ')
221+
dt = DT(cluster, K)
222+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/MDDGroup/'+str(K)+'/DT.txt', np.array(dt), delimiter=' ')
223+
markov_matrix = transition_matrix(cluster, K)
224+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/MDDGroup/'+str(K)+'/Markov_Matrix.txt', np.array(markov_matrix), delimiter=' ')
225+
226+
print("MDD Group finished.")
227+
228+
229+
print("HC Group Starting...")
230+
V1 = np.zeros((20*230, 246))
231+
i = 0
232+
for file in hc_file:
233+
path = hc_path+file
234+
vec = np.loadtxt(path)
235+
for j in range(vec.shape[0]):
236+
V1[i, :] = vec[j]
237+
i = i+1
238+
239+
for K in range(2, 21):
240+
os.makedirs('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/HCGroup/'+str(K))
241+
cluster = Sign(V1, K)
242+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/HCGroup/'+str(K)+'/V1_cluster.txt', np.array(cluster), delimiter=' ')
243+
fo = FO(V1, cluster, K)
244+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/HCGroup/'+str(K)+'/FO.txt', np.array(fo), delimiter=' ')
245+
dt = DT(cluster, K)
246+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/HCGroup/'+str(K)+'/DT.txt', np.array(dt), delimiter=' ')
247+
markov_matrix = transition_matrix(cluster, K)
248+
np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/HCGroup/'+str(K)+'/Markov_Matrix.txt', np.array(markov_matrix), delimiter=' ')
249+
250+
print("HC Group finished.")
251+
252+

0 commit comments

Comments
 (0)