Add files via upload

jqzhang111 · web-flow · commit 17206b8667c0 · 2022-05-16T14:29:30.000+08:00
diff --git a/02_emp_kmeans.py b/02_emp_kmeans.py
@@ -0,0 +1,107 @@
+# LEiDA (Cabral 2017, Sci Rep.) - PART 2: K-means clustering and centroids for each brain state
+# author: zhangjiaqi(Smile.Z), CASIA, Brainnetome
+import numpy as np 
+from scipy.signal import hilbert
+from scipy.spatial.distance import cosine
+import math
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.cluster import KMeans
+from sklearn.metrics import silhouette_score
+from sklearn.metrics import davies_bouldin_score
+import pandas as pd
+from sklearn.decomposition import PCA
+import os
+from validclust import ValidClust
+from mpl_toolkits.mplot3d import Axes3D
+
+
+# get Best K
+# V1: (nsubjects * T) * N
+# NOTE: an "M" repeat count (run each k M times and average the scores) is not implemented; Decide_K takes only V1
+
+def Decide_K(V1):
+    X = []
+    for i in range(V1.shape[0]):
+        X.append(V1[i])
+    vclust = ValidClust(k=list(range(2, 21)), methods = ['kmeans'])
+    cvi_vals = vclust.fit_predict(X)
+    cvi_vals.to_csv('DecideK/cluster.csv')
+    vclust.plot()
+    plt.savefig('DecideK/cluster.png')
+
+
+# get the centroid of each brain state, sorted by descending cluster size (occurrence count)
+# V1: (nsubjects * T) * N
+# k: best cluster number
+
+def EMP_BrainStates(V1, k):
+    X = []
+    for i in range(V1.shape[0]):
+        X.append(V1[i])
+    km = KMeans(n_clusters=k)
+    km.fit(X)
+
+    count = pd.Series(km.labels_).value_counts()
+    center = pd.DataFrame(km.cluster_centers_, dtype=np.float)
+    r= pd.concat([count, center], axis=1)
+    np.savetxt(str(k)+'/centroids_'+str(k)+'_count.txt', np.array(count), delimiter=' ')
+    r.to_csv(str(k)+'/centroids_'+str(k)+'_cluster.csv')
+    data = r.values[np.argsort(-r.values[:, 0])]
+    centroids = data[:, 1:]
+    
+    plt.clf()
+    vec = PCA(n_components=2).fit_transform(X)
+    df2 = pd.DataFrame(vec)
+    df2['labels'] = km.labels_
+    visual_vec = k*[0]
+    for m in range(k):
+        visual_vec[m] = df2[df2['labels'] == m]
+        plt.scatter(visual_vec[m][0], visual_vec[m][1], s=5)
+    plt.savefig(str(k)+'/kmeans_visualize_2d_'+str(k)+'_cluster.png')
+    plt.clf()
+    
+    fig = plt.figure()
+    ax = Axes3D(fig)
+    vec = PCA(n_components=3).fit_transform(X)
+    df3 = pd.DataFrame(vec)
+    df3['labels'] = km.labels_
+    visual_vec = k*[0]
+    for m in range(k):
+        visual_vec[m] = df3[df3['labels'] == m]
+        ax.scatter(visual_vec[m][0], visual_vec[m][1], visual_vec[m][2],s=5)
+    plt.savefig(str(k)+'/kmeans_visualize_3d_'+str(k)+'_cluster.png')
+    
+
+    return centroids
+    
+    
+if __name__ == '__main__':
+    path_mdd = '/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step1_get_dFC_V1/V1/MDD/'
+    path_hc = '/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step1_get_dFC_V1/V1/HC/'
+    mdd_file = os.listdir(path_mdd)
+    hc_file = os.listdir(path_hc)
+    
+    V1 = np.zeros((40*230, 246))
+    
+    i = 0
+    for file in hc_file:
+        path = path_hc+file
+        vec = np.loadtxt(path)
+        for j in range(vec.shape[0]):
+            V1[i, :] = vec[j]
+            i = i+1
+
+    for file in mdd_file:
+        path = path_mdd+file
+        vec = np.loadtxt(path)
+        for j in range(vec.shape[0]):
+            V1[i, :] = vec[j]
+            i = i+1
+
+    Decide_K(V1)
+
+    for k in range(2, 21):
+        os.makedirs('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step2_emp_kmeans/'+str(k))
+        center = EMP_BrainStates(V1, k)
+        np.savetxt(str(k)+'/centroids_'+str(k)+'_cluster.txt', center, delimiter=' ')
diff --git a/03_index.py b/03_index.py
@@ -0,0 +1,252 @@
+# LEiDA(Cabral 2017. Sci Rep.)-PART3: Index for each brain state
+# author: zhangjiaqi(Smile.Z), CASIA, Brainnetome
+import numpy as np 
+from scipy.signal import hilbert
+from scipy.spatial.distance import cosine
+import math
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.cluster import KMeans
+from sklearn.metrics import silhouette_score
+from sklearn.metrics import davies_bouldin_score
+import pandas as pd
+from sklearn.decomposition import PCA
+import os
+from validclust import ValidClust
+from mpl_toolkits.mplot3d import Axes3D
+import itertools
+from scipy import stats
+
+
+# Yeo7 Correlation with cluster
+def Yeo7Corr(K):
+    centers = np.loadtxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step2_emp_kmeans/'+str(K)+'/centroids_'+str(K)+'_cluster.txt')
+    yeo7 = np.loadtxt('/share/home/zhangjiaqi/2022Project/HOPF/00_Assign2Yeo7/output/DICE_Yeo-7_&_Brainnetome_res-1x1x1.txt')
+    yeo7 = np.delete(yeo7, 0, axis=0)
+    yeo7 = np.delete(yeo7, 0, axis=1)
+    corr = np.zeros((K, 7))
+    p_value = np.zeros((K, 7))
+    for i in range(K):
+        for j in range(7):
+            corr[i][j] = stats.pearsonr(centers[i, :], yeo7[j, :])[0]
+            p_value[i][j] = stats.pearsonr(centers[i, :], yeo7[j, :])[1]
+    return corr, p_value
+
+
+# Yeo17 Correlation with cluster
+def Yeo17Corr(K):
+    centers = np.loadtxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step2_emp_kmeans/'+str(K)+'/centroids_'+str(K)+'_cluster.txt')
+    yeo17 = np.loadtxt('/share/home/zhangjiaqi/2022Project/HOPF/00_Assign2Yeo7/output/DICE_Yeo-17_&_Brainnetome_res-1x1x1.txt')
+    yeo17 = np.delete(yeo17, 0, axis=0)
+    yeo17 = np.delete(yeo17, 0, axis=1)
+    corr = np.zeros((K, 17))
+    p_value = np.zeros((K, 17))
+    for i in range(K):
+        for j in range(17):
+            corr[i][j] = stats.pearsonr(centers[i, :], yeo17[j, :])[0]
+            p_value[i][j] = stats.pearsonr(centers[i, :], yeo17[j, :])[1]
+    return corr, p_value
+
+
+# Community for cluster
+# K: number of cluster
+def Community(K):
+    f = open('brainnetome_subregions.txt', 'r')
+    subregions = f.readlines()
+    centers = np.loadtxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step2_emp_kmeans/'+str(K)+'/centroids_'+str(K)+'_cluster.txt')
+    community_pname = {}
+    community_pno = {}
+    community_nname = {}
+    community_nno = {}
+    for i in range(K):
+        pname = []
+        pno = []
+        nname = []
+        nno = []
+        for j in range(centers[i].shape[0]):
+            if centers[i][j] >0:
+                pname.append(subregions[j])
+                pno.append(j)
+            else:
+                nname.append(subregions[j])
+                nno.append(j)
+        community_pname[i] = pname 
+        community_pno[i] = pno
+        community_nname[i] = nname 
+        community_nno[i] = nno
+    return community_pname, community_pno, community_nname, community_nno
+
+# Sign for each subject
+# V1: ntp * nregions 230*246
+# K: number of cluster
+def Sign(V1, K):
+    cluster = np.zeros((V1.shape[0]))
+    centers = np.loadtxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step2_emp_kmeans/'+str(K)+'/centroids_'+str(K)+'_cluster.txt')
+    for i in range(V1.shape[0]):
+        dis = []
+        for j in range(K):
+            dis.append(np.linalg.norm(V1[i]-centers[j]))
+        cluster[i] = dis.index(min(dis))
+    return cluster
+
+
+# Fractional Occupancy for each subject
+# V1: ntp * nregions 230*246
+# cluster: sign for which cluster
+# K: number of cluster
+def FO(V1, cluster,K):
+    fo = np.zeros((K))
+    cluster = list(cluster)
+    for i in range(K):
+        fo[i] = cluster.count(i)/V1.shape[0]
+    return fo
+
+
+# Dwell Time for each subject
+def DT(cluster, K):
+    cluster = list(map(int, cluster))
+    cnt = np.zeros((K))
+    sl = np.zeros((K))
+    dt = np.zeros((K))
+    for key, group in itertools.groupby(cluster):
+        cnt[key] += 1
+        sl[key] += len(list(group))
+    for i in range(K):
+        dt[i] = 2*sl[i]/cnt[i]
+    return dt
+
+
+# Markov Chain Transition Probabilities
+
+def transition_matrix(transitions, K):
+    n = 1+ max(transitions) #number of states
+
+    M = np.zeros((K, K))
+    N = np.zeros((K, K))
+
+    for (i,j) in zip(transitions,transitions[1:]):
+        M[int(i)][int(j)] += 1
+
+    #now convert to probabilities:
+    for i in range(M.shape[0]):
+        s = np.sum(M[i])
+        if s>0:
+            N[i, :] = M[i, :]/s
+    return N
+
+
+if __name__ == "__main__":
+    for i in range(2, 21):
+        os.makedirs('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/cluster/'+str(i))
+        corr, p_value = Yeo7Corr(i)
+        np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/cluster/'+str(i)+'/yeo7corr.txt', corr, delimiter=' ')
+        np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/cluster/'+str(i)+'/yeo7pvalue.txt', p_value, delimiter=' ')
+
+        corr, p_value = Yeo17Corr(i)
+        np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/cluster/'+str(i)+'/yeo17corr.txt', corr, delimiter=' ')
+        np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/cluster/'+str(i)+'/yeo17pvalue.txt', p_value, delimiter=' ')
+
+        community_pname, community_pno, community_nname, community_nno = Community(i)
+        for j in range(i):
+            if community_pname[j] != {}:
+                f = open('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/cluster/'+str(i)+'/cluster_'+str(j)+'_positive_region_name.txt', 'a+')
+                for name in community_pname[j]:
+                    f.writelines(name)
+                f.close()
+                np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/cluster/'+str(i)+'/cluster_'+str(j)+'_positive_region_no.txt', np.array(list(map(int, community_pno[j]))), delimiter=' ')
+            if community_nname[j] != {}:
+                f = open('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/cluster/'+str(i)+'/cluster_'+str(j)+'_negative_region_name.txt', 'a+')
+                for name in community_nname[j]:
+                    f.writelines(name)
+                f.close()
+                np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/cluster/'+str(i)+'/cluster_'+str(j)+'_negative_region_no.txt', np.array(list(map(int, community_nno[j]))), delimiter=' ')
+
+        
+    mdd_path = '/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step1_get_dFC_V1/V1/MDD/'
+    hc_path = '/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step1_get_dFC_V1/V1/HC/'
+    mdd_file = os.listdir(mdd_path)
+    hc_file = os.listdir(hc_path)
+
+    for sub in mdd_file:
+        print(sub[:7]+' starting...')
+        os.makedirs('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:7])
+        V1 = np.loadtxt(mdd_path+sub)
+        for K in range(2, 21):
+            os.makedirs('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:7]+'/'+str(K))
+            cluster = Sign(V1, K)
+            np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:7]+'/'+str(K)+'/V1_cluster.txt', np.array(cluster), delimiter=' ')
+            fo = FO(V1, cluster, K)
+            np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:7]+'/'+str(K)+'/FO.txt', np.array(fo), delimiter=' ')
+            dt = DT(cluster, K)
+            np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:7]+'/'+str(K)+'/DT.txt', np.array(dt), delimiter=' ')
+            markov_matrix = transition_matrix(cluster, K)
+            np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:7]+'/'+str(K)+'/Markov_Matrix.txt', np.array(markov_matrix), delimiter=' ')
+        print(sub[:7]+' finished.')
+
+
+
+    for sub in hc_file:
+        print(sub[:10]+' starting...')
+        os.makedirs('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:10])
+        V1 = np.loadtxt(hc_path+sub)
+        for K in range(2, 21):
+            os.makedirs('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:10]+'/'+str(K))
+            cluster = Sign(V1, K)
+            np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:10]+'/'+str(K)+'/V1_cluster.txt', np.array(cluster), delimiter=' ')
+            fo = FO(V1, cluster, K)
+            np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:10]+'/'+str(K)+'/FO.txt', np.array(fo), delimiter=' ')
+            dt = DT(cluster, K)
+            np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:10]+'/'+str(K)+'/DT.txt', np.array(dt), delimiter=' ')
+            markov_matrix = transition_matrix(cluster, K)
+            np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/subject/'+sub[:10]+'/'+str(K)+'/Markov_Matrix.txt', np.array(markov_matrix), delimiter=' ')
+        print(sub[:10]+' finished.')
+
+
+    print("MDD Group Starting...")
+    V1 = np.zeros((20*230, 246))
+    i = 0
+    for file in mdd_file:
+        path = mdd_path+file
+        vec = np.loadtxt(path)
+        for j in range(vec.shape[0]):
+            V1[i, :] = vec[j]
+            i = i+1
+
+    for K in range(2, 21):
+        os.makedirs('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/MDDGroup/'+str(K))
+        cluster = Sign(V1, K)
+        np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/MDDGroup/'+str(K)+'/V1_cluster.txt', np.array(cluster), delimiter=' ')
+        fo = FO(V1, cluster, K)
+        np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/MDDGroup/'+str(K)+'/FO.txt', np.array(fo), delimiter=' ')
+        dt = DT(cluster, K)
+        np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/MDDGroup/'+str(K)+'/DT.txt', np.array(dt), delimiter=' ')
+        markov_matrix = transition_matrix(cluster, K)
+        np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/MDDGroup/'+str(K)+'/Markov_Matrix.txt', np.array(markov_matrix), delimiter=' ')
+
+    print("MDD Group finished.")
+
+
+    print("HC Group Starting...")
+    V1 = np.zeros((20*230, 246))
+    i = 0
+    for file in hc_file:
+        path = hc_path+file
+        vec = np.loadtxt(path)
+        for j in range(vec.shape[0]):
+            V1[i, :] = vec[j]
+            i = i+1
+
+    for K in range(2, 21):
+        os.makedirs('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/HCGroup/'+str(K))
+        cluster = Sign(V1, K)
+        np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/HCGroup/'+str(K)+'/V1_cluster.txt', np.array(cluster), delimiter=' ')
+        fo = FO(V1, cluster, K)
+        np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/HCGroup/'+str(K)+'/FO.txt', np.array(fo), delimiter=' ')
+        dt = DT(cluster, K)
+        np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/HCGroup/'+str(K)+'/DT.txt', np.array(dt), delimiter=' ')
+        markov_matrix = transition_matrix(cluster, K)
+        np.savetxt('/share/home/zhangjiaqi/2022Project/HOPF/02_LEiDA_Empircal/step3_index/HCGroup/'+str(K)+'/Markov_Matrix.txt', np.array(markov_matrix), delimiter=' ')
+
+    print("HC Group finished.")
+
+
diff --git a/brainnetome_subregions.txt b/brainnetome_subregions.txt