diff --git a/seminar5/data/TS_R_NYU.npy b/seminar5/data/TS_R_NYU.npy new file mode 100644 index 0000000..9f6c752 Binary files /dev/null and b/seminar5/data/TS_R_NYU.npy differ diff --git a/seminar5/data/y_nyu.npy b/seminar5/data/y_nyu.npy new file mode 100644 index 0000000..f84ee20 Binary files /dev/null and b/seminar5/data/y_nyu.npy differ diff --git a/seminar5/seminar5.ipynb b/seminar5/seminar5.ipynb new file mode 100644 index 0000000..186ddaf --- /dev/null +++ b/seminar5/seminar5.ipynb @@ -0,0 +1,1078 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Neuro ML 2020\n", + "\n", + "## Seminar 5: Functional connectivity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import numpy as np\n", + "\n", + "import nilearn # pip install nilearn\n", + "import networkx as nx # pip install networkx\n", + "import diagram2vec # pip install diagram2vec" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# load ABIDE1 data from NYU site\n", + "ts = np.load(\"./data/TS_R_NYU.npy\")\n", + "(n_patients, n_steps, n_regions) = ts.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multivariate time-series\n", + "\n", + "#### Visualization of time series" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# visualization of raw time series\n", + "plt.figure(figsize=(16.5,5))\n", + "plt.title(\"First 3 time series\")\n", + "plt.hlines(0, -2, n_steps+2, linewidth=1.0, linestyles=\"dotted\")\n", + "plt.plot(ts[0,:,:3])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mean and standard deviation\n", + "print(\"Mean ± std of the:\")\n", + "print(\"1st time series: {:.3f} ± {:.3f}\".format(np.mean(ts[0,:,0]), np.std(ts[0,:,0])))\n", + "print(\"2nd time series: {:.3f} ± {:.3f}\".format(np.mean(ts[0,:,1]), np.std(ts[0,:,1])))\n", + "print(\"3rd time series: {:.3f} ± {:.3f}\".format(np.mean(ts[0,:,2]), np.std(ts[0,:,2])))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Normalization and trend removal" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# break 2nd and 3rd time series\n", + "ts[0,:,1] = ts[0,:,1] + 150 # add mean shift\n", + "ts[0,:,2] = ts[0,:,2] + np.linspace(0, 150, n_steps) # add trend" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Mean ± std of the:\")\n", + "print(\"1st time series: {:.3f} ± {:.3f}\".format(np.mean(ts[0,:,0]), np.std(ts[0,:,0])))\n", + "print(\"2nd time series: {:.3f} ± {:.3f}\".format(np.mean(ts[0,:,1]), np.std(ts[0,:,1])))\n", + "print(\"3rd time series: {:.3f} ± {:.3f}\".format(np.mean(ts[0,:,2]), np.std(ts[0,:,2])))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# visualization of broken time series\n", + "plt.figure(figsize=(16.5,5))\n", + "plt.title(\"First 3 time series\")\n", + "plt.hlines(0, -2, n_steps+2, linewidth=1.0, linestyles=\"dotted\")\n", + "plt.plot(ts[0,:,:3])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from nilearn.signal import clean" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ts_normalized = np.zeros_like(ts)\n", + "\n", + "# normalize and detrend\n", + "for i in range(ts.shape[0]):\n", + " ts_normalized[i] = clean(ts[i], standardize=\"zscore\", detrend=True)\n", + " \n", + "print(\"Mean ± std of the:\")\n", + "print(\"1st time series: {:.3f} ± {:.3f}\".format(np.mean(ts_normalized[0,:,0]), np.std(ts_normalized[0,:,0])))\n", + "print(\"2nd time series: {:.3f} ± {:.3f}\".format(np.mean(ts_normalized[0,:,1]), np.std(ts_normalized[0,:,1])))\n", + "print(\"3rd time series: {:.3f} ± {:.3f}\".format(np.mean(ts_normalized[0,:,2]), np.std(ts_normalized[0,:,2])))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# visualization of raw time series\n", + "plt.figure(figsize=(16.5,5))\n", + "plt.title(\"First 3 time series\")\n", + "plt.hlines(0, -2, n_steps+2, linewidth=1.0, linestyles=\"dotted\")\n", + "plt.plot(ts_normalized[0,:,:3])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Metrics of functional connectivity\n", + "\n", + "### Pearson correlation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from nilearn.connectome import ConnectivityMeasure\n", + "from sklearn.covariance import EmpiricalCovariance\n", + "\n", + "covariance_estimator = EmpiricalCovariance()\n", + "connectivity_correlation = ConnectivityMeasure(kind=\"correlation\", cov_estimator=covariance_estimator)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ts[0].shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "R = connectivity_correlation.fit_transform(ts)\n", + "R.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(16.5,5))\n", + "plt.suptitle(\"Correlation connectivity matrices\")\n", + "ax1.imshow(R[0])\n", + "ax2.imshow(R[1])\n", + "ax3.imshow(R[2])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Regularization\n", + "\n", + "_Condition number_ - minimum/maximum eigenvalue ratio of a matrix\n", + "\n", + "\n", + "#### Tikhonov regularization\n", + "\n", + "$$\\tilde{\\mathbf{C}}_X = \\mathbf{C}_X + \\alpha \\mathbf{I},~~~\\alpha > 0$$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Task**\n", + "\n", + "Check the minimum eigenvalue of a correlation matrix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.min(np.linalg.eigvalsh(R[0]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Task**\n", + "\n", + "Apply Tikhonov regularization and check how it affects the minimum eigenvalue of a correlation matrix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Shrinkage estimators\n", + "\n", + "Ledoit-Wolf\n", + "\n", + "$$\\tilde{\\mathbf{C}}_X = (1 - \\beta)\\mathbf{C}_X + \\alpha \\beta \\mathbf{I},~~~\\alpha > 0, 0 \\leq \\beta \\leq 1\\\\\n", + "\\alpha = \\frac{trace(\\mathbf{C})}{n_{features}}$$\n", + "\n", + "A well conditioned estimator for large dimensional covariance matrices, $\\alpha$ is predefined according to formula, $\\beta$ is inferred from data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.covariance import LedoitWolf\n", + "\n", + "cov_estimator_shrinked = LedoitWolf()\n", + "connectivity_correlation_shrinked = ConnectivityMeasure(kind=\"correlation\", cov_estimator=cov_estimator_shrinked)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# check the value of beta parameter\n", + "cov = cov_estimator_shrinked.fit(ts[0])\n", + "cov.shrinkage_" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "R_shrinked = connectivity_correlation_shrinked.fit_transform(ts)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# checking the minimum eigenvalue\n", + "np.min(np.linalg.eigvalsh(R_shrinked[0]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Spearman correlation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.stats import spearmanr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "S = np.zeros((3, n_regions, n_regions))\n", + "\n", + "for i in range(3):\n", + " S[i], _ = spearmanr(ts[i])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(16.5,5))\n", + "plt.suptitle(\"Spearman correlation connectivity matrices\")\n", + "ax1.imshow(S[0])\n", + "ax2.imshow(S[1])\n", + "ax3.imshow(S[2])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Mutual information\n", + "\n", + "Mutual information measures the information that random variables $X$ and $Y$ share, how much knowing one of these variables reduces uncertainty about the other. determined how different to joint distributon $p(X, Y)$ is to the production of the marginal distrubutions $p(X) p(Y)$." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$$I(X, Y) = \\sum_{(x, y)} p(x, y) \\log_2 \\left( \\frac{p(x, y)}{p(x)p(y)} \\right)$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(6,6))\n", + "plt.plot(ts[2,:,0], ts[2,:,12], \".\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import mutual_info_score\n", + "\n", + "def calc_MI(x, y, bins=10):\n", + " c_xy = np.histogram2d(x, y, bins)[0]\n", + " mi = mutual_info_score(None, None, contingency=c_xy)\n", + " return mi\n", + "\n", + "def bound(x):\n", + " return np.sqrt(1 - np.exp(-2 * x))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "l = calc_MI(ts[1,:,42], ts[1,:,45])\n", + "l, bound(l)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "c_xy = np.rot90(np.histogram2d(ts[2,:,0], ts[2,:,12], 10)[0])\n", + "plt.imshow(c_xy)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "l = calc_MI(ts[1,:,42], ts[1,:,41])\n", + "l, bound(l)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(6,6))\n", + "plt.plot(ts[2,:,42], ts[2,:,41], \".\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "c_xy = np.rot90(np.histogram2d(ts[2,:,42], ts[2,:,41], 10)[0])\n", + "plt.imshow(c_xy)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "M = np.zeros((3, n_regions, n_regions))\n", + "\n", + "for k in range(3):\n", + " for i in range(n_regions):\n", + " for j in range(i, n_regions):\n", + " M[k,i,j] = bound(calc_MI(ts[k,:,i], ts[k,:,j]))\n", + " \n", + " M[k] = M[k] + M[k].T\n", + " np.fill_diagonal(M[k], 1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(16.5,5))\n", + "plt.suptitle(\"Mutual information connectivity matrices\")\n", + "ax1.imshow(M[0])\n", + "ax2.imshow(M[1])\n", + "ax3.imshow(M[2])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "#### Thresholding" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "R_tresholded = R[0].copy()\n", + "np.fill_diagonal(R_tresholded, 0)\n", + "R_tresholded[R_tresholded < 0.5] = 0.0\n", + "R_tresholded" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, (ax1) = plt.subplots(1, 1, figsize=(5,5))\n", + "plt.suptitle(\"Thresholded Pearson correlation matrix\")\n", + "ax1.imshow(R_tresholded)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Network visualization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from nilearn import plotting\n", + "\n", + "# get coordinates of brain regions\n", + "atlas_aal = nilearn.datasets.fetch_atlas_aal()\n", + "coordinates = plotting.find_parcellation_cut_coords(labels_img=atlas_aal[\"maps\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# nilearn graph drawing\n", + "fig = plt.figure(figsize=(13,6))\n", + "edge_options = {\"color\": \"r\", \"linewidth\": 1.5, \"alpha\": 0.5}\n", + "plotting.plot_connectome(R_tresholded, coordinates, figure=fig, edge_kwargs=edge_options)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# matplotlib graph drawing\n", + "fig, (ax1) = plt.subplots(1, 1, figsize=(8,8))\n", + "ax1.set_title(\"Correlation graph\")\n", + "nx.draw_shell(nx.from_numpy_array(R_tresholded), ax=ax1)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Network analysis\n", + "\n", + "### Graph-theoretic" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# create graph from connectivity matrix\n", + "G_R = nx.from_numpy_array(R_tresholded)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Node degree" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "degree = np.array([degree[1] for degree in nx.degree(G_R)])\n", + "degree" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "neighbor_degree_avg = np.array(list(nx.average_neighbor_degree(G_R).values()))\n", + "neighbor_degree_avg" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Centralities" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "centrality_betweenness = np.array(list(nx.betweenness_centrality(G_R).values()))\n", + "centrality_betweenness" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "centrality_closeness = np.array(list(nx.closeness_centrality(G_R).values()))\n", + "centrality_closeness" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Clustering coefficient" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "clustering_coefficient_local = np.array(list(nx.clustering(G_R).values()))\n", + "clustering_coefficient_local" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Efficiency" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nx.local_efficiency(G_R)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nx.global_efficiency(G_R)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Spectral graph theory\n", + "\n", + "Eigenvalues of\n", + "- connectivity matrix\n", + "- Laplacian matrix" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Spectrum\n", + "\n", + "Solve for $\\mathbf{\\lambda}$ the eigenvalue problem, where $\\mathbf{A}$ is the connectivity matrix\n", + "\n", + "$$\\mathbf{Av} = \\mathbf{\\lambda} \\mathbf{v}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "eigenvalues, _ = np.linalg.eigh(R[0])\n", + "eigenvalues" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Laplacian spectrum\n", + "\n", + "Solve for $\\mathbf{\\lambda}$ the eigenvalue problem\n", + "\n", + "$$\\mathbf{Lv} = \\mathbf{\\lambda} \\mathbf{v},$$\n", + "\n", + "where $\\mathbf{L}$ is the Laplacian matrix of the graph given by the connectivity matrix $\\mathbf{A}$\n", + "\n", + "$$\\mathbf{L} = \\mathbf{D} - \\mathbf{A}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# laplacian matrix L = D - A\n", + "# A is contained in variable R_tresholded\n", + "\n", + "# D, your code here\n", + "\n", + "# L, your code here" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "eigenvalues_laplacian, _ = np.linalg.eigh(L)\n", + "eigenvalues_laplacian" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Topological\n", + "\n", + "Loops, Betti numbers, persistent homology" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from ripser import ripser\n", + "import diagram2vec" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reverse matrix to add higher correlated edges first to the filtration\n", + "R_filtered = 1 - np.abs(R[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# compute persistence diagram of the network\n", + "diagram_R = ripser(R_filtered, distance_matrix=True)[\"dgms\"]\n", + "diagram_R" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# vectorize persistent diagram\n", + "betti_curve = diagram2vec.persistence_curve(diagram_R)\n", + "betti_curve" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(16.5,5))\n", + "ax1.set_title(\"Persistence diagram\")\n", + "ax1.set_xlim(-0.025,1)\n", + "ax1.set_ylim(-0.025,1)\n", + "ax1.scatter(diagram_R[0][:,0], diagram_R[0][:,1], c=\"b\")\n", + "ax1.scatter(diagram_R[1][:,0], diagram_R[1][:,1], c=\"r\")\n", + "ax2.set_title(\"0th Betti number curve\")\n", + "ax2.plot(betti_curve[0,0], c=\"b\")\n", + "ax3.set_title(\"1st Betti number curve\")\n", + "ax3.plot(betti_curve[0,1], c=\"r\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Machine learning\n", + "\n", + "Use the computed graph, spectral and topological classes features with sklearn classifiers. Try concatenating and/or boosting features of different classes, and stacking/emsembling of classifiers." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.model_selection import cross_val_score" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# load ABIDE1 data from NYU site\n", + "ts = np.load(\"./data/TS_R_NYU.npy\")\n", + "y = np.load(\"./data/y_nyu.npy\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# compute correlation networks\n", + "R = ConnectivityMeasure(kind=\"correlation\").fit_transform(ts)\n", + "R.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "R.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Topological features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# topological features\n", + "R_filtration = 1 - np.abs(R)\n", + "\n", + "diagrams = []\n", + "\n", + "for i, R_filtered in enumerate(R_filtration):\n", + " diagram = ripser(R_filtered, distance_matrix=True)[\"dgms\"]\n", + " diagrams.append(diagram)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X_topological = diagram2vec.persistence_curve(diagrams, quantity=\"persistence\", m=40)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X_topological[:,0].shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "clf = DecisionTreeClassifier(random_state=42, max_depth=5)\n", + "cross_val_score(clf, X_topological[:,1], y, cv=10).mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Graph features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "R_thresholded = np.copy(R)\n", + "for i in range(R_thresholded.shape[0]):\n", + " np.fill_diagonal(R_thresholded[i], 0)\n", + "\n", + "R_thresholded[R_thresholded < 0.47] = 0.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# graph features\n", + "X_graph = np.zeros((n_patients, n_regions*5))\n", + "\n", + "for i in range(R_thresholded.shape[0]):\n", + " \n", + " G = nx.from_numpy_array(R_thresholded[i])\n", + " \n", + " degree = np.array([degree[1] for degree in nx.degree(G)])\n", + " neighbor_degree_avg = np.array(list(nx.average_neighbor_degree(G).values()))\n", + " centrality_betweenness = np.array(list(nx.betweenness_centrality(G).values()))\n", + " centrality_closeness = np.array(list(nx.closeness_centrality(G).values()))\n", + " clustering_coefficient = np.array(list(nx.clustering(G).values()))\n", + " \n", + " feature_i = np.concatenate((degree, neighbor_degree_avg, centrality_betweenness, centrality_closeness, clustering_coefficient))\n", + " X_graph[i] = feature_i\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cross_val_score(clf, X_graph, y, cv=10).mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Spectral features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# spectral features (eigher spectrum or Laplacian spectrum)\n", + "X_spectral = np.zeros((n_patients, n_regions))\n", + "\n", + "for i in range(R_thresholded.shape[0]):\n", + " eigenvalues_i, _ = np.linalg.eigh(R_thresholded[i])\n", + " X_spectral[i] = eigenvalues_i" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cross_val_score(clf, X_spectral, y, cv=10).mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Features concatenation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cross_val_score(clf, np.concatenate((X_graph, X_spectral, X_topological[:,1]), axis=1), y, cv=10).mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Task**\n", + "\n", + "Repeat the feature extraction and machine learning pipeline on mutual information matrices. Note that the estimation of mutual information matrices could be be time-consuming." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "@webio": { + "lastCommId": null, + "lastKernelId": null + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}