diff --git a/seminar5/data/TS_R_NYU.npy b/seminar5/data/TS_R_NYU.npy
new file mode 100644
index 0000000..9f6c752
Binary files /dev/null and b/seminar5/data/TS_R_NYU.npy differ
diff --git a/seminar5/data/y_nyu.npy b/seminar5/data/y_nyu.npy
new file mode 100644
index 0000000..f84ee20
Binary files /dev/null and b/seminar5/data/y_nyu.npy differ
diff --git a/seminar5/seminar5.ipynb b/seminar5/seminar5.ipynb
new file mode 100644
index 0000000..186ddaf
--- /dev/null
+++ b/seminar5/seminar5.ipynb
@@ -0,0 +1,1078 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Neuro ML 2020\n",
+    "\n",
+    "## Seminar 5: Functional connectivity"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "import numpy as np\n",
+    "\n",
+    "import nilearn # pip install nilearn\n",
+    "import networkx as nx # pip install networkx\n",
+    "import diagram2vec # pip install diagram2vec"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load ABIDE1 data from NYU site\n",
+    "ts = np.load(\"./data/TS_R_NYU.npy\")\n",
+    "(n_patients, n_steps, n_regions) = ts.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Multivariate time-series\n",
+    "\n",
+    "#### Visualization of time series"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# visualization of raw time series\n",
+    "plt.figure(figsize=(16.5,5))\n",
+    "plt.title(\"First 3 time series\")\n",
+    "plt.hlines(0, -2, n_steps+2, linewidth=1.0, linestyles=\"dotted\")\n",
+    "plt.plot(ts[0,:,:3])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# mean and standard deviation\n",
+    "print(\"Mean ± std of the:\")\n",
+    "print(\"1st time series: {:.3f} ± {:.3f}\".format(np.mean(ts[0,:,0]), np.std(ts[0,:,0])))\n",
+    "print(\"2nd time series: {:.3f} ± {:.3f}\".format(np.mean(ts[0,:,1]), np.std(ts[0,:,1])))\n",
+    "print(\"3rd time series: {:.3f} ± {:.3f}\".format(np.mean(ts[0,:,2]), np.std(ts[0,:,2])))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Normalization and trend removal"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# break 2nd and 3rd time series\n",
+    "ts[0,:,1] = ts[0,:,1] + 150 # add mean shift\n",
+    "ts[0,:,2] = ts[0,:,2] + np.linspace(0, 150, n_steps) # add trend"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"Mean ± std of the:\")\n",
+    "print(\"1st time series: {:.3f} ± {:.3f}\".format(np.mean(ts[0,:,0]), np.std(ts[0,:,0])))\n",
+    "print(\"2nd time series: {:.3f} ± {:.3f}\".format(np.mean(ts[0,:,1]), np.std(ts[0,:,1])))\n",
+    "print(\"3rd time series: {:.3f} ± {:.3f}\".format(np.mean(ts[0,:,2]), np.std(ts[0,:,2])))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# visualization of broken time series\n",
+    "plt.figure(figsize=(16.5,5))\n",
+    "plt.title(\"First 3 time series\")\n",
+    "plt.hlines(0, -2, n_steps+2, linewidth=1.0, linestyles=\"dotted\")\n",
+    "plt.plot(ts[0,:,:3])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from nilearn.signal import clean"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ts_normalized = np.zeros_like(ts)\n",
+    "\n",
+    "# normalize and detrend\n",
+    "for i in range(ts.shape[0]):\n",
+    "    ts_normalized[i] = clean(ts[i], standardize=\"zscore\", detrend=True)\n",
+    "    \n",
+    "print(\"Mean ± std of the:\")\n",
+    "print(\"1st time series: {:.3f} ± {:.3f}\".format(np.mean(ts_normalized[0,:,0]), np.std(ts_normalized[0,:,0])))\n",
+    "print(\"2nd time series: {:.3f} ± {:.3f}\".format(np.mean(ts_normalized[0,:,1]), np.std(ts_normalized[0,:,1])))\n",
+    "print(\"3rd time series: {:.3f} ± {:.3f}\".format(np.mean(ts_normalized[0,:,2]), np.std(ts_normalized[0,:,2])))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# visualization of raw time series\n",
+    "plt.figure(figsize=(16.5,5))\n",
+    "plt.title(\"First 3 time series\")\n",
+    "plt.hlines(0, -2, n_steps+2, linewidth=1.0, linestyles=\"dotted\")\n",
+    "plt.plot(ts_normalized[0,:,:3])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Metrics of functional connectivity\n",
+    "\n",
+    "### Pearson correlation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from nilearn.connectome import ConnectivityMeasure\n",
+    "from sklearn.covariance import EmpiricalCovariance\n",
+    "\n",
+    "covariance_estimator = EmpiricalCovariance()\n",
+    "connectivity_correlation = ConnectivityMeasure(kind=\"correlation\", cov_estimator=covariance_estimator)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ts[0].shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "R = connectivity_correlation.fit_transform(ts)\n",
+    "R.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(16.5,5))\n",
+    "plt.suptitle(\"Correlation connectivity matrices\")\n",
+    "ax1.imshow(R[0])\n",
+    "ax2.imshow(R[1])\n",
+    "ax3.imshow(R[2])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Regularization\n",
+    "\n",
+    "_Condition number_ - minimum/maximum eigenvalue ratio of a matrix\n",
+    "\n",
+    "\n",
+    "#### Tikhonov regularization\n",
+    "\n",
+    "$$\\tilde{\\mathbf{C}}_X = \\mathbf{C}_X + \\alpha \\mathbf{I},~~~\\alpha > 0$$"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Task**\n",
+    "\n",
+    "Check the minimum eigenvalue of a correlation matrix"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.min(np.linalg.eigvalsh(R[0]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Task**\n",
+    "\n",
+    "Apply Tikhonov regularization and check how it affects the minimum eigenvalue of a correlation matrix"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "### your code here"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Shrinkage estimators\n",
+    "\n",
+    "Ledoit-Wolf\n",
+    "\n",
+    "$$\\tilde{\\mathbf{C}}_X = (1 - \\beta)\\mathbf{C}_X + \\alpha \\beta \\mathbf{I},~~~\\alpha > 0, 0 \\leq \\beta \\leq 1\\\\\n",
+    "\\alpha = \\frac{trace(\\mathbf{C})}{n_{features}}$$\n",
+    "\n",
+    "A well conditioned estimator for large dimensional covariance matrices, $\\alpha$ is predefined according to formula, $\\beta$ is inferred from data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.covariance import LedoitWolf\n",
+    "\n",
+    "cov_estimator_shrinked = LedoitWolf()\n",
+    "connectivity_correlation_shrinked = ConnectivityMeasure(kind=\"correlation\", cov_estimator=cov_estimator_shrinked)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# check the value of beta parameter\n",
+    "cov = cov_estimator_shrinked.fit(ts[0])\n",
+    "cov.shrinkage_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "R_shrinked = connectivity_correlation_shrinked.fit_transform(ts)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# checking the minimum eigenvalue\n",
+    "np.min(np.linalg.eigvalsh(R_shrinked[0]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Spearman correlation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from scipy.stats import spearmanr"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "S = np.zeros((3, n_regions, n_regions))\n",
+    "\n",
+    "for i in range(3):\n",
+    "    S[i], _ = spearmanr(ts[i])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(16.5,5))\n",
+    "plt.suptitle(\"Spearman correlation connectivity matrices\")\n",
+    "ax1.imshow(S[0])\n",
+    "ax2.imshow(S[1])\n",
+    "ax3.imshow(S[2])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Mutual information\n",
+    "\n",
+    "Mutual information measures the information that random variables $X$ and $Y$ share, how much knowing one of these variables reduces uncertainty about the other. determined how different to joint distributon $p(X, Y)$ is to the production of the marginal distrubutions $p(X) p(Y)$."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "$$I(X, Y) = \\sum_{(x, y)} p(x, y) \\log_2 \\left( \\frac{p(x, y)}{p(x)p(y)} \\right)$$"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(6,6))\n",
+    "plt.plot(ts[2,:,0], ts[2,:,12], \".\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics import mutual_info_score\n",
+    "\n",
+    "def calc_MI(x, y, bins=10):\n",
+    "    c_xy = np.histogram2d(x, y, bins)[0]\n",
+    "    mi = mutual_info_score(None, None, contingency=c_xy)\n",
+    "    return mi\n",
+    "\n",
+    "def bound(x):\n",
+    "    return np.sqrt(1 - np.exp(-2 * x))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "l = calc_MI(ts[1,:,42], ts[1,:,45])\n",
+    "l, bound(l)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "c_xy = np.rot90(np.histogram2d(ts[2,:,0], ts[2,:,12], 10)[0])\n",
+    "plt.imshow(c_xy)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "l = calc_MI(ts[1,:,42], ts[1,:,41])\n",
+    "l, bound(l)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(figsize=(6,6))\n",
+    "plt.plot(ts[2,:,42], ts[2,:,41], \".\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "c_xy = np.rot90(np.histogram2d(ts[2,:,42], ts[2,:,41], 10)[0])\n",
+    "plt.imshow(c_xy)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "M = np.zeros((3, n_regions, n_regions))\n",
+    "\n",
+    "for k in range(3):\n",
+    "    for i in range(n_regions):\n",
+    "        for j in range(i, n_regions):\n",
+    "            M[k,i,j] = bound(calc_MI(ts[k,:,i], ts[k,:,j]))\n",
+    "            \n",
+    "    M[k] = M[k] + M[k].T\n",
+    "    np.fill_diagonal(M[k], 1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(16.5,5))\n",
+    "plt.suptitle(\"Mutual information connectivity matrices\")\n",
+    "ax1.imshow(M[0])\n",
+    "ax2.imshow(M[1])\n",
+    "ax3.imshow(M[2])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "\n",
+    "#### Thresholding"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "R_tresholded = R[0].copy()\n",
+    "np.fill_diagonal(R_tresholded, 0)\n",
+    "R_tresholded[R_tresholded < 0.5] = 0.0\n",
+    "R_tresholded"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, (ax1) = plt.subplots(1, 1, figsize=(5,5))\n",
+    "plt.suptitle(\"Thresholded Pearson correlation matrix\")\n",
+    "ax1.imshow(R_tresholded)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Network visualization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from nilearn import plotting\n",
+    "\n",
+    "# get coordinates of brain regions\n",
+    "atlas_aal = nilearn.datasets.fetch_atlas_aal()\n",
+    "coordinates = plotting.find_parcellation_cut_coords(labels_img=atlas_aal[\"maps\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# nilearn graph drawing\n",
+    "fig = plt.figure(figsize=(13,6))\n",
+    "edge_options = {\"color\": \"r\", \"linewidth\": 1.5, \"alpha\": 0.5}\n",
+    "plotting.plot_connectome(R_tresholded, coordinates, figure=fig, edge_kwargs=edge_options)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# matplotlib graph drawing\n",
+    "fig, (ax1) = plt.subplots(1, 1, figsize=(8,8))\n",
+    "ax1.set_title(\"Correlation graph\")\n",
+    "nx.draw_shell(nx.from_numpy_array(R_tresholded), ax=ax1)\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Network analysis\n",
+    "\n",
+    "### Graph-theoretic"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create graph from connectivity matrix\n",
+    "G_R = nx.from_numpy_array(R_tresholded)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Node degree"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "degree = np.array([degree[1] for degree in nx.degree(G_R)])\n",
+    "degree"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "neighbor_degree_avg = np.array(list(nx.average_neighbor_degree(G_R).values()))\n",
+    "neighbor_degree_avg"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Centralities"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "centrality_betweenness = np.array(list(nx.betweenness_centrality(G_R).values()))\n",
+    "centrality_betweenness"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "centrality_closeness = np.array(list(nx.closeness_centrality(G_R).values()))\n",
+    "centrality_closeness"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Clustering coefficient"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "clustering_coefficient_local = np.array(list(nx.clustering(G_R).values()))\n",
+    "clustering_coefficient_local"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Efficiency"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nx.local_efficiency(G_R)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nx.global_efficiency(G_R)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Spectral graph theory\n",
+    "\n",
+    "Eigenvalues of\n",
+    "- connectivity matrix\n",
+    "- Laplacian matrix"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Spectrum\n",
+    "\n",
+    "Solve for $\\mathbf{\\lambda}$ the eigenvalue problem, where $\\mathbf{A}$ is the connectivity matrix\n",
+    "\n",
+    "$$\\mathbf{Av} = \\mathbf{\\lambda} \\mathbf{v}$$"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eigenvalues, _ = np.linalg.eigh(R[0])\n",
+    "eigenvalues"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Laplacian spectrum\n",
+    "\n",
+    "Solve for $\\mathbf{\\lambda}$ the eigenvalue problem\n",
+    "\n",
+    "$$\\mathbf{Lv} = \\mathbf{\\lambda} \\mathbf{v},$$\n",
+    "\n",
+    "where $\\mathbf{L}$ is the Laplacian matrix of the graph given by the connectivity matrix $\\mathbf{A}$\n",
+    "\n",
+    "$$\\mathbf{L} = \\mathbf{D} - \\mathbf{A}$$"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# laplacian matrix L = D - A\n",
+    "# A is contained in variable R_tresholded\n",
+    "\n",
+    "# D, your code here\n",
+    "\n",
+    "# L, your code here"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eigenvalues_laplacian, _ = np.linalg.eigh(L)\n",
+    "eigenvalues_laplacian"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Topological\n",
+    "\n",
+    "Loops, Betti numbers, persistent homology"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from ripser import ripser\n",
+    "import diagram2vec"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# reverse matrix to add higher correlated edges first to the filtration\n",
+    "R_filtered = 1 - np.abs(R[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# compute persistence diagram of the network\n",
+    "diagram_R = ripser(R_filtered, distance_matrix=True)[\"dgms\"]\n",
+    "diagram_R"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# vectorize persistent diagram\n",
+    "betti_curve = diagram2vec.persistence_curve(diagram_R)\n",
+    "betti_curve"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(16.5,5))\n",
+    "ax1.set_title(\"Persistence diagram\")\n",
+    "ax1.set_xlim(-0.025,1)\n",
+    "ax1.set_ylim(-0.025,1)\n",
+    "ax1.scatter(diagram_R[0][:,0], diagram_R[0][:,1], c=\"b\")\n",
+    "ax1.scatter(diagram_R[1][:,0], diagram_R[1][:,1], c=\"r\")\n",
+    "ax2.set_title(\"0th Betti number curve\")\n",
+    "ax2.plot(betti_curve[0,0], c=\"b\")\n",
+    "ax3.set_title(\"1st Betti number curve\")\n",
+    "ax3.plot(betti_curve[0,1], c=\"r\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Machine learning\n",
+    "\n",
+    "Use the computed graph, spectral and topological classes features with sklearn classifiers. Try concatenating and/or boosting features of different classes, and stacking/emsembling of classifiers."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.model_selection import cross_val_score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load ABIDE1 data from NYU site\n",
+    "ts = np.load(\"./data/TS_R_NYU.npy\")\n",
+    "y = np.load(\"./data/y_nyu.npy\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# compute correlation networks\n",
+    "R = ConnectivityMeasure(kind=\"correlation\").fit_transform(ts)\n",
+    "R.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "R.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Topological features"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# topological features\n",
+    "R_filtration = 1 - np.abs(R)\n",
+    "\n",
+    "diagrams = []\n",
+    "\n",
+    "for i, R_filtered in enumerate(R_filtration):\n",
+    "    diagram = ripser(R_filtered, distance_matrix=True)[\"dgms\"]\n",
+    "    diagrams.append(diagram)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_topological = diagram2vec.persistence_curve(diagrams, quantity=\"persistence\", m=40)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_topological[:,0].shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "clf = DecisionTreeClassifier(random_state=42, max_depth=5)\n",
+    "cross_val_score(clf, X_topological[:,1], y, cv=10).mean()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Graph features"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "R_thresholded = np.copy(R)\n",
+    "for i in range(R_thresholded.shape[0]):\n",
+    "    np.fill_diagonal(R_thresholded[i], 0)\n",
+    "\n",
+    "R_thresholded[R_thresholded < 0.47] = 0.0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# graph features\n",
+    "X_graph = np.zeros((n_patients, n_regions*5))\n",
+    "\n",
+    "for i in range(R_thresholded.shape[0]):\n",
+    "    \n",
+    "    G = nx.from_numpy_array(R_thresholded[i])\n",
+    "    \n",
+    "    degree = np.array([degree[1] for degree in nx.degree(G)])\n",
+    "    neighbor_degree_avg = np.array(list(nx.average_neighbor_degree(G).values()))\n",
+    "    centrality_betweenness = np.array(list(nx.betweenness_centrality(G).values()))\n",
+    "    centrality_closeness = np.array(list(nx.closeness_centrality(G).values()))\n",
+    "    clustering_coefficient = np.array(list(nx.clustering(G).values()))\n",
+    "    \n",
+    "    feature_i = np.concatenate((degree, neighbor_degree_avg, centrality_betweenness, centrality_closeness, clustering_coefficient))\n",
+    "    X_graph[i] = feature_i\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cross_val_score(clf, X_graph, y, cv=10).mean()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Spectral features"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# spectral features (eigher spectrum or Laplacian spectrum)\n",
+    "X_spectral = np.zeros((n_patients, n_regions))\n",
+    "\n",
+    "for i in range(R_thresholded.shape[0]):\n",
+    "    eigenvalues_i, _ = np.linalg.eigh(R_thresholded[i])\n",
+    "    X_spectral[i] = eigenvalues_i"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cross_val_score(clf, X_spectral, y, cv=10).mean()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Features concatenation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cross_val_score(clf, np.concatenate((X_graph, X_spectral, X_topological[:,1]), axis=1), y, cv=10).mean()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Task**\n",
+    "\n",
+    "Repeat the feature extraction and machine learning pipeline on mutual information matrices. Note that the estimation of mutual information matrices could be be time-consuming."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "@webio": {
+   "lastCommId": null,
+   "lastKernelId": null
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}