diff --git a/.circleci/config.yml b/.circleci/config.yml index e6223146..26da3083 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -22,7 +22,7 @@ jobs: command: | python3 -m venv venv source venv/bin/activate - pip install --upgrade pip wheel setuptools + pip install --upgrade wheel setuptools pip pip install -r requirements.txt - run: diff --git a/.github/workflows/notebooks.yml b/.github/workflows/notebooks.yml index dd80db64..d19a2815 100644 --- a/.github/workflows/notebooks.yml +++ b/.github/workflows/notebooks.yml @@ -37,7 +37,7 @@ jobs: - name: Install dependencies run: | - pip install --upgrade pip + pip install pip==21.1.1 pip install -r requirements.txt - name: Test with nbval run: | diff --git a/content/algorithms/assortativity/correlation.md b/content/algorithms/assortativity/correlation.md new file mode 100644 index 00000000..6c75b503 --- /dev/null +++ b/content/algorithms/assortativity/correlation.md @@ -0,0 +1,265 @@ +--- +jupytext: + notebook_metadata_filter: all + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.11.2 +kernelspec: + display_name: Python 3 + language: python + name: python3 +language_info: + codemirror_mode: + name: ipython + version: 3 + file_extension: .py + mimetype: text/x-python + name: python + nbconvert_exporter: python + pygments_lexer: ipython3 + version: 3.8.5 +--- + +# Node assortativity coefficients and correlation measures + +In this tutorial, we will go through the theory of [assortativity](https://en.wikipedia.org/wiki/Assortativity) and its measures. + +Specifically, we'll focus on assortativity measures available in NetworkX at [algorithms/assortativity/correlation.py](https://github.com/networkx/networkx/blob/main/networkx/algorithms/assortativity/correlation.py): +* Attribute assortativity +* Numeric assortativity +* Degree assortativity + +as well as mixing matrices, which are closely related to assortativity measures.
+ +## Assortativity + +Assortativity in a network refers to the tendency of nodes to connect with +other 'similar' nodes over 'dissimilar' nodes. + +Here we say that two nodes are 'similar' with respect to a property if they have the same value of that property. Properties can range from structural properties, like the degree of a node, to other properties like weight or capacity. + +Based on these properties we can have a different measure of assortativity for the network. +On the other hand, we can also have disassortativity, in which case nodes tend +to connect to dissimilar nodes over similar nodes. + +### Assortativity coefficients + +Let's say we have a network $N$, $N = (V, E)$ where $V$ is the set of nodes in the network and $E$ is the set of edges/directed edges in the network. +In addition, $P(v)$ represents a property for each node $v$. + +#### Mixing matrix + +Let the property $P(v)$ take $P[0],P[1],...P[k-1]$ distinct values on the network, +then the **mixing matrix** is the matrix $M$ such that $M[i][j]$ represents the number of edges from +nodes with property $P[i]$ to nodes with property $P[j]$. +We can normalize the mixing matrix by dividing by the total number of ordered edges, i.e. +$ e = \frac{M}{|E|}$. + +Now define, + +$a[i]=$ proportion of edges $(u,v)$ such that $P(u)=P[i]$ + +$$ a[i] = \sum\limits_{j}e[i][j] $$ + +$b[i]=$ proportion of edges $(u,v)$ such that $P(v)=P[i]$ + +$$ b[i] = \sum\limits_{j}e[j][i]$$ + +In Python code it would look something like `a = e.sum(axis=1)` and `b = e.sum(axis=0)`. + +Finally, let $\sigma_a$ and $\sigma_b$ represent the standard deviation of +$\{\ P[i]\cdot a[i]\ |\ i \in 0...k-1\}$ and $\{ P[i]\cdot b[i]\ |\ i \in 0...k-1\}$ +respectively. + +Then we can define the assortativity coefficient for this property based on the +Pearson correlation coefficient. + +#### Attribute Assortativity Coefficient + +Here the property $P(v)$ is a nominal property assigned to each node.
+As defined above we calculate the normalized mixing matrix $e$ and from that we +define the attribute assortativity coefficient [^1] as below. + +From here onwards we will use subscript notation to denote indexing, e.g. $P_i = P[i]$ and $e_{ij} = e[i][j]$ + +$$ r = \frac{\sum\limits_{i}e_{ii} - \sum\limits_{i}a_{i}b_{i}}{1-\sum\limits_{i}a_{i}b_{i}} = \frac{Trace(e) - ||e^2||}{1-||e^2||}$$ + +It is implemented as `attribute_assortativity_coefficient`. + +#### Numeric Assortativity Coefficient + +Here the property $P(v)$ is a numerical property assigned to each +node and the definitions of the normalized mixing +matrix $e$, $\sigma_a$, and $\sigma_b$ are the same as above. +From these we define the numeric assortativity coefficient [^1] as below. + +$$ r = \frac{\sum\limits_{i,j}P_i P_j(e_{ij} -a_i b_j)}{\sigma_a\sigma_b} $$ + +It is implemented as `numeric_assortativity_coefficient`. + +#### Degree Assortativity Coefficient + +When it comes to measuring degree assortativity for directed networks we have +more options compared to assortativity w.r.t. a property because we have 2 types +of degrees, namely in-degree and out-degree. +Based on the 2 types of degrees we can measure $2 \times 2 =4$ different types +of degree assortativity [^2]: + +1. r(in,in) : Measures tendency of having a directed edge (u,v) such that, in-degree(u) = in-degree(v). +2. r(in,out) : Measures tendency of having a directed edge (u,v) such that, in-degree(u) = out-degree(v). +3. r(out,in) : Measures tendency of having a directed edge (u,v) such that, out-degree(u) = in-degree(v). +4. r(out,out) : Measures tendency of having a directed edge (u,v) such that, out-degree(u) = out-degree(v). + +Note: If the network is undirected all the 4 types of degree assortativity are the same. + +To define the degree assortativity coefficient for all 4 types we need a slight +modification of the definitions of $P[i]$ and $e$, while the definitions of +$\sigma_a$ and $\sigma_b$ remain the same.
+ +Let $x,y \in \{in,out\}$. The property $P(\cdot)$ takes distinct values from +the union of the values taken by $x$-degree$(\cdot)$ and $y$-degree$(\cdot)$, +and $e_{i,j}$ is the proportion of directed edges $(u,v)$ with $x$-degree$(u) = P_i$ +and $y$-degree$(v) = P_j$. + +$$ r(x,y) = \frac{\sum\limits_{i,j}P_i P_j(e_{ij} -a_i b_j)}{\sigma_a\sigma_b} $$ + +It is implemented as `degree_assortativity_coefficient` and +`degree_pearson_correlation_coefficient`. The latter function uses +`scipy.stats.pearsonr` to calculate the assortativity coefficient, which makes +it potentially faster. + +## Example + +```{code-cell} ipython3 +%matplotlib inline +import networkx as nx +import matplotlib.pyplot as plt +import pickle +import copy +import random +import warnings +warnings.filterwarnings("ignore") +``` + +Illustrating how the value of assortativity changes + +```{code-cell} ipython3 +gname = "g2" +# loading the graph +G = nx.read_graphml(f"data/{gname}.graphml") +with open(f"data/pos_{gname}", "rb") as fp: + pos = pickle.load(fp) +``` + +```{code-cell} ipython3 +fig, axes = plt.subplots(4, 2, figsize=(20, 20)) + +# assign colors and labels to nodes based on their 'cluster' and 'num_prop' property +node_colors = ["orange" if G.nodes[u]["cluster"] == "K5" else "cyan" for u in G.nodes] +node_labels = {u: G.nodes[u]["num_prop"] for u in G.nodes} + +for i in range(8): + g = nx.read_graphml(f"data/{gname}_{i}.graphml") + + # calculating the assortativity coefficients wrt different properties + cr = nx.attribute_assortativity_coefficient(g, "cluster") + r_in_out = nx.degree_assortativity_coefficient(g, x="in", y="out") + nr = nx.numeric_assortativity_coefficient(g, "num_prop") + + # drawing the network + nx.draw_networkx_nodes( + g, pos=pos, node_size=300, ax=axes[i // 2][i % 2], node_color=node_colors + ) + nx.draw_networkx_labels(g, pos=pos, labels=node_labels, ax=axes[i // 2][i % 2]) + nx.draw_networkx_edges(g, pos=pos, ax=axes[i // 2][i % 2], edge_color="0.7") + axes[i // 2][i 
% 2].set_title( + f"Attribute assortativity coefficient = {cr:.3}\nNumeric assortativity coefficient = {nr:.3}\nr(in,out) = {r_in_out:.3}", + size=15, + ) + +fig.tight_layout() +``` + +Nodes are colored by the `cluster` property and labeled by the `num_prop` property. +We can observe that the initial network on the left side is completely assortative +and its complement on the right side is completely disassortative. +As we add edges between nodes of different (similar) attributes in the assortative +(disassortative) network, the network tends to a non-assortative network and +the value of both assortativity coefficients tends to $0$. + ++++ + +The parameter `nodes` in `attribute_assortativity_coefficient` and +`numeric_assortativity_coefficient` specifies the nodes whose edges are to be +considered in the mixing matrix calculation. +That is to say, if $(u,v)$ is a directed edge then the edge $(u,v)$ will be +used in the mixing matrix calculation if $u$ is in `nodes`. +For the undirected case, it's considered if at least one of $u,v$ is in `nodes`. + +The `nodes` parameter is interpreted differently in `degree_assortativity_coefficient` and +`degree_pearson_correlation_coefficient`, where it specifies the nodes forming a subgraph +whose edges are considered in the mixing matrix calculation.
+ +```{code-cell} ipython3 +# list of nodes to consider for the i'th network in the example +# Note: passing 'None' means to consider all the nodes +nodes_list = [ + None, + [str(i) for i in range(3)], + [str(i) for i in range(4)], + [str(i) for i in range(5)], + [str(i) for i in range(4, 8)], + [str(i) for i in range(5, 10)], +] +fig, axes = plt.subplots(3, 2, figsize=(20, 16)) + + +def color_node(u, nodes): + """Utility function to give the color of a node based on its attribute""" + if u not in nodes: + return "0.85" + if G.nodes[u]["cluster"] == "K5": + return "orange" + else: + return "cyan" + + +# adding an edge to show edge cases +G.add_edge("4", "5") + +for nodes, ax in zip(nodes_list, axes.ravel()): + # calculating the value of assortativity + cr = nx.attribute_assortativity_coefficient(G, "cluster", nodes=nodes) + nr = nx.numeric_assortativity_coefficient(G, "num_prop", nodes=nodes) + + # drawing network + ax.set_title( + f"Attribute assortativity coefficient: {cr:.3}\nNumeric assortativity coefficient: {nr:.3}\nNodes = {nodes}", + size=15, + ) + if nodes is None: + nodes = [u for u in G.nodes()] + node_colors = [color_node(u, nodes) for u in G.nodes] + nx.draw_networkx_nodes(G, pos=pos, node_size=450, ax=ax, node_color=node_colors) + nx.draw_networkx_labels(G, pos, labels={u: u for u in G.nodes}, font_size=15, ax=ax) + nx.draw_networkx_edges( + G, + pos=pos, + edgelist=[(u, v) for u, v in G.edges if u in nodes], + ax=ax, + edge_color="0.3", + ) +fig.tight_layout() +``` + +In the above plots only the nodes which are considered are colored and the rest are +grayed out and only the edges which are considered in the assortativity calculation +are drawn. + ++++ + +[^1]: M. E. J. Newman, Mixing patterns in networks + +[^2]: Foster, J.G., Foster, D.V., Grassberger, P. & Paczuski, M. 
Edge direction and the structure of networks diff --git a/content/algorithms/assortativity/data/g2.graphml b/content/algorithms/assortativity/data/g2.graphml new file mode 100644 index 00000000..79ea95d3 --- /dev/null +++ b/content/algorithms/assortativity/data/g2.graphml @@ -0,0 +1,177 @@ + + + + + + + 1 + K5 + + + 1 + K5 + + + 1 + K5 + + + 1 + K5 + + + 1 + K5 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/content/algorithms/assortativity/data/g2_0.graphml b/content/algorithms/assortativity/data/g2_0.graphml new file mode 100644 index 00000000..79ea95d3 --- /dev/null +++ b/content/algorithms/assortativity/data/g2_0.graphml @@ -0,0 +1,177 @@ + + + + + + + 1 + K5 + + + 1 + K5 + + + 1 + K5 + + + 1 + K5 + + + 1 + K5 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/content/algorithms/assortativity/data/g2_1.graphml b/content/algorithms/assortativity/data/g2_1.graphml new file mode 100644 index 00000000..c4839d66 --- /dev/null +++ b/content/algorithms/assortativity/data/g2_1.graphml @@ -0,0 +1,167 @@ + + + + + + + K5 + 1 + + + K5 + 1 + + + K5 + 1 + + + K5 + 1 + + + K5 + 1 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/content/algorithms/assortativity/data/g2_2.graphml b/content/algorithms/assortativity/data/g2_2.graphml new file mode 100644 index 00000000..86b5a30f --- /dev/null +++ b/content/algorithms/assortativity/data/g2_2.graphml @@ -0,0 +1,197 @@ + + + + + + + 1 + K5 + + + 1 + K5 + + + 1 + K5 + + + 1 + K5 + + + 1 + K5 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/content/algorithms/assortativity/data/g2_3.graphml b/content/algorithms/assortativity/data/g2_3.graphml new file mode 100644 index 00000000..7f8492d0 --- /dev/null +++ b/content/algorithms/assortativity/data/g2_3.graphml @@ -0,0 +1,189 @@ + + + + + + + K5 + 1 + + + K5 + 1 + + + K5 + 1 + + + K5 + 1 + + + K5 + 1 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/content/algorithms/assortativity/data/g2_4.graphml b/content/algorithms/assortativity/data/g2_4.graphml new file mode 100644 index 00000000..789cf58d --- /dev/null +++ b/content/algorithms/assortativity/data/g2_4.graphml @@ -0,0 +1,227 @@ + + + + + + + 1 + K5 + + + 1 + K5 + + + 1 + K5 + + + 1 + K5 + + + 1 + K5 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + 
K10 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/content/algorithms/assortativity/data/g2_5.graphml b/content/algorithms/assortativity/data/g2_5.graphml new file mode 100644 index 00000000..2e96d73e --- /dev/null +++ b/content/algorithms/assortativity/data/g2_5.graphml @@ -0,0 +1,222 @@ + + + + + + + K5 + 1 + + + K5 + 1 + + + K5 + 1 + + + K5 + 1 + + + K5 + 1 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/content/algorithms/assortativity/data/g2_6.graphml b/content/algorithms/assortativity/data/g2_6.graphml new file mode 100644 index 00000000..c8e1b7ee --- /dev/null +++ b/content/algorithms/assortativity/data/g2_6.graphml @@ -0,0 +1,277 @@ + + + + + + + 1 + K5 + + + 1 + K5 + + + 1 + K5 + + + 1 + K5 + + + 1 + K5 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + 2 + K10 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ diff --git a/content/algorithms/assortativity/data/g2_7.graphml b/content/algorithms/assortativity/data/g2_7.graphml new file mode 100644 index 00000000..852b3fb4 --- /dev/null +++ b/content/algorithms/assortativity/data/g2_7.graphml @@ -0,0 +1,277 @@ + + + + + + + K5 + 1 + + + K5 + 1 + + + K5 + 1 + + + K5 + 1 + + + K5 + 1 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + K10 + 2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/content/algorithms/assortativity/data/pos_g2 b/content/algorithms/assortativity/data/pos_g2 new file mode 100644 index 00000000..d55ceff8 Binary files /dev/null and b/content/algorithms/assortativity/data/pos_g2 differ diff --git a/content/algorithms/dag/data/clothing_graph.graphml b/content/algorithms/dag/data/clothing_graph.graphml new file mode 100644 index 00000000..dcadfa13 --- /dev/null +++ b/content/algorithms/dag/data/clothing_graph.graphml @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/content/algorithms/dag/index.md b/content/algorithms/dag/index.md new file mode 100644 index 00000000..354e217f --- /dev/null +++ b/content/algorithms/dag/index.md @@ -0,0 +1,353 @@ +--- +jupytext: + notebook_metadata_filter: all + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.11.1 +kernelspec: + display_name: Python 3 + language: python + name: python3 +language_info: + codemirror_mode: + name: ipython + version: 3 + file_extension: .py + mimetype: text/x-python + name: python + nbconvert_exporter: 
python + pygments_lexer: ipython3 + version: 3.9.2 +--- + +# Directed Acyclic Graphs & Topological Sort + +In this tutorial, we will explore the algorithms related to a directed acyclic graph +(or a "dag" as it is sometimes called) implemented in networkx under `networkx/algorithms/dag.py`. + +First of all, we need to understand what a directed graph is. + +## Directed Graph + +### Example + +```{code-cell} ipython3 +%matplotlib inline +import networkx as nx +import matplotlib.pyplot as plt +``` + +```{code-cell} ipython3 +triangle_graph = nx.from_edgelist([(1, 2), (2, 3), (3, 1)], create_using=nx.DiGraph) +``` + +```{code-cell} ipython3 +nx.draw_planar(triangle_graph, + with_labels=True, + node_size=1000, + node_color="#ffff8f", + width=0.8, + font_size=14, +) +``` + +### Definition + +In mathematics, and more specifically in graph theory, +a directed graph (or DiGraph) is a graph that is made up of a set of vertices +connected by directed edges often called arcs. +Edges here have _directionality_, which stands in contrast to undirected graphs +where, semantically, edges have no notion of a direction to them. +Directed acyclic graphs take this idea further; +by being _acyclic_, they have no _cycles_ in them. +You will see this idea in action in the examples below. + +## Directed Acyclic Graph + +### Example + +```{code-cell} ipython3 +clothing_graph = nx.read_graphml(f"data/clothing_graph.graphml") +``` + +```{code-cell} ipython3 +plt.figure(figsize=(12, 12), dpi=150) + +nx.draw_planar(clothing_graph, + arrowsize=12, + with_labels=True, + node_size=8000, + node_color="#ffff8f", + linewidths=2.0, + width=1.5, + font_size=14, +) +``` + +Here is a fun example of Professor Bumstead, +who has a routine for getting dressed in the morning. +By habit, the professor dons certain garments before others (e.g., socks before shoes). +Other items may be put on in any order (e.g., socks and pants). 
+ +A directed edge $(u, v)$ in the example indicates that garment $u$ +must be donned before garment $v$. + +In this example, the `clothing_graph` is a DAG. + +```{code-cell} ipython3 +nx.is_directed_acyclic_graph(clothing_graph) +``` + +By contrast, the `triangle_graph` is not a DAG. + +```{code-cell} ipython3 +nx.is_directed_acyclic_graph(triangle_graph) +``` + +This is because the `triangle_graph` has a cycle: + +```{code-cell} ipython3 +nx.find_cycle(triangle_graph) +``` + +### Applications + +Directed acyclic graph representations of partial orderings have many applications in scheduling +of systems of tasks with ordering constraints. +An important class of problems of this type concerns collections of objects that need to be updated, +for example, calculating the order of cells of a spreadsheet to update after one of the cells has been changed, +or identifying which object files of software to update after its source code has been changed. +In these contexts, we use a dependency graph, which is a graph that has a vertex for each object to be updated, +and an edge connecting two objects whenever one of them needs to be updated earlier than the other. +A cycle in this graph is called a circular dependency, and is generally not allowed, +because there would be no way to consistently schedule the tasks involved in the cycle. +Dependency graphs without circular dependencies form DAGs. + +A directed acyclic graph may also be used to represent a network of processing elements. +In this representation, data enters a processing element through its incoming edges +and leaves the element through its outgoing edges. +For instance, in electronic circuit design, static combinational logic blocks +can be represented as an acyclic system of logic gates that computes a function of an input, +where the input and output of the function are represented as individual bits. + +### Definition + +A directed acyclic graph ("DAG" or "dag") is a directed graph with no directed cycles.
+That is, it consists of vertices and edges (also called arcs), with each edge directed from one vertex to another, +such that following those directions will never form a closed loop. + +A directed graph is a DAG if and only if it can be topologically ordered +by arranging the vertices as a linear ordering that is consistent with all edge directions. + +## Topological sort + +Let's now introduce what the topological sort is. + +### Example + +```{code-cell} ipython3 +list(nx.topological_sort(clothing_graph)) +``` + +### Applications + +The canonical application of topological sorting is in scheduling a sequence of jobs +or tasks based on their dependencies. +The jobs are represented by vertices, and there is an edge from $u$ to $v$ +if job $u$ must be completed before job $v$ can be started +(for example, when washing clothes, the washing machine must finish before we put the clothes in the dryer). +Then, a topological sort gives an order in which to perform the jobs. + +A closely related application of topological sorting algorithms +was first studied in the early 1960s in the context of the +[PERT technique](https://en.wikipedia.org/wiki/Program_evaluation_and_review_technique) +for scheduling in project management. +In this application, the vertices of a graph represent the milestones of a project, +and the edges represent tasks that must be performed between one milestone and another. +Topological sorting forms the basis of linear-time algorithms for finding +the critical path of the project, a sequence of milestones and tasks that controls +the length of the overall project schedule. + +In computer science, applications of this type arise in instruction scheduling, +ordering of formula cell evaluation when recomputing formula values in spreadsheets, +logic synthesis, determining the order of compilation tasks to perform in makefiles, +data serialization, and resolving symbol dependencies in linkers. 
+It is also used to decide in which order to load tables with foreign keys in databases. + +### Definition + +A topological sort of a directed acyclic graph $G = (V, E)$ is a linear ordering of all its vertices +such that if $G$ contains an edge $(u, v)$, then $u$ appears before $v$ in the ordering. + +It is worth noting that if the graph contains a cycle, then no linear ordering is possible. + +It is useful to view a topological sort of a graph as an ordering of its vertices +along a horizontal line so that all directed edges go from left to right. + +### Kahn's algorithm + +NetworkX uses Kahn's algorithm to perform topological sorting. +We will introduce it briefly here. + +First, find a list of "start nodes" which have no incoming edges and insert them into a set S; +at least one such node must exist in a non-empty acyclic graph. Then: + +``` +L <- Empty list that will contain the sorted elements +S <- Set of all nodes with no incoming edge + +while S is not empty do + remove a node N from S + add N to L + for each node M with an edge E from N to M do + remove edge E from the graph + if M has no other incoming edges then + insert M into S + +if graph has edges then + return error # graph has at least one cycle +else + return L # a topologically sorted order +``` + +### NetworkX implementation + +Finally, let's take a look at how the topological sorting is implemented in NetworkX. + +We can see that Kahn's algorithm _stratifies_ the graph such that each level contains all the nodes +whose dependencies have been satisfied by the nodes in a previous level. +In other words, Kahn's algorithm does something like: + - Take all the nodes in the DAG that don't have any dependencies and put them in list. + - "Remove" those nodes from the DAG. + - Repeat the process, creating a new list at each step. +Thus, topological sorting is reduced to correctly stratifying the graph in this way. 
+ +This procedure is implemented in the `topological_generations()` function, on which the `topological_sort()` function is based. + +Let's see how the `topological_generations()` function is implemented in NetworkX step by step. + +#### Step 1. Initialize indegrees. + +Since in Kahn's algorithm we are only interested in the indegrees of the vertices, +in order to preserve the structure of the graph as it is passed in, +instead of removing the edges, we will decrease the indegree of the corresponding vertex. +Therefore, we will save these values in a separate _dictionary_ `indegree_map`. + +``` +indegree_map = {v: d for v, d in G.in_degree() if d > 0} +``` + +#### Step 2. Initialize first level. + +At each step of Kahn's algorithm, we seek out vertices with an in-degree of zero. +In preparation for the first loop iteration of the algorithm, +we can initialize a list called `zero_indegree` that houses these nodes: + +``` +zero_indegree = [v for v, d in G.in_degree() if d == 0] +``` + +#### Step 3. Move from one level to the next. + +Now, we will show how the algorithm moves from one level to the next. + +Inside the loop, the first generation to be considered (`this_generation`) +is the collection of nodes that have zero in-degrees. + +We process all the vertices of the current level in variable `this_generation` +and we store the next level in variable `zero_indegree`. + +For each vertex inside `this_generation`, +we conceptually remove all of its outgoing edges by decrementing the in-degree of each of its children. + +Then, if the in-degree of some vertex is zeroed as a result, +we add it to the next level `zero_indegree` +and remove it from the `indegree_map` dictionary. + +After we have processed all of the nodes inside `this_generation`, we can yield it.
+ +``` +while zero_indegree: + this_generation = zero_indegree + zero_indegree = [] + for node in this_generation: + for child in G.neighbors(node): + indegree_map[child] -= 1 + + if indegree_map[child] == 0: + zero_indegree.append(child) + del indegree_map[child] + + yield this_generation +``` + +#### Step 4. Check if there is a cycle in the graph. + +If, after completing the loop there are still vertices in the graph, +then there is a cycle in it and the graph is not a DAG. + +``` +if indegree_map: + raise nx.NetworkXUnfeasible( + "Graph contains a cycle or graph changed during iteration" + ) +``` + +#### Addendum: Topological sort works on multigraphs as well. + +This is possible to do by slightly modifying the algorithm above. + +* Firstly, check if `G` is a multigraph + ``` + multigraph = G.is_multigraph() + ``` + +* Then, replace + ``` + indegree_map[child] -= 1 + ``` + with + ``` + indegree_map[child] -= len(G[node][child]) if multigraph else 1 + ``` + +#### Addendum: The graph may have changed during the iteration. + +Between passing different levels in a topological sort, the graph could change. +We need to check this while the `while` loop is running. + +* To do this, just replace + ``` + for node in this_generation: + for child in G.neighbors(node): + indegree_map[child] -= 1 + ``` + with + ``` + for node in this_generation: + if node not in G: + raise RuntimeError("Graph changed during iteration") + for child in G.neighbors(node): + try: + indegree_map[child] -= 1 + except KeyError as e: + raise RuntimeError("Graph changed during iteration") from e + ``` + +#### Combine all steps. + +Combining all of the above gives the current implementation of the `topological_generations()` function in NetworkX. + +```{code-cell} ipython3 +import inspect + +print(inspect.getsource(nx.topological_generations)) +``` + +Let's finally see what the result will be on the `clothing_graph`. 
+ +```{code-cell} ipython3 +list(nx.topological_generations(clothing_graph)) +``` diff --git a/content/algorithms/index.md b/content/algorithms/index.md new file mode 100644 index 00000000..6e945174 --- /dev/null +++ b/content/algorithms/index.md @@ -0,0 +1,12 @@ +# Algorithms + +A closer look at some of the algorithms and network analysis techniques +provided by NetworkX. + +```{toctree} +--- +maxdepth: 1 +--- +assortativity/correlation +dag/index +``` diff --git a/content/generators/geometric.md b/content/generators/geometric.md index 771d1abd..759b6034 100644 --- a/content/generators/geometric.md +++ b/content/generators/geometric.md @@ -22,7 +22,7 @@ language_info: version: 3.7.4 --- -# Tutorial: Geometric Generator Models +# Geometric Generator Models In this tutorial, we'll explore the geometric network generator models implemented in networkx under networkx/generators/geometric.py and apply them @@ -186,43 +186,6 @@ mpl_params = { plt.rcParams.update(mpl_params) ``` -**Aside: drawing graphs with many edges** - -By default, the matplotlib-based drawing functions in the `nx_pylab` module -use `FancyArrowPatch` objects to represent edges. -`FancyArrowPatch` is quite flexible, supporting many different methods for -drawing curves and different arrow types for representing directed edges. -However, drawing many `FancyArrowPatch` objects can be quite slow, so the -drawing time can be prohibitively long for graphs with more than ~1000 edges. -For graphs with many edges, you can instead use more performant matplotlib -objects for representing graph edges, such as `LineCollection`. -We will define a helper function called `draw_edges_fast` to use instead of the -usual `draw_networkx_edges`, as some of the graphs examined below have -more than 10,000 edges. - -```{code-cell} ipython3 -from matplotlib.collections import LineCollection - -def draw_edges_fast(G, pos, ax, **lc_kwargs): - """ - Return a LineCollection representing the edges of G. 
- - Parameters - ---------- - G : networkx.Graph - Graph whose edges will be drawn - pos : dict - A mapping of node to positions - ax : matplotlib.axes.Axes - The axes to which the LineCollection will be added - lc_kwargs : dict - All other keyword arguments are passed through to LineCollection - """ - edge_pos = np.array([(pos[e[0]], pos[e[1]]) for e in G.edges()]) - edge_collection = LineCollection(edge_pos, **lc_kwargs) - ax.add_collection(edge_collection) -``` - Next, we load the data and construct the graph. ```{code-cell} ipython3 @@ -255,7 +218,7 @@ plotting options for consistent visualizations. ```{code-cell} ipython3 node_opts = {"node_size": 50, "node_color": "r", "alpha": 0.4} -edge_opts = {"color": "k", "zorder": 0} +edge_opts = {"edge_color": "k"} ``` ## Random Geometric Graphs @@ -273,7 +236,7 @@ radii = (0, 0.1, 0.2, 0.3) for r, ax, alpha, lw in zip(radii, axes.ravel(), alphas, linewidths): RGG = nx.random_geometric_graph(nodes, radius=r, pos=pos) nx.draw_networkx_nodes(G, pos=pos, ax=ax, **node_opts) - draw_edges_fast(RGG, pos=pos, ax=ax, alpha=alpha, linewidth=lw, **edge_opts) + nx.draw_networkx_edges(RGG, pos=pos, ax=ax, alpha=alpha, width=lw, **edge_opts) ax.set_title(f"$r = {r}$, {RGG.number_of_edges()} edges") fig.tight_layout() ``` @@ -282,7 +245,7 @@ fig.tight_layout() # Make edge visualization more prominent (and consistent) for the following # examples edge_opts["alpha"] = 0.8 -edge_opts["linewidth"] = 0.2 +edge_opts["width"] = 0.2 ``` ## Geographical Threshold Graphs @@ -309,7 +272,7 @@ for (name, metric), ax in zip(distance_metrics.items(), axes.ravel()): nodes, 0.1, pos=pos, weight=weight, metric=metric ) nx.draw_networkx_nodes(G, pos=pos, ax=ax, **node_opts) - draw_edges_fast(GTG, pos=pos, ax=ax, **edge_opts) + nx.draw_networkx_edges(GTG, pos=pos, ax=ax, **edge_opts) ax.set_title(f"{name}\n{GTG.number_of_edges()} edges") fig.tight_layout() ``` @@ -331,7 +294,7 @@ for (name, p_dist), ax in zip(p_dists.items(), axes.ravel()): nodes, 
0.01, pos=pos, weight=weight, metric=dist, p_dist=p_dist ) nx.draw_networkx_nodes(G, pos=pos, ax=ax, **node_opts) - draw_edges_fast(GTG, pos=pos, ax=ax, **edge_opts) + nx.draw_networkx_edges(GTG, pos=pos, ax=ax, **edge_opts) ax.set_title(f"{name}\n{GTG.number_of_edges()} edges") fig.tight_layout() ``` @@ -349,13 +312,13 @@ fig, axes = plt.subplots(1, 3) pdfs = { "default": None, # default: exponential distribution with `lambda=1` - "exp(-10*d)": lambda d: math.exp(-10*d), + r"$e^{-10d}$": lambda d: math.exp(-10*d), "norm": norm(loc=0.1, scale=0.1).pdf, } for (title, pdf), ax in zip(pdfs.items(), axes.ravel()): SRGG = nx.soft_random_geometric_graph(nodes, 0.1, pos=pos, p_dist=pdf) nx.draw_networkx_nodes(G, pos=pos, ax=ax, **node_opts) - draw_edges_fast(SRGG, pos=pos, ax=ax, **edge_opts) + nx.draw_networkx_edges(SRGG, pos=pos, ax=ax, **edge_opts) ax.set_title(f"p_dist={title}\n{SRGG.number_of_edges()} edges") fig.tight_layout() ``` @@ -376,7 +339,7 @@ for thresh, ax in zip(thresholds, axes): nodes, 0.1, thresh, pos=pos, weight=weight ) nx.draw_networkx_nodes(G, pos=pos, ax=ax, **node_opts) - draw_edges_fast(TRGG, pos=pos, ax=ax, **edge_opts) + nx.draw_networkx_edges(TRGG, pos=pos, ax=ax, **edge_opts) ax.set_title(f"Threshold = {thresh}, {TRGG.number_of_edges()} edges") fig.tight_layout() ``` diff --git a/content/generators/index.md b/content/generators/index.md new file mode 100644 index 00000000..cc311f17 --- /dev/null +++ b/content/generators/index.md @@ -0,0 +1,11 @@ +# Graph Generators + +A closer look at the functions provided by NetworkX to create interesting +graphs. 
+ +```{toctree} +--- +maxdepth: 1 +--- +geometric +``` diff --git a/requirements.txt b/requirements.txt index 603342cc..dc418e37 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,11 @@ matplotlib +scipy pygraphviz nbval git+git://github.com/networkx/networkx@main sphinx myst-nb -pydata-sphinx-theme==0.5.2 +sphinx-book-theme jupytext pandas numpy \ No newline at end of file diff --git a/site/_templates/beta_banner.html b/site/_templates/beta_banner.html deleted file mode 100644 index da519dfb..00000000 --- a/site/_templates/beta_banner.html +++ /dev/null @@ -1,5 +0,0 @@ -{# Create a banner at the top of the page warning users that the site is experimental #} -
-

Warning

-

This site is currently experimental; the content and URLs may change or be removed!

-
diff --git a/site/_templates/layout.html b/site/_templates/layout.html deleted file mode 100644 index d5c1987c..00000000 --- a/site/_templates/layout.html +++ /dev/null @@ -1,6 +0,0 @@ -{% extends "!layout.html" %} - -{% block docs_body %} - {% include "beta_banner.html" %} - {{ super() }} -{% endblock %} diff --git a/site/conf.py b/site/conf.py index 40d5e810..635533e3 100644 --- a/site/conf.py +++ b/site/conf.py @@ -45,33 +45,31 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'pydata_sphinx_theme' +html_theme = 'sphinx_book_theme' html_title = 'NetworkX Notebooks' html_logo = '_static/networkx_logo.svg' # html_favicon html_theme_options = { - "icon_links": [ - { - "name": "GitHub", - "url": "https://github.com/networkx/nx-guides/", - "icon": "fab fa-github-square", - }, - { - "name": "Binder", - "url": "https://mybinder.org/v2/gh/networkx/nx-guides/main?urlpath=lab/tree/content", - "icon": "fas fa-rocket", - }, - ], + "github_url": "https://github.com/networkx/nx-guides/", + "repository_url": "https://github.com/networkx/nx-guides/", + "repository_branch": "main", + "use_repository_button": True, + "use_issues_button": True, "use_edit_page_button": True, -} -html_context = { - "github_user": "networkx", - "github_repo": "nx-guides", - "github_version": "main", - "doc_path": "site", + "path_to_docs": "site/", + "launch_buttons": { + "binderhub_url": "https://mybinder.org", + }, } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
html_static_path = ['_static'] + +# -- Options for MyST-NB configuration ----------------------------------- + +# Bump up per cell execution timeout to 300 seconds (from default 30 seconds) +execution_timeout = 300 + + diff --git a/site/index.md b/site/index.md index 8f15f1ab..e9571a4d 100644 --- a/site/index.md +++ b/site/index.md @@ -36,6 +36,7 @@ maxdepth: 1 --- content/tutorial -content/generators/geometric +content/algorithms/index +content/generators/index ```