GH deploy build #136

Draft · wants to merge 7 commits into main
4 changes: 4 additions & 0 deletions README.md
@@ -1,5 +1,9 @@
# jupyterlab-sql-editor

## User Guide

A user guide is hosted [here](https://cybercentrecanada.github.io/jupyterlab-sql-editor/).

## Installation

Follow the installation instructions in [CONTRIBUTING](./CONTRIBUTING.md)
9 changes: 9 additions & 0 deletions azure-pipelines.yml
@@ -83,3 +83,12 @@ stages:
pip install twine
python -m twine upload --skip-existing -r "cccs-pypi" --config-file $(PYPIRC_PATH) $(Pipeline.Workspace)/dist/* --verbose
displayName: 'Publish artifact to PyPI.'
- job: PublishDocumentationToGithub
dependsOn: BuildPythonArtifact
steps:
- checkout: self
persistCredentials: true
- script: |
pip install mkdocs
mkdocs gh-deploy --force
displayName: 'Publish documentation site to GitHub.'
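For anyone reviewing this job, a quick way to sanity-check the docs build locally before the pipeline runs; a minimal sketch, assuming `mkdocs` is installed and a `mkdocs.yml` sits at the repository root (`--strict` turns build warnings into failures):

```python
# Local pre-flight for the gh-deploy job above: build the site the same way
# the pipeline would, and fail fast on warnings such as broken nav entries.
import subprocess

subprocess.run(["mkdocs", "build", "--strict"], check=True)
```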
67 changes: 36 additions & 31 deletions example/Spark.ipynb → docs/example/SparkConfigurationUsage.ipynb
@@ -1,5 +1,13 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "c9b06840",
"metadata": {},
"source": [
"# Configuration and Usage"
]
},
{
"cell_type": "code",
"execution_count": 1,
@@ -15,7 +23,7 @@
},
"outputs": [],
"source": [
"from pyspark.sql import SparkSession"
"from pyspark.sql import SparkSession\n"
]
},
{
@@ -44,7 +52,7 @@
"id": "5fe87250-6936-4de6-bacf-0490a5812516",
"metadata": {},
"source": [
" Normally IPython only displays the output of the last statement. However it can be handy to run multiple sql magics in a single cell and see the output of each execution. Setting `ast_node_interactivity` to `all` will enable that.\n"
"Normally IPython only displays the output of the last statement. However it can be handy to run multiple sql magics in a single cell and see the output of each execution. Setting `ast_node_interactivity` to `all` will enable that.\n"
]
},
{
@@ -63,7 +71,7 @@
"outputs": [],
"source": [
"from IPython.core.interactiveshell import InteractiveShell\n",
"InteractiveShell.ast_node_interactivity = 'all'"
"InteractiveShell.ast_node_interactivity = 'all'\n"
]
},
{
@@ -82,7 +90,7 @@
},
"outputs": [],
"source": [
"%load_ext jupyterlab_sql_editor.ipython_magic.sparksql"
"%load_ext jupyterlab_sql_editor.ipython_magic.sparksql\n"
]
},
{
@@ -101,7 +109,7 @@
"outputs": [],
"source": [
"%config SparkSql.cacheTTL=3600\n",
"%config SparkSql.outputFile=\"/tmp/sparkdb.schema.json\""
"%config SparkSql.outputFile=\"/tmp/sparkdb.schema.json\"\n"
]
},
{
@@ -143,7 +151,7 @@
"source": [
"df = spark.read.json(\"file:/path/to/contacts.json\")\n",
"df.createOrReplaceTempView(\"CONTACTS_TABLE\")\n",
"df.printSchema()"
"df.printSchema()\n"
]
},
{
@@ -179,7 +187,7 @@
"source": [
"df = spark.read.json(\"file:/path/to/conversations.json\")\n",
"df.createOrReplaceTempView(\"MESSAGES_TABLE\")\n",
"df.printSchema()"
"df.printSchema()\n"
]
},
{
@@ -207,7 +215,7 @@
}
],
"source": [
"%sparksql --refresh all"
"%sparksql --refresh all\n"
]
},
{
@@ -280,7 +288,7 @@
}
],
"source": [
"%sparksql SHOW TABLES"
"%sparksql SHOW TABLES\n"
]
},
{
@@ -563,15 +571,20 @@
"SELECT\n",
" *\n",
"FROM\n",
" contacts_table AS con"
" contacts_table AS con\n"
]
},
{
"cell_type": "markdown",
"id": "07b3ac2f-3750-4c24-84e7-27c61d3a131d",
"metadata": {},
"source": [
"# Create a temporary view with the --view option"
"# Efficient query result reuse across cells\n",
"\n",
"A popular use-case is about capturing the output of a cell and use it later in another query. This can be done using a view. Here are the steps required to create a view and then referencing it \n",
"vi `sparksql`.\n",
"\n",
"## Create a temporary view with the --view option"
]
},
{
@@ -601,7 +614,7 @@
"source": [
"%%sparksql --view the_exploded_table --output skip\n",
"SELECT\n",
" *, \n",
" *,\n",
" explode(con.phoneNumbers) as phoneNumber\n",
"FROM\n",
" contacts_table AS con\n"
Expand Down Expand Up @@ -678,15 +691,15 @@
}
],
"source": [
"%sparksql SHOW TABLES"
"%sparksql SHOW TABLES\n"
]
},
{
"cell_type": "markdown",
"id": "4f145195-e8c9-4771-a4ae-0ffbe479234c",
"metadata": {},
"source": [
"# Use temporary view in subsequent queries with autocomplet suggestions"
"## Use temporary view in subsequent queries with autocomplete suggestions"
]
},
{
@@ -1073,7 +1086,7 @@
"source": [
"%%sparksql --dataframe the_exploded_dataframe --output skip\n",
"SELECT\n",
" *, \n",
" *,\n",
" explode(con.phoneNumbers) as phoneNumber\n",
"FROM\n",
" contacts_table AS con\n"
@@ -1116,7 +1129,7 @@
}
],
"source": [
"the_exploded_dataframe.select('phoneNumber').show()"
"the_exploded_dataframe.select('phoneNumber').show()\n"
]
},
{
@@ -1173,7 +1186,7 @@
" contacts_table AS con\n",
"--end-sparksql\n",
"'''\n",
"print(sql)"
"print(sql)\n"
]
},
{
@@ -1187,13 +1200,13 @@
"sql = '''\n",
"--start-sparksql\n",
"SELECT\n",
" *, \n",
" *,\n",
" explode(con.phoneNumbers) as phoneNumber\n",
"FROM\n",
" contacts_table AS con\n",
"--end-sparksql\n",
"'''\n",
"print(sql)"
"print(sql)\n"
]
},
{
@@ -1225,7 +1238,7 @@
}
],
"source": [
"spark.sql(sql).show()"
"spark.sql(sql).show()\n"
]
},
{
@@ -1290,7 +1303,7 @@
}
],
"source": [
"%%sparksql?"
"%%sparksql?\n"
]
},
{
@@ -1389,7 +1402,7 @@
" TRANSFORM(SEQUENCE(1, 512), x -> rand()) AS data -- array of 512 floats\n",
"FROM\n",
"RANGE\n",
" (1, 400000, 1, 100) \n",
" (1, 400000, 1, 100)\n",
"UNION\n",
"SELECT\n",
" id,\n",
Expand All @@ -1401,7 +1414,7 @@
" TRANSFORM(SEQUENCE(1, 512), x -> rand()) AS data -- array of 512 floats\n",
"FROM\n",
"RANGE\n",
" (1, 40000, 1, 100) \n"
" (1, 40000, 1, 100)\n"
]
},
{
@@ -1484,14 +1497,6 @@
" contacts_table AS con\n",
" INNER JOIN messages_table AS mes ON mes.`first Name` = con.`first Name`\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39fbee24-f69b-474a-903d-bf38d170ee0d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
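Stepping back from the diff for a moment: the notebook above captures query results either as a temporary view (`--view`) or as a bound Python variable (`--dataframe`). A hedged sketch of how the bound dataframe can then feed ordinary PySpark calls, assuming the cells above have run so `the_exploded_dataframe` exists:

```python
# `the_exploded_dataframe` was bound by the `%%sparksql --dataframe ...` cell;
# from here on it behaves like any other PySpark DataFrame.
n = the_exploded_dataframe.select("phoneNumber").distinct().count()
print(n)

# It can also be re-registered as a view for later %%sparksql cells
# ("exploded_again" is an illustrative name, not one used in the notebook).
the_exploded_dataframe.createOrReplaceTempView("exploded_again")
```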
24 changes: 8 additions & 16 deletions example/SparkDataframe.ipynb → docs/example/SparkDataframe.ipynb
@@ -13,7 +13,7 @@
"import ipywidgets as widgets\n",
"out = widgets.Output()\n",
"with out:\n",
" spark = SparkSession.builder.getOrCreate()"
" spark = SparkSession.builder.getOrCreate()\n"
]
},
{
@@ -37,7 +37,7 @@
],
"source": [
"df = spark.sql(\"SELECT id, uuid() FROM RANGE (1, 1000)\")\n",
"df"
"df\n"
]
},
{
@@ -51,7 +51,7 @@
"source": [
"from jupyterlab_sql_editor.ipython.sparkdf import register_display\n",
"from jupyterlab_sql_editor.outputters.outputters import _display_results\n",
"register_display()"
"register_display()\n"
]
},
{
@@ -114,7 +114,7 @@
"source": [
"# change default display behaviour\n",
"df = spark.sql(\"SELECT id, uuid() FROM RANGE (1, 1000)\")\n",
"df"
"df\n"
]
},
{
@@ -126,7 +126,7 @@
},
"outputs": [],
"source": [
"pdf = df.limit(1).toPandas()"
"pdf = df.limit(1).toPandas()\n"
]
},
{
@@ -156,7 +156,7 @@
],
"source": [
"# _display_results lets you configure the output\n",
"_display_results(pdf, output=\"html\", show_nonprinting=False)"
"_display_results(pdf, output=\"html\", show_nonprinting=False)\n"
]
},
{
@@ -181,7 +181,7 @@
}
],
"source": [
"_display_results(pdf, output=\"text\")"
"_display_results(pdf, output=\"text\")\n"
]
},
{
@@ -210,16 +210,8 @@
],
"source": [
"df = spark.read.json(\"file:/path/to/contacts.json\")\n",
"_display_results(pdf, output=\"json\")"
"_display_results(pdf, output=\"json\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1447b3e6-955b-4269-bc04-6395a9673036",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
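The notebook exercises three output modes of `_display_results`; a minimal sketch comparing them on the same frame, assuming `pdf` is the small pandas frame created earlier in the notebook:

```python
# Render one small pandas frame through each output mode shown above.
from jupyterlab_sql_editor.outputters.outputters import _display_results

for mode in ("html", "text", "json"):
    _display_results(pdf, output=mode)
```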
@@ -1,5 +1,13 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "8ba62d82",
"metadata": {},
"source": [
"# Escaping Control Characters"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -13,7 +21,7 @@
"\n",
"spark = SparkSession.builder.getOrCreate()\n",
"\n",
"%load_ext jupyterlab_sql_editor.ipython_magic.sparksql"
"%load_ext jupyterlab_sql_editor.ipython_magic.sparksql\n"
]
},
{
@@ -53,7 +61,7 @@
" '\\\\\\\\t' AS two_backslash_and_t,\n",
" '\\\\\\\\\\t' AS two_backslash_and_tab\n",
"--end-sparksql\n",
"''').show()"
"''').show()\n"
]
},
{
@@ -191,14 +199,6 @@
" '\\\\\\\\t' AS two_backslash_and_t,\n",
" '\\\\\\\\\\t' AS two_backslash_and_tab\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "383afdca",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
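Two escaping layers stack in this notebook: Python consumes one round of backslash escapes before Spark SQL ever sees the literal. A small worked sketch of the column names used above (illustrative, runnable on its own):

```python
# Layer 1: Python. The source text '\\\\t' collapses to the 3 characters \ \ t,
# while '\\\\\t' collapses to backslash, backslash, TAB.
two_backslash_and_t = '\\\\t'
assert two_backslash_and_t == chr(92) * 2 + 't'

two_backslash_and_tab = '\\\\\t'
assert two_backslash_and_tab == chr(92) * 2 + '\t'

# Layer 2: Spark SQL then applies its own escape handling to what Python
# passed through, which is why the SQL literals above need the doubled forms.
```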