Skip to content

Commit

Permalink
Merge pull request #12 from mloncode/download-notebook
Browse files Browse the repository at this point in the history
Improve download notebook
  • Loading branch information
m09 authored Jan 28, 2020
2 parents 4d66d44 + 3c231c5 commit f4bca02
Showing 1 changed file with 26 additions and 27 deletions.
53 changes: 26 additions & 27 deletions notebooks/Download repositories.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@
"outputs": [],
"source": [
"from logging import getLogger\n",
"from os import makedirs\n",
"from os.path import join as path_join\n",
"from pathlib import Path\n",
"\n",
"from coloredlogs import install as coloredlogs_install\n",
"\n",
Expand All @@ -37,16 +36,17 @@
"logger = getLogger(\"downloader\")\n",
"\n",
"\n",
"git_data_dir = path_join(\"/devfest\", \"repos\", \"git-data\")\n",
"makedirs(git_data_dir, exist_ok=True)\n",
"repos_json = path_join(git_data_dir, \"repos.json\")"
"repos_dir = Path(\"repos\")\n",
"git_data_dir = repos_dir / \"git-data\"\n",
"git_data_dir.mkdir(parents=True, exist_ok=True)\n",
"repos_json = repos_dir / \"repos.json\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To use the GitHub API, we need a token. Please create one in your [GitHub account settings](https://github.com/settings/tokens) (the basic permissions are fine), and fill it here:"
"To use the GitHub API, we need a token. It is normally made available through the `GITHUB_TOKEN` environment variable. Let's check!"
]
},
{
Expand All @@ -55,7 +55,10 @@
"metadata": {},
"outputs": [],
"source": [
"TOKEN = "
"from os import environ\n",
"TOKEN = environ.get(\"GITHUB_TOKEN\")\n",
"if TOKEN is None:\n",
" logger.critical(\"Could not find GITHUB_TOKEN\")"
]
},
{
Expand All @@ -66,32 +69,31 @@
"source": [
"from json import dump as json_dump\n",
"from operator import itemgetter\n",
"from re import compile as re_compile\n",
"from re import search as re_search\n",
"from typing import Any, Dict, List, Optional\n",
"\n",
"import requests\n",
"from tqdm import tqdm_notebook as tqdm\n",
"\n",
"\n",
"next_pattern = re_compile('<(https://api.github.com/user/[^/]+/repos\\?[^>]*page=\\d+[^>]*)>; rel=\"next\"')\n",
"last_pattern = re_compile('<https://api.github.com/user/[^/]+/repos\\?[^>]*page=(\\d+)[^>]*>; rel=\"last\"')\n",
"\n",
"\n",
"def parse_next(link_header: str) -> Optional[str]:\n",
" match = next_pattern.search(link_header)\n",
" return match.group(1) if match is not None else None\n",
"\n",
"\n",
"def parse_last(link_header: str) -> Optional[int]:\n",
" match = last_pattern.search(link_header)\n",
" return int(match.group(1)) if match is not None else None\n",
"\n",
"\n",
"def list_repositories(user: str,\n",
" token: str,\n",
" max_size_mb: int,\n",
" repos_number: int\n",
" ) -> List[Dict[str, Any]]:\n",
"\n",
" def parse_last(link_header: str) -> Optional[int]:\n",
" match = re_search(\n",
" r'<'\n",
" r'https://api.github.com/user/'\n",
" r'[^/]+/repos\\?[^>]*page='\n",
" r'(\\d+)'\n",
" r'[^>]*>; rel=\"last\"',\n",
" link_header)\n",
" if match is None:\n",
" return None\n",
" return int(match.group(1))\n",
"\n",
" repos_list_headers = dict(Authorization=\"token %s\" % token)\n",
" repos_url = \"https://api.github.com/users/%s/repos\" % user\n",
"\n",
Expand All @@ -100,13 +102,10 @@
" total_pages = parse_last(request_total.headers[\"Link\"])\n",
" assert total_pages is not None\n",
"\n",
" def get_page_url(page: int):\n",
" return \"%s?page=%d\" % (repos_url, page)\n",
"\n",
" logger.info(\"Retrieving repos list for user %s\" % user)\n",
" repos = []\n",
" for page in tqdm(range(1, total_pages + 1)):\n",
" request = requests.get(get_page_url(page),\n",
" request = requests.get(\"%s?page=%d\" % (repos_url, page),\n",
" headers=repos_list_headers)\n",
" request.raise_for_status()\n",
" for repo in request.json():\n",
Expand Down Expand Up @@ -222,7 +221,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.7.6"
}
},
"nbformat": 4,
Expand Down

0 comments on commit f4bca02

Please sign in to comment.