Skip to content

Commit e9859cc

Browse files
committed
[bin] Add gh-md-toc
1 parent 34b033f commit e9859cc

File tree

2 files changed

+367
-0
lines changed

2 files changed

+367
-0
lines changed

bin/gh-md-toc

Lines changed: 361 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,361 @@
1+
#!/usr/bin/env bash
2+
3+
#
4+
# Steps:
5+
#
6+
# 1. Download corresponding html file for some README.md:
7+
# curl -s $1
8+
#
9+
# 2. Discard rows that do not contain the substring 'user-content-' (GitHub's markup):
10+
# awk '/user-content-/ { ...
11+
#
12+
# 3.1 Get last number in each row like ' ... </span></a>sitemap.js</h1'.
13+
# It's a level of the current header:
14+
# substr($0, length($0), 1)
15+
#
16+
# 3.2 Get level from 3.1 and insert corresponding number of spaces before '*':
17+
# sprintf("%*s", substr($0, length($0), 1)*3, " ")
18+
#
19+
# 4. Find head's text and insert it inside "* [ ... ]":
20+
# substr($0, match($0, /a>.*<\/h/)+2, RLENGTH-5)
21+
#
22+
# 5. Find anchor and insert it inside "(...)":
23+
# substr($0, match($0, "href=\"[^\"]+?\" ")+6, RLENGTH-8)
24+
#
25+
26+
# Script version; shown by --help/--version and embedded in the User-Agent.
gh_toc_version="0.7.0"
27+
28+
# User-Agent string passed to the curl/wget requests made by this script.
gh_user_agent="gh-md-toc v$gh_toc_version"
29+
30+
#
31+
# Download the HTML rendering of a README.md by its URL.
32+
#
33+
#
34+
# Fetch the document at a URL and write the response body to stdout.
#
# Arguments: $1 - URL to download
# Globals:   gh_user_agent (read) - sent as the User-Agent header
# Outputs:   response body on stdout, diagnostics on stderr
# Returns:   exit status of curl/wget; exits 1 when neither tool is installed
gh_toc_load() {
    local gh_url=$1

    if command -v curl >/dev/null 2>&1; then
        curl --user-agent "$gh_user_agent" -s "$gh_url"
    elif command -v wget >/dev/null 2>&1; then
        wget --user-agent="$gh_user_agent" -qO- "$gh_url"
    else
        # stdout is normally piped into the TOC grabber, so the diagnostic
        # has to go to stderr to be seen by the user.
        echo "Please, install 'curl' or 'wget' and try again." >&2
        exit 1
    fi
}
46+
47+
#
48+
# Converts local md file into html by GitHub
49+
#
50+
# -> curl -X POST --data '{"text": "Hello world github/linguist#1 **cool**, and #1!"}' https://api.github.com/markdown
51+
# <p>Hello world github/linguist#1 <strong>cool</strong>, and #1!</p>'"
52+
# Render a local markdown file to HTML through GitHub's raw markdown API.
#
# Arguments: $1 - path of the markdown file to upload
# Globals:   gh_user_agent (read); GH_TOC_TOKEN (read, optional API token).
#            Without GH_TOC_TOKEN, a token.txt next to this script is used
#            when it exists.
# Outputs:   rendered HTML on stdout, or exactly one sentinel line:
#              XXNetworkErrorXX - curl exited with a non-zero status
#              XXRateLimitXX    - response contains "API rate limit exceeded"
# Returns:   0 on success, 1 when a sentinel was printed
gh_toc_md2html() {
    local gh_file_md=$1
    local api_url="https://api.github.com/markdown/raw"
    local token=""
    local token_file
    local output
    local -a auth_header=()

    # Everything is local so that an unrelated TOKEN/URL variable in the
    # caller's environment can never end up in the request.
    if [ -n "$GH_TOC_TOKEN" ]; then
        token=$GH_TOC_TOKEN
    else
        token_file="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/token.txt"
        if [ -f "$token_file" ]; then
            token="$(cat "$token_file")"
        fi
    fi

    # Only add the header when there is a token; an array keeps curl from
    # receiving an empty -H argument.
    if [ -n "$token" ]; then
        auth_header=(-H "Authorization: token ${token}")
    fi

    if ! output=$(curl -s \
        --user-agent "$gh_user_agent" \
        --data-binary @"$gh_file_md" \
        -H "Content-Type:text/plain" \
        "${auth_header[@]}" \
        "$api_url"); then
        # Print the sentinel alone: callers compare the whole output to it.
        echo "XXNetworkErrorXX"
        return 1
    fi

    case $output in
        *"API rate limit exceeded"*)
            echo "XXRateLimitXX"
            return 1
            ;;
    esac

    echo "${output}"
}
85+
86+
87+
#
88+
# Checks whether the passed string is a URL
89+
#
90+
# Classify a string as an http(s) URL or something else (a local path).
#
# Arguments: $1 - string to classify
# Outputs:   "yes" when $1 starts with http:// or https://, otherwise "no"
gh_is_url() {
    case $1 in
        # Require the scheme separator so that local files whose names merely
        # start with "http" (e.g. http-notes.md) are not treated as URLs.
        http://* | https://*)
            echo "yes" ;;
        *)
            echo "no" ;;
    esac
}
98+
99+
#
100+
# TOC generator
101+
#
102+
# Generate the TOC for one document and optionally insert it into the file.
#
# Arguments:
#   $1 - URL or local path of the markdown document
#   $2 - number of documents being processed; "1" prints the
#        "Table of Contents" heading and drops the per-document link prefix
#   $3 - "yes" to insert the TOC between the <!--ts--> / <!--te--> markers
#   $4 - "yes" to delete the backup and the intermediate TOC file afterwards
#   $5 - "yes" to leave the "Added by" footer out of the inserted TOC
# Outputs: the TOC and status messages on stdout
# Exits:   1 on a missing source, download/API failure, or missing markers
gh_toc() {
    local gh_src=$1
    local gh_src_copy=$1
    local gh_ttl_docs=$2
    local need_replace=$3
    local no_backup=$4
    local no_footer=$5

    if [ -z "$gh_src" ]; then
        echo "Please, enter URL or local path for a README.md"
        exit 1
    fi

    # Show "TOC" string only if working with one document
    if [ "$gh_ttl_docs" = "1" ]; then
        echo "Table of Contents"
        echo "================="
        echo ""
        gh_src_copy=""
    fi

    if [ "$(gh_is_url "$gh_src")" = "yes" ]; then
        gh_toc_load "$gh_src" | gh_toc_grab "$gh_src_copy"
        # PIPESTATUS[0] is the downloader's status, not the grabber's.
        if [ "${PIPESTATUS[0]}" != "0" ]; then
            echo "Could not load remote document."
            echo "Please check your url or network connectivity"
            exit 1
        fi
        if [ "$need_replace" = "yes" ]; then
            echo
            echo "!! '$gh_src' is not a local file"
            echo "!! Can't insert the TOC into it."
            echo
        fi
        return
    fi

    local rawhtml
    rawhtml=$(gh_toc_md2html "$gh_src")
    if [ "$rawhtml" = "XXNetworkErrorXX" ]; then
        echo "Parsing local markdown file requires access to github API"
        echo "Please make sure curl is installed and check your network connectivity"
        exit 1
    fi
    if [ "$rawhtml" = "XXRateLimitXX" ]; then
        local token_file
        token_file="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/token.txt"
        echo "Parsing local markdown file requires access to github API"
        echo "Error: You exceeded the hourly limit. See: https://developer.github.com/v3/#rate-limiting"
        echo "or place GitHub auth token here: ${token_file}"
        exit 1
    fi

    local toc
    toc=$(printf '%s\n' "$rawhtml" | gh_toc_grab "$gh_src_copy")
    echo "$toc"

    if [ "$need_replace" != "yes" ]; then
        return
    fi

    # The path is quoted everywhere below so files with spaces work.
    if grep -Fxq -e "<!--ts-->" -- "$gh_src" && grep -Fxq -e "<!--te-->" -- "$gh_src"; then
        echo "Found markers"
    else
        echo "You don't have <!--ts--> or <!--te--> in your file...exiting"
        exit 1
    fi

    local ts="<\!--ts-->"
    local te="<\!--te-->"
    local dt
    dt=$(date +'%F_%H%M%S')
    local ext=".orig.${dt}"
    local toc_path="${gh_src}.toc.${dt}"
    local toc_footer
    toc_footer="<!-- Added by: $(whoami), at: $(date) -->"

    # http://fahdshariff.blogspot.ru/2012/12/sed-mutli-line-replacement-between-two.html
    # Clear the old TOC: delete everything between the markers but keep the
    # marker lines themselves. The -i suffix leaves a backup copy behind.
    sed -i"${ext}" "/${ts}/,/${te}/{//!d;}" "$gh_src"

    # Create the TOC file that sed will read back in.
    echo "${toc}" > "${toc_path}"
    if [ "${no_footer}" != "yes" ]; then
        echo -e "\n${toc_footer}\n" >> "$toc_path"
    fi

    # Insert the TOC file after the start marker. BSD sed (macOS) needs an
    # explicit, separate backup-suffix argument for -i.
    if [[ "$(uname)" == "Darwin" ]]; then
        sed -i "" "/${ts}/r ${toc_path}" "$gh_src"
    else
        sed -i "/${ts}/r ${toc_path}" "$gh_src"
    fi
    echo
    if [ "${no_backup}" = "yes" ]; then
        rm -- "${toc_path}" "${gh_src}${ext}"
    fi
    echo "!! TOC was added into: '$gh_src'"
    if [ -z "${no_backup}" ]; then
        echo "!! Origin version of the file: '${gh_src}${ext}'"
        echo "!! TOC added into a separate file: '${toc_path}'"
    fi
    echo
}
196+
197+
#
198+
# Grabber of the TOC from rendered html
199+
#
200+
# $1 - a source url of document.
201+
# It is needed if the TOC is generated for multiple documents.
202+
#
203+
# Build markdown TOC entries from rendered GitHub HTML read on stdin.
#
# Arguments: $1 - prefix put in front of every anchor (the document URL when
#                 several documents are processed, otherwise empty)
# Outputs:   one "* [text](prefix#anchor)" line per heading on stdout,
#            indented by three spaces per heading level below h1
gh_toc_grab() {
    local link_prefix=$1
    local href_pattern
    local awk_script
    local -a grep_cmd
    local -a echo_opts

    # OS/390 uses pcregrep, a plain echo and a slightly different href regex.
    if [ "$(uname -s)" = "OS/390" ]; then
        grep_cmd=(pcregrep -o)
        echo_opts=()
        href_pattern='"href=\"([^\"]+)?\""'
    else
        grep_cmd=(grep -Eo)
        echo_opts=(-e)
        href_pattern='"href=\"[^\"]+?\""'
    fi

    # Each input row looks like:
    #   <a id="user-content-..." href="..."><span ...></span></a> ... </hN
    # so the last character is the heading level. In the anchor, "+" becomes
    # a space and "%" becomes "\x" so that echo -e can decode %XX escapes.
    awk_script='{
        level = substr($0, length($0), 1)
        text = substr($0, match($0, /a>.*<\/h/)+2, RLENGTH-5)
        href = substr($0, match($0, '"$href_pattern"')+6, RLENGTH-7)
        modified_href = ""
        split(href, chars, "")
        for (i=1;i <= length(href); i++) {
            c = chars[i]
            res = ""
            if (c == "+") {
                res = " "
            } else {
                if (c == "%") {
                    res = "\\x"
                } else {
                    res = c ""
                }
            }
            modified_href = modified_href res
        }
        print sprintf("%*s", (level-1)*3, "") "* [" text "](" gh_url modified_href ")"
    }'

    # If a closing </h[1-6]> is on its own line, move it onto the previous one:
    #   was:    The command <code>foo1</code>
    #           </h1>
    #   became: The command <code>foo1</code></h1>
    sed -e ':a' -e 'N' -e '$!ba' -e 's/\n<\/h/<\/h/g' |

    # keep only the heading anchors generated by GitHub
    "${grep_cmd[@]}" '<a.*id="user-content-[^"]*".*</h[1-6]' |

    # remove code tags
    sed -e 's/<code>//g' -e 's/<\/code>//g' |

    # remove g-emoji
    sed 's/<g-emoji[^>]*[^<]*<\/g-emoji> //g' |

    # format the result lines; awk reads the pipe through the substitution
    echo "${echo_opts[@]}" "$(awk -v "gh_url=${link_prefix}" "$awk_script")"
}
267+
268+
# perl -lpE 's/(\[[^\]]*\]\()(.*?)(\))/my ($pre, $in, $post)=($1, $2, $3) ; $in =~ s{\+}{ }g; $in =~ s{%}{\\x}g; $pre.$in.$post/ems')"
269+
270+
#
271+
# Returns filename only from full path or url
272+
#
273+
# Print the last path component of a full path or URL.
#
# Arguments: $1 - path or URL
# Outputs:   everything after the final "/" on stdout
gh_toc_get_filename() {
    # printf rather than echo: a basename such as "-n" or "-e" would be
    # consumed by echo as an option instead of being printed.
    printf '%s\n' "${1##*/}"
}
276+
277+
#
278+
# Options handlers
279+
#
280+
# Parse the command line and run the TOC generator for each source.
#
# Arguments: the script's command-line arguments:
#   --help | (none)   print usage
#   --version         print version and tool information
#   -                 read markdown from stdin
#   [--insert] [--no-backup] [--hide-footer] src [src ...]
# Globals:   gh_toc_version (read), TMPDIR (read)
# Outputs:   TOC(s) followed by a "Created by" line on stdout
gh_toc_app() {
    local need_replace="no"
    local no_backup=""
    local no_footer=""

    if [ "$1" = '--help' ] || [ $# -eq 0 ]; then
        local app_name
        app_name=$(basename "$0")
        echo "GitHub TOC generator ($app_name): $gh_toc_version"
        echo ""
        echo "Usage:"
        echo " $app_name [--insert] [--hide-footer] src [src] Create TOC for a README file (url or local path)"
        echo " $app_name [--no-backup] [--hide-footer] src [src] Create TOC without backup, requires <!--ts--> / <!--te--> placeholders"
        echo " $app_name - Create TOC for markdown from STDIN"
        echo " $app_name --help Show help"
        echo " $app_name --version Show version"
        return
    fi

    if [ "$1" = '--version' ]; then
        local tool
        echo "$gh_toc_version"
        echo
        echo "os: $(lsb_release -d | cut -f 2)"
        echo "kernel: $(cat /proc/version)"
        echo "shell: $("$SHELL" --version)"
        echo
        for tool in curl wget grep awk sed; do
            printf "%-5s: " "$tool"
            echo "$("$tool" --version | head -n 1)"
        done
        return
    fi

    if [ "$1" = "-" ]; then
        local tmp_dir=${TMPDIR:-/tmp}
        local gh_tmp_md
        local rc

        if [ ! -d "$tmp_dir" ]; then
            mkdir -p "$tmp_dir"
        fi
        if [ "$(uname -s)" = "OS/390" ]; then
            gh_tmp_md="$tmp_dir/tmp.$(date +%m%d%Y%H%M%S)"
        else
            gh_tmp_md=$(mktemp "$tmp_dir/tmp.XXXXXX") || return 1
        fi

        # Copy stdin verbatim. A `while read` loop would strip leading
        # whitespace, interpret backslashes and drop an unterminated last
        # line, all of which change how the markdown renders.
        cat > "$gh_tmp_md"
        gh_toc_md2html "$gh_tmp_md" | gh_toc_grab ""
        rc=$?
        rm -f -- "$gh_tmp_md"
        return "$rc"
    fi

    # Leading options may appear in any order; the first non-option
    # argument starts the list of sources.
    while [ $# -gt 0 ]; do
        case $1 in
            --insert)
                need_replace="yes"
                ;;
            --no-backup)
                need_replace="yes"
                no_backup="yes"
                ;;
            --hide-footer)
                need_replace="yes"
                no_footer="yes"
                ;;
            *)
                break
                ;;
        esac
        shift
    done

    local md
    for md in "$@"; do
        echo ""
        gh_toc "$md" "$#" "$need_replace" "$no_backup" "$no_footer"
    done

    echo ""
    echo "Created by [gh-md-toc](https://github.com/ekalinin/github-markdown-toc)"
}
356+
357+
#
358+
# Entry point
359+
#
360+
# Run the option handler with the script's arguments passed through unchanged.
gh_toc_app "$@"
361+

justfile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@ hunter:
7575
ledger:
7676
pip3 install ledger_agent
7777

78+
# Download the upstream gh-md-toc script into bin/ and make it executable.
# --fail stops an HTTP error page from being saved as the script.
gh-md-toc:
    curl -fsSL -o bin/gh-md-toc \
        https://raw.githubusercontent.com/ekalinin/github-markdown-toc/master/gh-md-toc
    chmod +x bin/gh-md-toc
83+
7884
dia-x2160:
7985
#!/usr/bin/env bash
8086
set -euo pipefail

0 commit comments

Comments
 (0)