-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathpinboard-to-kindle.recipe
195 lines (164 loc) · 6.36 KB
/
pinboard-to-kindle.recipe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
#!/usr/bin/env python
# To test this recipe run:
# echo "PINBOARD_TOKEN=..." > config.env
# eval $(egrep -v "^#" config.env | xargs) ebook-convert pinboard-to-kindle.recipe pinboard.epub
# Module metadata.
__license__ = "MIT"
__copyright__ = "2020, Christian Hans"
__website__ = "https://github.com/christianhans/pinboard-to-kindle"
#
# Config
#
## Maximum number of articles to put into the generated ebook. Bookmarks whose
## page could not be fetched do not count towards this limit.
MAX_ARTICLES = 15
## If set, the recipe will only consider unread Pinboard bookmarks that have this tag
KINDLE_TO_TAG = "kindle-to"
## If set (and KINDLE_TO_TAG is set as well), the recipe will update every
## successfully fetched Pinboard bookmark: KINDLE_TO_TAG is removed and this
## tag is added
KINDLE_SENT_TAG = "kindle-sent"
## Path to fetch-article-moz-readability/index.js, relative to the directory
## that contains this recipe file
FETCH_ARTICLE_MOZ_READABILITY_RELATIVE_PATH = "fetch-article-moz-readability/index.js"
## Temporary folder to use for downloading bookmark's page content. The files
## written here are removed again when the recipe's cleanup() runs
FETCH_ARTICLE_TMP_PATH = "/tmp"
## Node binary to use (name resolved via PATH, or an absolute path)
NODE_BIN = "node"
import os
import sys
import re
import json
import subprocess
import uuid
from urllib.parse import urlencode
from urllib.request import urlopen
class PinboardRecipe(BasicNewsRecipe):
    """Calibre recipe that turns unread Pinboard bookmarks into an ebook.

    Workflow (see ``parse_index``):

    1. Query the Pinboard API for bookmarks (optionally filtered by
       ``KINDLE_TO_TAG``) and keep the ones flagged as unread.
    2. Render each bookmark to a local HTML file by running the external
       Node script referenced by ``FETCH_ARTICLE_MOZ_READABILITY_RELATIVE_PATH``.
    3. Hand the local files to Calibre through ``get_obfuscated_article``.
    4. Optionally retag the bookmark on Pinboard (``KINDLE_SENT_TAG``).
    5. Delete the temporary HTML files in ``cleanup``.
    """

    title = "Pinboard"
    description = "Generate an ebook of unread Pinboard bookmarks."
    __author__ = "Christian Hans"
    auto_cleanup = False
    no_stylesheets = True
    remove_javascript = True
    articles_are_obfuscated = True
    encoding = "utf8"
    # Maps bookmark URL -> path of the locally rendered HTML file.
    # NOTE: this is a class-level dict, so it is shared by all instances.
    downloaded_file_paths = {}

    def _get_pinboard_token(self):
        """Return the Pinboard API token from the PINBOARD_TOKEN env variable.

        Aborts recipe processing when the variable is missing or empty.
        """
        pinboard_token = os.environ.get("PINBOARD_TOKEN")
        if not pinboard_token:
            self.abort_recipe_processing(
                "Please set PINBOARD_TOKEN environment variable."
            )
        return pinboard_token

    def _get_bookmarks(self):
        """Fetch bookmarks from Pinboard and return the unread ones.

        Returns:
            A non-empty list of bookmark dicts as decoded from the API's JSON
            response, restricted to entries whose ``toread`` field is "yes".

        Aborts recipe processing when no unread bookmark is found.
        """
        params = urlencode(
            {
                "tag": KINDLE_TO_TAG or "",
                "format": "json",
                "auth_token": self._get_pinboard_token(),
            }
        )
        # Close the HTTP response deterministically instead of leaking it.
        with urlopen("https://api.pinboard.in/v1/posts/all?" + params) as response:
            payload = response.read()
        # "utf-8-sig" tolerates a leading byte-order mark in the response.
        bookmarks = json.loads(payload.decode("utf-8-sig"))
        unread = [b for b in bookmarks if b["toread"] == "yes"]
        if not unread:
            self.abort_recipe_processing("No unread Pinboard bookmarks.")
        return unread

    def _mark_bookmark_as_sent(self, bookmark):
        """Replace KINDLE_TO_TAG with KINDLE_SENT_TAG on a Pinboard bookmark.

        Does nothing unless both tag constants are set. All other bookmark
        fields are sent back unchanged so the existing entry is overwritten
        ("replace": "yes") rather than duplicated.
        """
        if (not KINDLE_TO_TAG) or (not KINDLE_SENT_TAG):
            return
        print("Updating Pinboard tags: {}".format(bookmark["href"]))
        tags = bookmark["tags"].split()
        if KINDLE_TO_TAG in tags:
            tags.remove(KINDLE_TO_TAG)
        if KINDLE_SENT_TAG not in tags:
            tags.append(KINDLE_SENT_TAG)
        params = urlencode(
            {
                "url": bookmark["href"],
                "description": bookmark["description"],
                "extended": bookmark["extended"],
                "tags": " ".join(tags),
                "dt": bookmark["time"],
                "shared": bookmark["shared"],
                "toread": bookmark["toread"],
                "replace": "yes",
                "format": "json",
                "auth_token": self._get_pinboard_token(),
            }
        )
        # The response body is not inspected; just make sure it is closed.
        with urlopen("https://api.pinboard.in/v1/posts/add?" + params):
            pass

    def _fetch_article_moz_readability(self, url):
        """Render ``url`` to a temporary HTML file using the Node script.

        Returns:
            Path of the HTML file the Node script was asked to write.

        Raises:
            subprocess.CalledProcessError: the Node process exited with a
                non-zero status.
            OSError: the Node binary could not be started.
        """
        script_path = self._get_fetch_article_moz_readability_script_path()
        downloaded_file_path = os.path.join(
            FETCH_ARTICLE_TMP_PATH, "{}.html".format(uuid.uuid1())
        )
        # Register the path before downloading so cleanup() also removes a
        # partially written file if the download fails half-way.
        self.downloaded_file_paths[url] = downloaded_file_path
        print("Downloading: {}".format(url))
        # check_call (instead of call) turns a failed render into an exception
        # so the caller skips the article instead of using a broken file.
        subprocess.check_call(
            [
                NODE_BIN,
                "--unhandled-rejections=strict",
                script_path,
                url,
                "--output_file={}".format(downloaded_file_path),
            ]
        )
        return downloaded_file_path

    def _get_fetch_article_moz_readability_script_path(self):
        """Return the absolute path of the Node article-fetching script.

        The path is resolved relative to the directory of the recipe file,
        which is taken from ``sys.argv[1]`` (interpreted relative to ``$PWD``).
        Aborts recipe processing when the recipe was not started through
        ``ebook-convert`` or when ``PWD`` is not set.
        """
        if len(sys.argv) < 3 or "ebook-convert" not in sys.argv[0]:
            self.abort_recipe_processing(
                "Please invoke this Calibre recipe via: "
                "ebook-convert /path/to/pinboard-to-kindle.recipe output_file [options]"
            )
        if "PWD" not in os.environ:
            self.abort_recipe_processing("PWD environment variable not set.")
        recipe_base_path = os.path.normpath(
            os.path.join(os.environ["PWD"], os.path.dirname(sys.argv[1]))
        )
        return os.path.join(recipe_base_path, FETCH_ARTICLE_MOZ_READABILITY_RELATIVE_PATH)

    def _get_article_metadata(self, article_file_path):
        """Extract title and metadata line from a rendered article file.

        The values are read from the first elements carrying the CSS classes
        ``pb-to-kindle-article-title`` and ``pb-to-kindle-article-metadata``
        (presumably emitted by the Node script -- verify against index.js).

        Returns:
            A dict that contains "title" and/or "metadata" for every marker
            that was found; missing markers are simply left out.
        """
        # Read as UTF-8 explicitly (matching the recipe's declared encoding)
        # instead of relying on the platform's default locale encoding.
        with open(article_file_path, "r", encoding="utf-8") as f:
            html = f.read()
        res = {}
        markers = (
            ("title", "pb-to-kindle-article-title"),
            ("metadata", "pb-to-kindle-article-metadata"),
        )
        for key, css_class in markers:
            match = re.search(
                '<[^>]+ class="{}">(.*?)</[^>]+>'.format(css_class),
                html,
                re.IGNORECASE,
            )
            if match:
                res[key] = match.group(1)
        return res

    def get_obfuscated_article(self, url):
        """ Let Calibre download images and other media in downloaded html files """
        return self.downloaded_file_paths.get(url)

    def parse_index(self):
        """Build the article index for Calibre.

        Returns:
            ``[("Pinboard", articles)]`` where ``articles`` holds at most
            ``MAX_ARTICLES`` dicts with "title", "url" and "description".

        Aborts recipe processing when not a single article could be fetched.
        """
        # Validate the invocation once, outside the per-article try block.
        # abort_recipe_processing is expected to signal the abort by raising;
        # resolving the path here keeps that from being mistaken for a
        # per-URL download error below.
        self._get_fetch_article_moz_readability_script_path()

        articles = []
        for bookmark in self._get_bookmarks():
            if len(articles) >= MAX_ARTICLES:
                break
            url = bookmark["href"]
            try:
                article_file_path = self._fetch_article_moz_readability(url)
                article_info = self._get_article_metadata(article_file_path)
            except Exception as err:
                # Best effort: skip articles that fail to download or parse,
                # but let KeyboardInterrupt / SystemExit propagate.
                print("Error fetching URL: {}".format(url))
                print("Reason: {!r}".format(err))
                continue
            self._mark_bookmark_as_sent(bookmark)
            articles.append(
                {
                    "title": article_info.get("title", ""),
                    "url": url,
                    "description": article_info.get("metadata", ""),
                }
            )
        if not articles:
            self.abort_recipe_processing("No articles were fetched.")
        return [("Pinboard", articles)]

    def cleanup(self):
        """Remove every temporary HTML file created while fetching articles."""
        for file_path in self.downloaded_file_paths.values():
            # Failed downloads are registered too but may never have produced
            # a file, so skip paths that do not exist.
            if not os.path.exists(file_path):
                continue
            print("Removing temporary file: {}".format(file_path))
            os.remove(file_path)