Skip to content

Commit 1cf0181

Browse files
author
claromes
authored
Merge pull request #16 from claromes/clean-urls
Delete sub-endpoints for "original url"
2 parents d334814 + 63b54f8 commit 1cf0181

File tree

2 files changed

+34
-14
lines changed

2 files changed

+34
-14
lines changed

app.py

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -109,18 +109,33 @@ def clean_link(link):
109109
else:
110110
return link
111111

112-
def pattern_tweet(url):
112+
def pattern_tweet(tweet):
113113
# Reply: /status//
114114
# Link: /status///
115115
# Twimg: /status/https://pbs
116116

117117
pattern = re.compile(r'/status/"([^"]+)"')
118118

119-
match = pattern.search(url)
119+
match = pattern.search(tweet)
120120
if match:
121121
return match.group(1).lstrip('/')
122122
else:
123-
return url
123+
return tweet
124+
125+
def pattern_tweet_id(tweet):
    """Return the canonical status URL for a tweet, without sub-endpoints.

    A URL such as ``https://twitter.com/<user>/status/<id>/photo/1`` is
    reduced to ``https://twitter.com/<user>/status/<id>``; anything that
    follows the numeric id (``/photos``, ``/likes``, ``/retweets``, query
    strings, ...) is dropped.

    Args:
        tweet: URL string to normalise.

    Returns:
        The trimmed status URL when ``tweet`` starts with
        ``https://twitter.com/<user>/status/<digits>``; otherwise ``tweet``
        is returned unchanged.
    """
    # Delete sub-endpoint (/photos, /likes, /retweet...)
    # The username and the id are captured by ONE anchored pattern so they
    # always come from the same URL. Two independent searches could pair the
    # username of the leading URL with the id of a second URL embedded
    # further along in the string.
    match = re.match(r'https://twitter\.com/([^/]+)/status/(\d+)', tweet)

    if match is None:
        return tweet

    username, tweet_id = match.groups()
    return f'https://twitter.com/{username}/status/{tweet_id}'
124139

125140
def check_double_status(url_wb, url_tweet):
126141
if url_wb.count('/status/') == 2 and not 'twitter.com' in url_tweet:
@@ -247,14 +262,14 @@ def parse_links(links):
247262
return parsed_links, tweet_links, parsed_mimetype, timestamp
248263

249264
def attr(i):
250-
original_tweet = clean_tweet(tweet_links[i])
265+
original_tweet = pattern_tweet_id(clean_tweet(tweet_links[i]))
251266

252267
if status:
253-
original_tweet = f'https://twitter.com/{tweet_links[i]}'
268+
original_tweet = pattern_tweet_id(f'https://twitter.com/{tweet_links[i]}')
254269
elif not '://' in tweet_links[i]:
255-
original_tweet = f'https://{tweet_links[i]}'
270+
original_tweet = pattern_tweet_id(f'https://{tweet_links[i]}')
256271

257-
st.markdown(f'{i+1 + st.session_state.offset}. [**archive.org**]({link}) · [**original url**]({original_tweet}) · **MIME Type:** {mimetype[i]} · **Saved at:** {datetime.datetime.strptime(timestamp[i], "%Y%m%d%H%M%S")}')
272+
st.markdown(f'{i+1 + st.session_state.offset}. [**archived url**]({link}) · [**original url**]({original_tweet}) · **MIME Type:** {mimetype[i]} · **Saved at:** {datetime.datetime.strptime(timestamp[i], "%Y%m%d%H%M%S")}')
258273

259274
def display_tweet():
260275
if mimetype[i] == 'application/json' or mimetype[i] == 'text/html' or mimetype[i] == 'unk' or mimetype[i] == 'warc/revisit':
@@ -270,22 +285,22 @@ def display_tweet():
270285
st.divider()
271286

272287
def display_not_tweet():
273-
original_link = clean_tweet(tweet_links[i])
288+
original_link = pattern_tweet_id(clean_tweet(tweet_links[i]))
274289

275290
if status:
276-
original_link = f'https://twitter.com/{tweet_links[i]}'
291+
original_link = pattern_tweet_id(f'https://twitter.com/{tweet_links[i]}')
277292
elif not '://' in tweet_links[i]:
278-
original_link = f'https://{tweet_links[i]}'
293+
original_link = pattern_tweet_id(f'https://{tweet_links[i]}')
279294

280295
response_html = requests.get(original_link)
281296

282297
if mimetype[i] == 'text/html' or mimetype[i] == 'warc/revisit' or mimetype[i] == 'unk':
283298
if ('.jpg' in tweet_links[i] or '.png' in tweet_links[i]) and response_html.status_code == 200:
284299
components.iframe(tweet_links[i], height=500, scrolling=True)
285-
elif status:
300+
elif '/status/' not in original_link or response_html.status_code != 200:
301+
st.info("This isn't a status or is not available")
302+
elif status or f'{st.session_state.current_handle}' not in original_link:
286303
st.info(f'Replying to {st.session_state.current_handle}')
287-
elif '/status/' not in original_link:
288-
st.info('Original link is not a tweet')
289304
else:
290305
components.iframe(clean_link(link), height=500, scrolling=True)
291306

docs/CHANGELOG.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
# Changelog
22

3-
## [v0.4.1](https://github.com/claromes/waybacktweets/releases/tag/v0.4) - 2023-12-13
3+
## [v0.4.2](https://github.com/claromes/waybacktweets/releases/tag/v0.4.2) - 2023-12-13
4+
- Add:
5+
- Parse tweet URLs to delete `/photos`, `/likes`, `/retweets` and other sub-endpoints
6+
- Only for "original url"
7+
8+
## [v0.4.1](https://github.com/claromes/waybacktweets/releases/tag/v0.4.1) - 2023-12-13
49
- Add:
510
- Warning message for non 200/300 status code
611
- Update:

0 commit comments

Comments
 (0)