@@ -109,18 +109,33 @@ def clean_link(link):
109
109
else :
110
110
return link
111
111
112
def pattern_tweet(tweet):
    """Extract the double-quoted target that follows ``/status/`` in *tweet*.

    When *tweet* contains ``/status/"<target>"``, the text between the quotes
    is returned with any leading slashes removed. Otherwise *tweet* is
    returned unchanged.
    """
    # Shapes this helper is meant for:
    #   Reply: /status//
    #   Link:  /status///
    #   Twimg: /status/https://pbs
    found = re.search(r'/status/"([^"]+)"', tweet)
    if found is None:
        return tweet

    quoted_target = found.group(1)
    return quoted_target.lstrip('/')
124
+
125
def pattern_tweet_id(tweet):
    """Reduce a tweet URL to ``https://twitter.com/<user>/status/<id>``.

    Anything after the numeric status id (sub-endpoints such as ``/photo/1``,
    ``/likes``, ``/retweets``, a query string, trailing whitespace) is
    dropped.

    Args:
        tweet: URL string to normalise.

    Returns:
        The canonical status URL when *tweet* starts with
        ``https://twitter.com/<user>/status/<digits>``; otherwise *tweet*
        unchanged.
    """
    # A single anchored pattern captures the handle and the id together, so
    # both always come from the same place in the string. The dot is escaped
    # so that only the literal host "twitter.com" is accepted.
    match = re.match(r'https://twitter\.com/([^/]+)/status/(\d+)', tweet)
    if match is None:
        return tweet

    username, tweet_id = match.groups()
    return f'https://twitter.com/{username}/status/{tweet_id}'
124
139
125
140
def check_double_status (url_wb , url_tweet ):
126
141
if url_wb .count ('/status/' ) == 2 and not 'twitter.com' in url_tweet :
@@ -247,14 +262,14 @@ def parse_links(links):
247
262
return parsed_links , tweet_links , parsed_mimetype , timestamp
248
263
249
264
def attr(i):
    """Render the attribution line for the *i*-th archived entry.

    Writes one markdown line through ``st.markdown`` containing the running
    index (offset by ``st.session_state.offset``), a link to the archived
    capture, a link to the original URL, the MIME type and the capture
    timestamp.

    Reads the module-level ``tweet_links``, ``status``, ``link``, ``mimetype``
    and ``timestamp`` values set by the caller.

    Args:
        i: Zero-based position of the entry within the current page.
    """
    raw_link = tweet_links[i]

    # Pick the candidate URL first and normalise it once. The cleaned link is
    # only used when neither special case applies, so it is not computed for
    # the other two.
    if status:
        candidate = f'https://twitter.com/{raw_link}'
    elif '://' not in raw_link:
        candidate = f'https://{raw_link}'
    else:
        candidate = clean_tweet(raw_link)

    original_tweet = pattern_tweet_id(candidate)

    position = i + 1 + st.session_state.offset
    # Timestamps are parsed with the 14-digit "%Y%m%d%H%M%S" layout.
    saved_at = datetime.datetime.strptime(timestamp[i], "%Y%m%d%H%M%S")

    st.markdown(
        f'{position}. [**archived url**]({link}) · '
        f'[**original url**]({original_tweet}) · '
        f'**MIME Type:** {mimetype[i]} · '
        f'**Saved at:** {saved_at}'
    )
258
273
259
274
def display_tweet ():
260
275
if mimetype [i ] == 'application/json' or mimetype [i ] == 'text/html' or mimetype [i ] == 'unk' or mimetype [i ] == 'warc/revisit' :
@@ -270,22 +285,22 @@ def display_tweet():
270
285
st .divider ()
271
286
272
287
def display_not_tweet ():
273
- original_link = clean_tweet (tweet_links [i ])
288
+ original_link = pattern_tweet_id ( clean_tweet (tweet_links [i ]) )
274
289
275
290
if status :
276
- original_link = f'https://twitter.com/{ tweet_links [i ]} '
291
+ original_link = pattern_tweet_id ( f'https://twitter.com/{ tweet_links [i ]} ' )
277
292
elif not '://' in tweet_links [i ]:
278
- original_link = f'https://{ tweet_links [i ]} '
293
+ original_link = pattern_tweet_id ( f'https://{ tweet_links [i ]} ' )
279
294
280
295
response_html = requests .get (original_link )
281
296
282
297
if mimetype [i ] == 'text/html' or mimetype [i ] == 'warc/revisit' or mimetype [i ] == 'unk' :
283
298
if ('.jpg' in tweet_links [i ] or '.png' in tweet_links [i ]) and response_html .status_code == 200 :
284
299
components .iframe (tweet_links [i ], height = 500 , scrolling = True )
285
- elif status :
300
+ elif '/status/' not in original_link or response_html .status_code != 200 :
301
+ st .info ("This isn't a status or is not available" )
302
+ elif status or f'{ st .session_state .current_handle } ' not in original_link :
286
303
st .info (f'Replying to { st .session_state .current_handle } ' )
287
- elif '/status/' not in original_link :
288
- st .info ('Original link is not a tweet' )
289
304
else :
290
305
components .iframe (clean_link (link ), height = 500 , scrolling = True )
291
306
0 commit comments