Skip to content

Commit

Permalink
feat: auto update rss feeds
Browse files Browse the repository at this point in the history
  • Loading branch information
qrzbing committed Mar 20, 2024
1 parent 3d6487b commit 4c0146a
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 0 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/update-rss.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
name: Update RSS Feed

# Run the update job on every push to the master branch.
on:
  push:
    branches:
      - master

jobs:
  update-rss:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the version a string (an unquoted 3.10 would be read as 3.1).
          python-version: '3.11'

      - name: Install dependencies
        run: pip install -r requirements.txt

      - name: Run update
        run: python update.py
        env:
          # update.py reads both values from the environment; they are supplied from repository secrets.
          MINIFLUX_ENDPOINT: ${{ secrets.MINIFLUX_ENDPOINT }}
          MINIFLUX_API_KEY: ${{ secrets.MINIFLUX_API_KEY }}
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
miniflux==1.0.1
mistune==3.0.2
Requests==2.31.0
76 changes: 76 additions & 0 deletions update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import miniflux
import mistune
import os
import pprint
import requests

from mistune.plugins.table import table


# Shared Markdown parser: the 'ast' renderer makes calls return a token
# structure (iterated as dicts in format_rss) rather than rendered output,
# and the table plugin enables parsing of Markdown tables.
markdown = mistune.create_markdown(renderer='ast', plugins=[table])


def format_rss(blogroll_url, timeout=10):
    """Collect feed URLs from a Markdown blogroll and keep the usable ones.

    The document at ``blogroll_url`` is downloaded and parsed with the
    module-level ``markdown`` parser. For every row of every table body, the
    third cell is inspected; when its first inline token carries ``raw`` text
    starting with ``http`` it is treated as a candidate feed URL. Each
    candidate is then fetched and kept only if it answers with HTTP 200 and a
    body that begins with an XML declaration.

    Args:
        blogroll_url: URL of the Markdown document containing the table(s).
        timeout: Seconds to wait on each HTTP request before giving up, so a
            single stalled server cannot hang the whole run.

    Returns:
        The candidate URLs that passed the check, in document order. An empty
        list is returned when the blogroll answers with a non-200 status.

    Raises:
        requests.exceptions.RequestException: If the blogroll document itself
            cannot be fetched (failures of individual feeds are only logged).
    """
    response = requests.get(blogroll_url, timeout=timeout)
    if response.status_code != 200:
        # An error page is not a blogroll; report it instead of parsing it.
        print(f"{blogroll_url} with status code {response.status_code}")
        return []

    rss_urls = []
    for elem in markdown(response.text):
        if not isinstance(elem, dict) or elem.get('type') != 'table':
            continue
        for tbl_elem in elem.get('children', []):
            if tbl_elem.get('type') != "table_body":
                continue
            for row in tbl_elem.get('children', []):
                cells = row.get('children', [])
                if len(cells) < 3:
                    # Row has no third column to read a URL from.
                    continue
                inline = cells[2].get('children') or []
                if not inline:
                    # Empty cell: nothing to inspect.
                    continue
                raw_data = inline[0].get('raw')
                if isinstance(raw_data, str) and raw_data.startswith("http"):
                    rss_urls.append(raw_data)

    checked_rss_urls = []
    for url in rss_urls:
        print(f"getting {url}")
        try:
            req = requests.get(url, timeout=timeout)
        except requests.exceptions.RequestException as e:
            # RequestException is the base of SSLError/ConnectionError and also
            # covers timeouts, redirect loops and malformed URLs, so one bad
            # entry never aborts the whole scan.
            print(f"{url} with exception {e}")
            continue
        if req.status_code != 200:
            print(f"{url} with status code {req.status_code}")
            continue
        # Tolerate a byte-order mark or leading whitespace before the
        # XML declaration.
        if req.text.lstrip("\ufeff \t\r\n").startswith("<?xml"):
            checked_rss_urls.append(url)

    return checked_rss_urls


def add_client(valid_rss, client, category_id=3):
    """Create a feed on ``client`` for every URL in ``valid_rss``.

    Each URL is passed to ``client.create_feed`` with the crawler enabled.
    The value returned by ``create_feed`` is printed on success. A
    ``miniflux.BadRequest`` or ``miniflux.ServerError`` for one URL is
    printed and the loop moves on to the next URL.

    Args:
        valid_rss: Iterable of feed URLs to add.
        client: Object exposing ``create_feed(url, category_id=..., crawler=...)``;
            in this script a ``miniflux.Client``.
        category_id: Category the new feeds are filed under. Defaults to 3,
            the value this script previously hard-coded.

    Returns:
        None.
    """
    for url in valid_rss:
        try:
            feed_id = client.create_feed(url, category_id=category_id, crawler=True)
        except (miniflux.BadRequest, miniflux.ServerError) as e:
            print(f"{url} with exception {e}!")
        else:
            print(feed_id)


def get_current_status(client: miniflux.Client):
    """Pretty-print a (title, site_url) pair for every feed the client returns."""
    summary = []
    for entry in client.get_feeds():
        summary.append((entry['title'], entry['site_url']))
    pprint.pp(summary)


if __name__ == '__main__':
    # Script entry point: build the Miniflux client from the environment,
    # harvest feed URLs from the blogroll, add them as feeds, then print the
    # resulting feed list.
    client = miniflux.Client(
        os.environ["MINIFLUX_ENDPOINT"],
        api_key=os.environ["MINIFLUX_API_KEY"],
    )

    blogroll_url = "https://raw.githubusercontent.com/NUAA-Open-Source/BlogRoll/master/README.md"

    add_client(format_rss(blogroll_url), client)
    get_current_status(client)

0 comments on commit 4c0146a

Please sign in to comment.