scrape #65

Workflow file for this run

name: scrape
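# Runs on manual dispatch and on a daily schedule (02:15 UTC); the scraped data is
# committed to main and the published files are synced to the pages branch.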
on:
  workflow_dispatch:
  schedule:
  - cron: "15 2 * * *"
jobs:
  scrape:
    runs-on: ubuntu-latest
    steps:
    - name: Check out this repo
      uses: actions/checkout@v2
    - name: Set up Python
      uses: actions/setup-python@v2
      with:
        python-version: '3.10'
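    # Swap the runner's preinstalled Chrome and default Chromium for a freshly
    # apt-installed Chromium for Selenium to drive.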
    - name: Installed package list
      run: apt list --installed
    - name: Remove Chrome
      run: sudo apt purge -y google-chrome-stable
    - name: Remove default Chromium
      run: sudo apt purge -y chromium-browser
    - name: Install a new Chromium
      run: sudo apt install -y chromium-browser
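    # The scraper's Python dependencies; file paths and the Google Maps API key are
    # supplied to backend/main.py via repository secrets.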
    - name: Install all necessary packages
      run: pip install requests beautifulsoup4 pandas selenium titlecase googlemaps webdriver-manager
    - name: Run the scraping script
      env:
        LOCATIONS_FILEPATH: ${{ secrets.LOCATIONS_FILEPATH }}
        GMAPS_CACHE_FILEPATH: ${{ secrets.GMAPS_CACHE_FILEPATH }}
        GMAPS_API_KEY: ${{ secrets.GMAPS_API_KEY }}
      run: python backend/main.py
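    # Commit whatever the scrape changed back to the repository as the Actions bot.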
    - name: Set up git config
      run: |-
        git config --local user.name "github-actions[bot]"
        git config --local user.email "github-actions[bot]@users.noreply.github.com"
        git fetch
    - name: Commit changes
      run: |-
        git add .
        timestamp=$(date -u)
        git commit -m "Latest data: ${timestamp}"
    - name: Push to main
      uses: ad-m/github-push-action@master
      with:
        github_token: ${{ secrets.GITHUB_TOKEN }}
        branch: main
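    # Copy the regenerated locations.json and index.html from main onto the pages
    # branch, then publish it.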
    - name: Checkout to pages branch
      run: |-
        git checkout pages
        git checkout main -- locations.json index.html
        git add .
        git commit -m "sync"
    - name: Push to pages
      uses: ad-m/github-push-action@master
      with:
        github_token: ${{ secrets.GITHUB_TOKEN }}
        branch: pages