name: Dataset backup

on:
  workflow_dispatch:
    inputs:
      dataset:
        description: "Dataset name to back up"
        required: false
        default: "production"
        type: string
  schedule:
    # Note: GitHub Actions cron schedules run in UTC.
    - cron: "22 3 * * 0" # Every Sunday at 3:22 AM UTC, an off-peak minute chosen because runs scheduled at busy times (like the top of the hour) are more likely to be delayed.
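
# Besides the weekly cron, a backup can also be triggered manually through
# workflow_dispatch, for example with the GitHub CLI (the "staging" dataset
# name is purely illustrative):
#
#   gh workflow run "Dataset backup" -f dataset=staging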

jobs:
  backup-dataset:
    runs-on: ubuntu-latest
    name: Backup dataset
    steps:
      - name: Checkout
        uses: actions/checkout@v4
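
      # Checking out the repo is presumably what lets `npm install` below read
      # package.json, so that `npx sanity` uses the project's pinned CLI
      # version rather than fetching one ad hoc (an assumption about this
      # repo's setup, not something the workflow states).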

      - name: Generate upload file name
        id: upload_name
        run: echo "UPLOAD_NAME=$(TZ=America/New_York date +'%Y-%m-%d_%Hh%Mm%Ss')_${{ secrets.SANITY_PROJECT_ID }}-${{ inputs.dataset || 'production' }}-${{ github.run_id }}" >> "$GITHUB_OUTPUT"
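
      # The generated name should look roughly like
      # "2024-06-02_23h22m08s_abc123de-production-9123456789"
      # (America/New_York timestamp, then project id, dataset, and run id for
      # uniqueness); the concrete values here are made up for illustration.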

      - name: Install dependencies
        run: npm install

      - name: Export dataset
        run: npx sanity dataset export ${{ inputs.dataset || 'production' }} 'backups/${{ steps.upload_name.outputs.UPLOAD_NAME }}/${{ steps.upload_name.outputs.UPLOAD_NAME }}.tar.gz'
        env:
          SANITY_AUTH_TOKEN: ${{ secrets.SANITY_TOKEN }}
          NEXT_PUBLIC_SANITY_PROJECT_ID: ${{ secrets.SANITY_PROJECT_ID }}
          NEXT_PUBLIC_SANITY_DATASET: ${{ inputs.dataset || 'production' }}
          # The two values below are never read; they exist only to satisfy
          # the project's env validation.
          NEXT_PUBLIC_VERCEL_ENV: production
          NEXT_PUBLIC_POSTHOG_KEY: "phc_x"
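
      # `sanity dataset export` writes a gzipped tarball that typically
      # contains the documents as ndjson plus any referenced assets. If assets
      # aren't wanted in backups, the CLI's --no-assets flag shrinks the
      # archive; check `npx sanity dataset export --help` for the flags your
      # CLI version supports.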

      - name: Authenticate with Google Cloud
        uses: google-github-actions/auth@v2
        with:
          service_account: ${{ secrets.BACKUPS_SA_EMAIL }}
          credentials_json: ${{ secrets.BACKUPS_SA_KEY }}
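
      # `credentials_json` takes the service account's exported JSON key,
      # stored here as a secret. If long-lived keys are a concern, this auth
      # action also supports keyless Workload Identity Federation via its
      # `workload_identity_provider` input.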

      - name: Upload to Google Cloud Storage
        uses: google-github-actions/upload-cloud-storage@v2
        with:
          project_id: ${{ secrets.BACKUPS_SA_PROJECT }}
          destination: "${{ secrets.BACKUPS_BUCKET }}/sanity-datasets/${{ secrets.SANITY_PROJECT_ID }}/${{ inputs.dataset || 'production' }}"
          path: "./backups/${{ steps.upload_name.outputs.UPLOAD_NAME }}/${{ steps.upload_name.outputs.UPLOAD_NAME }}.tar.gz"
          # Upload just the archive itself, without prepending its local
          # "backups/..." parent directories to the object name.
          parent: false
          # No .gcloudignore file in this repo, so skip looking for one.
          process_gcloudignore: false
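
# A rough sketch of restoring one of these backups (untested here; the bucket
# and file names are illustrative):
#
#   gsutil cp "gs://<bucket>/sanity-datasets/<projectId>/production/<name>.tar.gz" .
#   npx sanity dataset import "<name>.tar.gz" production --replace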