Skip to content

Commit

Permalink
Build: Enable typesense scraper
Browse files (browse the repository at this point in the history)
  • Loading branch information
Krinkle committed Oct 11, 2023
1 parent 9cd5ba8 commit c80fa31
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 0 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/typesense.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Re-indexes the sites listed in docsearch.config.json into Typesense by
# running the typesense/docsearch-scraper container.
name: typesense
on:
  # Once a day at 11:30 UTC <https://crontab.guru/>
  schedule:
    - cron: '30 11 * * *'
  # Or after a deployment
  push:
    branches:
      - main
      - add-typesense
  # Or manually
  workflow_dispatch:

jobs:
  typesense:
    name: Update Typesense
    if: ${{ github.repository_owner == 'jquery' }} # skip on forks
    runs-on: ubuntu-latest
    steps:
      # The scraper configuration is read from the checked-out repository.
      - uses: actions/checkout@v3
      - name: Docsearch Scraper
        shell: bash
        # Secrets are handed to the step as environment variables instead of
        # being interpolated into the script text, so their values are never
        # word-split or interpreted by the shell.
        env:
          TYPESENSE_API_KEY: ${{ secrets.TYPESENSE_ADMIN_KEY }}
          TYPESENSE_HOST: ${{ secrets.TYPESENSE_HOST }}
        # `-e NAME` without a value forwards NAME from the step environment
        # into the container. `jq -r tostring` collapses the JSON config into
        # a single-line string for the CONFIG variable.
        run: |
          docker run \
            -e TYPESENSE_API_KEY \
            -e TYPESENSE_HOST \
            -e TYPESENSE_PORT="443" \
            -e TYPESENSE_PROTOCOL="https" \
            -e CONFIG="$(jq -r tostring docsearch.config.json)" \
            typesense/docsearch-scraper:0.8.0
57 changes: 57 additions & 0 deletions docsearch.config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{
  "index_name": "jqueryui_com",
  "start_urls": [
    { "url": "https://api.jqueryui.com", "selectors_key": "api", "page_rank": 20 },
    { "url": "https://jqueryui.com", "page_rank": 10 }
  ],
  "// stop_urls": [
    "// Exclude URLs containing '?' such as /themeroller/?...",
    "// Exclude URLs containing '.com/category/' or '.com/resources/'",
    "// Exclude api.jqueryui.com paths that start with a digit followed by a dot"
  ],
  "stop_urls": [
    "\\?",
    "\\.com/category/",
    "\\.com/resources/",
    "api\\.jqueryui\\.com\\/\\d\\."
  ],
  "selectors": {
    "default": {
      "lvl0": {
        "selector": "#menu-top .menu-item.current > a",
        "global": true,
        "default_value": "Documentation"
      },
      "lvl1": "#content h1",
      "lvl2": "#content h2",
      "lvl3": "#content h3",
      "lvl4": "#content h4",
      "lvl5": "#content h5",
      "text": "#content p, #content li, #content tr"
    },
    "api": {
      "lvl0": {
        "selector": "#categories .cat-item.current-cat > a",
        "global": true,
        "default_value": "API"
      },
      "lvl1": "#content h1",
      "lvl2": "#content h2, #content h4.name",
      "lvl3": "#content h3, #content h4:not(.name)",
      "lvl4": "#content h5, #content strong:first-child",
      "text": ".entry-content p, .entry-content li"
    }
  },
  "custom_settings": {
    "token_separators": ["_", "-", "."]
  },
  "selectors_exclude": [
    "header ~ article",
    ".returns",
    ".version-details",
    ".section-title",
    ".icon-link.toc-link",
    "[class^=toclevel]",
    "#toctitle"
  ],
  "min_indexed_level": 2,
  "scrape_start_urls": false
}

0 comments on commit c80fa31

Please sign in to comment.