Skip to content

Commit

Permalink
update to new sheet
Browse files Browse the repository at this point in the history
  • Loading branch information
mhpob committed Jan 8, 2025
1 parent 4915f9b commit 6d9a274
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 9 deletions.
11 changes: 6 additions & 5 deletions other_code/scrape_rtwb_to_current.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
library(googlesheets4)
library(rvest)
library(dplyr)

# This brings the Year 3 sheet up-to-date with October 4, 2023

Expand All @@ -26,7 +27,7 @@ n_reviewed <- function(date){

read_html(daily_url) |>
html_element(xpath = '/html/body/table') |>
html_table(na.strings = '')|>
html_table(na.strings = '') |>
dplyr::summarize(date = unique(gsub(' .*', '', `Date/time`)),
n_reviewed = sum(!is.na(Tracks))
)
Expand All @@ -40,17 +41,17 @@ dates <- seq(
### From this date...
as.Date('2024-10-22'),
### To this date...
as.Date('2025-01-08'),
as.Date('2025-01-07'),
by = 'day')
dates <- format(dates, '%Y%m%d')

all_n_reviewed <- lapply(dates,
n_reviewed)
all_n_reviewed <- dplyr::bind_rows(all_n_reviewed)
n_reviewed) |>
bind_rows(all_n_reviewed)

write_sheet(
all_n_reviewed,
# Year 2 URL for "Webscraper_TallyPeriods_year2" (HIDDEN)
# Year 3 URL for "Webscraper_TallyPeriods_year3"
'https://docs.google.com/spreadsheets/d/18zA7XAaZQTDdYxgaVf6GM8Kp-p8Wwa8BL6J2siALsaw/edit#gid=0',
'https://docs.google.com/spreadsheets/d/1M293uj32-a_aSv8jhKsjEnZ7-eD7Rt0S80ggburvBe0',
sheet = 1)
6 changes: 4 additions & 2 deletions scheduled_code/daily_occurrence_scraper.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ whales_html <- read_html(
# Year 2 URL
# 'http://dcs.whoi.edu/mdoc0722/mdoc0722_mdoc.shtml'
# Year 3 URL
# Year 4 URL
'https://dcs.whoi.edu/mdoc2410/mdoc2410_mdoc.shtml'
)

Expand Down Expand Up @@ -93,8 +94,9 @@ gs4_auth(path = Sys.getenv('GDRIVE_PAT'))
# The sheet we are targeting:
occurrence_sheet <-
# Year 2 URL for "Webscraper_Real-time whale occurrence Monthly_year2" (HIDDEN)
# Year 3 URL for "Webscraper_Real-time whale occurrence Monthly_year3"
'https://docs.google.com/spreadsheets/d/1RubEzH8ZIZwxvrg1alknPX95ILDOMocK4v-JSNYNWN0/edit?gid=0#gid=0'
# Year 3 URL for "Webscraper_Real-time whale occurrence Monthly_year3" (HIDDEN)
# Year 4 URL for "Webscraper_Real-time whale occurrence Monthly_year4"
'https://docs.google.com/spreadsheets/d/1RubEzH8ZIZwxvrg1alknPX95ILDOMocK4v-JSNYNWN0'

# Remove previously-scraped sheets (anything containing the text "Scraper - " in
# its name)
Expand Down
5 changes: 3 additions & 2 deletions scheduled_code/scrape_rtwb.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,9 @@ gs4_auth(path = Sys.getenv('GDRIVE_PAT'))
# Tack the number of reviewed pitch tracks to the bottom of the spreadsheet
sheet_append(
# Year 2 URL for "Webscraper_TallyPeriods_year2" (HIDDEN)
# Year 3 URL for "Webscraper_TallyPeriods_year3"
'https://docs.google.com/spreadsheets/d/18zA7XAaZQTDdYxgaVf6GM8Kp-p8Wwa8BL6J2siALsaw/edit#gid=0',
# Year 3 URL for "Webscraper_TallyPeriods_year3" (HIDDEN)
# Year 4 URL for "Webscraper_TallyPeriods_year4"
'https://docs.google.com/spreadsheets/d/1M293uj32-a_aSv8jhKsjEnZ7-eD7Rt0S80ggburvBe0',
n_reviewed,
sheet = 1
)
Expand Down

0 comments on commit 6d9a274

Please sign in to comment.