Skip to content

Commit

Permalink
v0.4.3: Time Stat Integration (#14)
Browse files Browse the repository at this point in the history
* feat: add time stat integration

* chore: bump version to v0.4.3

* feat: attach endpoint for getting time stats to router

* feat: update login script to account for SP23
  • Loading branch information
ewang2002 authored Feb 17, 2023
1 parent da63d2b commit 4164b80
Show file tree
Hide file tree
Showing 7 changed files with 109 additions and 33 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "webreg_scraper"
version = "0.4.2"
version = "0.4.3"
authors = ["Edward Wang"]
edition = "2021"
description = "A scraper and/or API for UC San Diego's WebReg enrollment system."
Expand Down
46 changes: 46 additions & 0 deletions src/api/status_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,52 @@ pub async fn api_get_term_status(
.await
}

/// An endpoint for checking the time stats for a specific term's scrapers.
///
/// Responds with a JSON object containing:
/// - `ttl_requests`: total number of enrollment-count requests made so far,
/// - `ttl_time_ms`: total time spent on those requests, in milliseconds,
/// - `recent_requests`: the most recent request durations, rendered as a
///   bracketed, comma-separated string (a string, not a JSON array).
///
/// # Usage
/// The endpoint should be called like so:
// NOTE: the fence below is annotated `text` so rustdoc does not try to
// compile `/<term>` as a Rust doctest (an unannotated fence defaults to
// Rust and fails `cargo test`).
/// ```text
/// /<term>
/// ```
pub async fn api_get_timing_stats(
    Path(term): Path<String>,
    State(s): State<Arc<WrapperState>>,
) -> Response {
    info!("Called with path {term}.");

    api_get_general(
        term.as_str(),
        move |term_info| async move {
            // Snapshot the atomic counters for this term's tracker.
            let num_requests = term_info.tracker.num_requests.load(Ordering::SeqCst);
            let time_spent = term_info.tracker.total_time_spent.load(Ordering::SeqCst);

            // Render the recent durations as e.g. "[12, 34, 56]". Kept as a
            // formatted string (not a JSON array) to preserve the response shape.
            let recent_requests = format!(
                "[{}]",
                term_info
                    .tracker
                    .recent_requests
                    .lock()
                    .await
                    .iter()
                    .map(|amt| amt.to_string())
                    .collect::<Vec<_>>()
                    .join(", ")
            );

            (
                StatusCode::OK,
                Json(json!({
                    "ttl_requests": num_requests,
                    "ttl_time_ms": time_spent,
                    "recent_requests": recent_requests
                })),
            )
                .into_response()
        },
        s,
    )
    .await
}

/// An endpoint for checking the status of a specific term's scrapers.
///
/// # Usage
Expand Down
5 changes: 4 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ use webweg::reqwest::Client;

#[cfg(feature = "api")]
use {
crate::api::status_api::{api_get_login_script_stats, api_get_term_status},
crate::api::status_api::{
api_get_login_script_stats, api_get_term_status, api_get_timing_stats,
},
crate::api::webreg_api::{api_get_course_info, api_get_prereqs, api_get_search_courses},
axum::routing::get,
axum::Router,
Expand Down Expand Up @@ -109,6 +111,7 @@ async fn main() -> ExitCode {
"/scraper/login_script/:term/:stat_type",
get(api_get_login_script_stats),
)
.route("/scraper/timing_stats/:term", get(api_get_timing_stats))
.with_state(state.clone());

let server = axum::Server::bind(
Expand Down
62 changes: 36 additions & 26 deletions src/tracker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use std::sync::Arc;
use std::time::Duration;

use serde_json::Value;
use tokio::sync::Mutex;
use webweg::wrapper::{SearchType, WebRegWrapper};
use tokio::time::Instant;
use webweg::wrapper::SearchType;

#[cfg(feature = "scraper")]
use {
Expand All @@ -20,6 +20,7 @@ use {
use crate::types::TermInfo;
use crate::util::get_pretty_time;

const MAX_RECENT_REQUESTS: usize = 100;
const CLEANED_CSV_HEADER: &str = "time,enrolled,available,waitlisted,total";

#[cfg(debug_assertions)]
Expand Down Expand Up @@ -69,13 +70,7 @@ pub async fn run_tracker(wrapper_info: Arc<TermInfo>, stop_flag: Arc<AtomicBool>
let mut first_passed = false;
loop {
wrapper_info.is_running.store(true, Ordering::SeqCst);
track_webreg_enrollment(
&wrapper_info.scraper_wrapper,
&wrapper_info,
&stop_flag,
verbose,
)
.await;
track_webreg_enrollment(&wrapper_info, &stop_flag, verbose).await;
wrapper_info.is_running.store(false, Ordering::SeqCst);

if stop_flag.load(Ordering::SeqCst) {
Expand Down Expand Up @@ -173,18 +168,13 @@ pub async fn run_tracker(wrapper_info: Arc<TermInfo>, stop_flag: Arc<AtomicBool>
/// basic course information and store this in a CSV file for later processing.
///
/// # Parameters
/// - `wrapper`: The wrapper.
/// - `setting`: The settings for this term.
/// - `end_location`: The end location for the cleaned CSV files. Just the base location will
/// suffice.
pub async fn track_webreg_enrollment(
wrapper: &Mutex<WebRegWrapper>,
info: &TermInfo,
stop_flag: &Arc<AtomicBool>,
verbose: bool,
) {
/// - `info`: The term information.
/// - `stop_flag`: The stop flag. This is essentially a global flag that indicates if the scraper
/// should stop running.
/// - `verbose`: Whether logging should be verbose.
pub async fn track_webreg_enrollment(info: &TermInfo, stop_flag: &Arc<AtomicBool>, verbose: bool) {
// If the wrapper doesn't have a valid cookie, then return.
if !wrapper.lock().await.is_valid().await {
if !info.scraper_wrapper.lock().await.is_valid().await {
eprintln!(
"[{}] [{}] Initial instance is not valid. Returning.",
info.term,
Expand Down Expand Up @@ -227,7 +217,7 @@ pub async fn track_webreg_enrollment(
writer.flush().unwrap();
let results = {
let mut r = vec![];
let w = wrapper.lock().await;
let w = info.scraper_wrapper.lock().await;
for search_query in &info.search_query {
let mut temp = w
.search_courses(SearchType::Advanced(search_query))
Expand Down Expand Up @@ -279,11 +269,14 @@ pub async fn track_webreg_enrollment(
break 'main;
}

let w = wrapper.lock().await;
let res = w
.get_enrollment_count(r.subj_code.trim(), r.course_code.trim())
.await;
drop(w);
// Start timing.
let start_time = Instant::now();

let res = {
let w = info.scraper_wrapper.lock().await;
w.get_enrollment_count(r.subj_code.trim(), r.course_code.trim())
.await
};

match res {
Err(e) => {
Expand Down Expand Up @@ -353,11 +346,28 @@ pub async fn track_webreg_enrollment(
}
}

// Record time spent on request.
let end_time = start_time.elapsed();
info.tracker.num_requests.fetch_add(1, Ordering::SeqCst);
let time_spent = end_time.as_millis() as usize;
info.tracker
.total_time_spent
.fetch_add(time_spent, Ordering::SeqCst);

// Add the most recent request to the deque, removing the oldest if necessary.
let mut recent_requests = info.tracker.recent_requests.lock().await;
while recent_requests.len() >= MAX_RECENT_REQUESTS {
recent_requests.pop_front();
}

recent_requests.push_back(time_spent);

// Sleep between requests so we don't get ourselves banned by webreg
tokio::time::sleep(Duration::from_secs_f64(info.cooldown)).await;
}
}

// Out of loop, this should run only if we need to exit the scraper (e.g., need to log back in)
#[cfg(feature = "scraper")]
{
if !writer.buffer().is_empty() {
Expand Down
22 changes: 19 additions & 3 deletions src/types.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::collections::HashMap;
use std::sync::atomic::AtomicBool;
use std::collections::{HashMap, VecDeque};
use std::sync::atomic::{AtomicBool, AtomicUsize};
use std::sync::Arc;

use serde::{Deserialize, Serialize};
Expand All @@ -8,7 +8,6 @@ use webweg::reqwest::Client;
use webweg::wrapper::{CourseLevelFilter, SearchRequestBuilder, WebRegWrapper};

/// A structure that represents the current state of all wrappers.
#[derive(Clone)]
pub struct WrapperState {
/// A map containing all active scrapers, grouped by term.
pub all_wrappers: WrapperMap,
Expand All @@ -20,6 +19,16 @@ pub struct WrapperState {

pub type WrapperMap = HashMap<String, Arc<TermInfo>>;

/// A structure that holds basic stats about the tracker.
pub struct StatTracker {
    /// The amount of time, in milliseconds, that each of the most recent
    /// requests took to finish processing. The collection is bounded (the
    /// writer evicts the oldest entries), so it reflects only recent history.
    pub recent_requests: Mutex<VecDeque<usize>>,
    /// The total number of requests that have been made thus far.
    pub num_requests: AtomicUsize,
    /// The total amount of time spent making those requests, in milliseconds.
    pub total_time_spent: AtomicUsize,
}

/// A structure that holds information relating to the scraper and, more importantly, the
/// scraper instances themselves.
pub struct TermInfo {
Expand All @@ -41,6 +50,8 @@ pub struct TermInfo {
pub general_wrapper: Mutex<WebRegWrapper>,
/// Whether the scrapers are running.
pub is_running: AtomicBool,
/// Tracker stats. This field contains information on the performance of the scrapers.
pub tracker: StatTracker,
}

impl From<&ConfigTermDatum> for TermInfo {
Expand All @@ -55,6 +66,11 @@ impl From<&ConfigTermDatum> for TermInfo {
scraper_wrapper: Mutex::new(WebRegWrapper::new(Client::new(), "", value.term.as_str())),
general_wrapper: Mutex::new(WebRegWrapper::new(Client::new(), "", value.term.as_str())),
is_running: AtomicBool::new(false),
tracker: StatTracker {
recent_requests: Default::default(),
num_requests: Default::default(),
total_time_spent: Default::default(),
},
};

if cfg!(feature = "scraper") {
Expand Down
3 changes: 2 additions & 1 deletion webregautoin/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ const DEBUG_MODE: boolean = false;
// <option value="THIS">Some Quarter</option>
// ----
const ALL_TERMS: readonly string[] = [
"5260:::WI23"
"5260:::WI23",
"5270:::SP23"
];

const NUM_ATTEMPTS_BEFORE_EXIT: number = 6;
Expand Down

0 comments on commit 4164b80

Please sign in to comment.