Skip to content

Commit

Permalink
v0.4.3: Time Stat Integration (#14)
Browse files Browse the repository at this point in the history
* feat: add time stat integration

* chore: bump version to v0.4.3

* feat: attach endpoint for getting time stats to router

* feat: update login script to account for SP23
  • Loading branch information
ewang2002 authored Feb 17, 2023
1 parent da63d2b commit 4164b80
Show file tree
Hide file tree
Showing 7 changed files with 109 additions and 33 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "webreg_scraper"
version = "0.4.2"
version = "0.4.3"
authors = ["Edward Wang"]
edition = "2021"
description = "A scraper and/or API for UC San Diego's WebReg enrollment system."
Expand Down
46 changes: 46 additions & 0 deletions src/api/status_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,52 @@ pub async fn api_get_term_status(
.await
}

/// An endpoint for checking the time stats for a specific term's scrapers.
///
/// Responds with a JSON object containing:
/// - `ttl_requests`: total number of enrollment-count requests made so far,
/// - `ttl_time_ms`: total time spent on those requests, in milliseconds,
/// - `recent_requests`: the most recent request durations, rendered as a
///   bracketed, comma-separated string (a string, not a JSON array).
///
/// # Usage
/// The endpoint should be called like so:
// NOTE: the fence below is annotated `text` so rustdoc does not try to
// compile `/<term>` as a Rust doctest (an unannotated fence defaults to
// Rust and fails `cargo test`).
/// ```text
/// /<term>
/// ```
pub async fn api_get_timing_stats(
    Path(term): Path<String>,
    State(s): State<Arc<WrapperState>>,
) -> Response {
    info!("Called with path {term}.");

    api_get_general(
        term.as_str(),
        move |term_info| async move {
            // Snapshot the atomic counters for this term's tracker.
            let num_requests = term_info.tracker.num_requests.load(Ordering::SeqCst);
            let time_spent = term_info.tracker.total_time_spent.load(Ordering::SeqCst);

            // Render the recent durations as e.g. "[12, 34, 56]". Kept as a
            // formatted string (not a JSON array) to preserve the response shape.
            let recent_requests = format!(
                "[{}]",
                term_info
                    .tracker
                    .recent_requests
                    .lock()
                    .await
                    .iter()
                    .map(|amt| amt.to_string())
                    .collect::<Vec<_>>()
                    .join(", ")
            );

            (
                StatusCode::OK,
                Json(json!({
                    "ttl_requests": num_requests,
                    "ttl_time_ms": time_spent,
                    "recent_requests": recent_requests
                })),
            )
                .into_response()
        },
        s,
    )
    .await
}

/// An endpoint for checking the status of a specific term's scrapers.
///
/// # Usage
Expand Down
5 changes: 4 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ use webweg::reqwest::Client;

#[cfg(feature = "api")]
use {
crate::api::status_api::{api_get_login_script_stats, api_get_term_status},
crate::api::status_api::{
api_get_login_script_stats, api_get_term_status, api_get_timing_stats,
},
crate::api::webreg_api::{api_get_course_info, api_get_prereqs, api_get_search_courses},
axum::routing::get,
axum::Router,
Expand Down Expand Up @@ -109,6 +111,7 @@ async fn main() -> ExitCode {
"/scraper/login_script/:term/:stat_type",
get(api_get_login_script_stats),
)
.route("/scraper/timing_stats/:term", get(api_get_timing_stats))
.with_state(state.clone());

let server = axum::Server::bind(
Expand Down
62 changes: 36 additions & 26 deletions src/tracker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use std::sync::Arc;
use std::time::Duration;

use serde_json::Value;
use tokio::sync::Mutex;
use webweg::wrapper::{SearchType, WebRegWrapper};
use tokio::time::Instant;
use webweg::wrapper::SearchType;

#[cfg(feature = "scraper")]
use {
Expand All @@ -20,6 +20,7 @@ use {
use crate::types::TermInfo;
use crate::util::get_pretty_time;

const MAX_RECENT_REQUESTS: usize = 100;
const CLEANED_CSV_HEADER: &str = "time,enrolled,available,waitlisted,total";

#[cfg(debug_assertions)]
Expand Down Expand Up @@ -69,13 +70,7 @@ pub async fn run_tracker(wrapper_info: Arc<TermInfo>, stop_flag: Arc<AtomicBool>
let mut first_passed = false;
loop {
wrapper_info.is_running.store(true, Ordering::SeqCst);
track_webreg_enrollment(
&wrapper_info.scraper_wrapper,
&wrapper_info,
&stop_flag,
verbose,
)
.await;
track_webreg_enrollment(&wrapper_info, &stop_flag, verbose).await;
wrapper_info.is_running.store(false, Ordering::SeqCst);

if stop_flag.load(Ordering::SeqCst) {
Expand Down Expand Up @@ -173,18 +168,13 @@ pub async fn run_tracker(wrapper_info: Arc<TermInfo>, stop_flag: Arc<AtomicBool>
/// basic course information and store this in a CSV file for later processing.
///
/// # Parameters
/// - `wrapper`: The wrapper.
/// - `setting`: The settings for this term.
/// - `end_location`: The end location for the cleaned CSV files. Just the base location will
/// suffice.
pub async fn track_webreg_enrollment(
wrapper: &Mutex<WebRegWrapper>,
info: &TermInfo,
stop_flag: &Arc<AtomicBool>,
verbose: bool,
) {
/// - `info`: The term information.
/// - `stop_flag`: The stop flag. This is essentially a global flag that indicates if the scraper
/// should stop running.
/// - `verbose`: Whether logging should be verbose.
pub async fn track_webreg_enrollment(info: &TermInfo, stop_flag: &Arc<AtomicBool>, verbose: bool) {
// If the wrapper doesn't have a valid cookie, then return.
if !wrapper.lock().await.is_valid().await {
if !info.scraper_wrapper.lock().await.is_valid().await {
eprintln!(
"[{}] [{}] Initial instance is not valid. Returning.",
info.term,
Expand Down Expand Up @@ -227,7 +217,7 @@ pub async fn track_webreg_enrollment(
writer.flush().unwrap();
let results = {
let mut r = vec![];
let w = wrapper.lock().await;
let w = info.scraper_wrapper.lock().await;
for search_query in &info.search_query {
let mut temp = w
.search_courses(SearchType::Advanced(search_query))
Expand Down Expand Up @@ -279,11 +269,14 @@ pub async fn track_webreg_enrollment(
break 'main;
}

let w = wrapper.lock().await;
let res = w
.get_enrollment_count(r.subj_code.trim(), r.course_code.trim())
.await;
drop(w);
// Start timing.
let start_time = Instant::now();

let res = {
let w = info.scraper_wrapper.lock().await;
w.get_enrollment_count(r.subj_code.trim(), r.course_code.trim())
.await
};

match res {
Err(e) => {
Expand Down Expand Up @@ -353,11 +346,28 @@ pub async fn track_webreg_enrollment(
}
}

// Record time spent on request.
let end_time = start_time.elapsed();
info.tracker.num_requests.fetch_add(1, Ordering::SeqCst);
let time_spent = end_time.as_millis() as usize;
info.tracker
.total_time_spent
.fetch_add(time_spent, Ordering::SeqCst);

// Add the most recent request to the deque, removing the oldest if necessary.
let mut recent_requests = info.tracker.recent_requests.lock().await;
while recent_requests.len() >= MAX_RECENT_REQUESTS {
recent_requests.pop_front();
}

recent_requests.push_back(time_spent);

// Sleep between requests so we don't get ourselves banned by webreg
tokio::time::sleep(Duration::from_secs_f64(info.cooldown)).await;
}
}

// Out of loop, this should run only if we need to exit the scraper (e.g., need to log back in)
#[cfg(feature = "scraper")]
{
if !writer.buffer().is_empty() {
Expand Down
22 changes: 19 additions & 3 deletions src/types.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::collections::HashMap;
use std::sync::atomic::AtomicBool;
use std::collections::{HashMap, VecDeque};
use std::sync::atomic::{AtomicBool, AtomicUsize};
use std::sync::Arc;

use serde::{Deserialize, Serialize};
Expand All @@ -8,7 +8,6 @@ use webweg::reqwest::Client;
use webweg::wrapper::{CourseLevelFilter, SearchRequestBuilder, WebRegWrapper};

/// A structure that represents the current state of all wrappers.
#[derive(Clone)]
pub struct WrapperState {
/// A map containing all active scrapers, grouped by term.
pub all_wrappers: WrapperMap,
Expand All @@ -20,6 +19,16 @@ pub struct WrapperState {

pub type WrapperMap = HashMap<String, Arc<TermInfo>>;

/// A structure that holds basic stats about the tracker.
pub struct StatTracker {
    /// The amount of time, in milliseconds, that each of the most recent
    /// requests took to finish processing. The collection is bounded (the
    /// writer evicts the oldest entries), so it reflects only recent history.
    pub recent_requests: Mutex<VecDeque<usize>>,
    /// The total number of requests that have been made thus far.
    pub num_requests: AtomicUsize,
    /// The total amount of time spent making those requests, in milliseconds.
    pub total_time_spent: AtomicUsize,
}

/// A structure that holds information relating to the scraper and, more importantly, the
/// scraper instances themselves.
pub struct TermInfo {
Expand All @@ -41,6 +50,8 @@ pub struct TermInfo {
pub general_wrapper: Mutex<WebRegWrapper>,
/// Whether the scrapers are running.
pub is_running: AtomicBool,
/// Tracker stats. This field contains information on the performance of the scrapers.
pub tracker: StatTracker,
}

impl From<&ConfigTermDatum> for TermInfo {
Expand All @@ -55,6 +66,11 @@ impl From<&ConfigTermDatum> for TermInfo {
scraper_wrapper: Mutex::new(WebRegWrapper::new(Client::new(), "", value.term.as_str())),
general_wrapper: Mutex::new(WebRegWrapper::new(Client::new(), "", value.term.as_str())),
is_running: AtomicBool::new(false),
tracker: StatTracker {
recent_requests: Default::default(),
num_requests: Default::default(),
total_time_spent: Default::default(),
},
};

if cfg!(feature = "scraper") {
Expand Down
3 changes: 2 additions & 1 deletion webregautoin/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ const DEBUG_MODE: boolean = false;
// <option value="THIS">Some Quarter</option>
// ----
const ALL_TERMS: readonly string[] = [
"5260:::WI23"
"5260:::WI23",
"5270:::SP23"
];

const NUM_ATTEMPTS_BEFORE_EXIT: number = 6;
Expand Down

0 comments on commit 4164b80

Please sign in to comment.