diff --git a/CHANGELOG.md b/CHANGELOG.md
index e39614d80..a8c6ac418 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,9 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 
 ### Breaking changes
+- AUDITOR: Remove forbidden characters ([@raghuvar-vijay](https://github.com/raghuvar-vijay))
 - pyauditor + Apel plugin + HTCondor collector: drop support for Python 3.8 ([@dirksammel](https://github.com/dirksammel))
 
 ### Security
+- [RUSTSEC-2024-0421]: Update idna from 0.5.0 to 1.0.3 ([@raghuvar-vijay](https://github.com/raghuvar-vijay))
 - [RUSTSEC-2024-0363]: Update sqlx from 0.7.4 to 0.8.2 (missed some occurrences) ([@dirksammel](https://github.com/dirksammel))
 - [RUSTSEC-2024-0399]: Update rustls from 0.23.16 to 0.23.19 ([@dirksammel](https://github.com/dirksammel))
 - [RUSTSEC-2024-0402]: Update hashbrown from 0.15.0 to 0.15.2 ([@dirksammel](https://github.com/dirksammel))
diff --git a/Cargo.toml b/Cargo.toml
index e13b586a9..9d85f05b8 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,6 +9,10 @@ members = [
   "plugins/priority",
 ]
 
+exclude = [
+  "auditor/scripts/slurm_revert_encoding",
+]
+
 [workspace.dependencies]
 actix-web = "4.8.0"
 actix-tls = "3.4.0"
diff --git a/auditor/scripts/revert_encoding/.env b/auditor/scripts/revert_encoding/.env
new file mode 100644
index 000000000..bc0abf664
--- /dev/null
+++ b/auditor/scripts/revert_encoding/.env
@@ -0,0 +1,6 @@
+DB_NAME=auditor
+DB_USER=postgres
+DB_PASSWORD=securepassword
+DB_HOST=localhost
+DB_PORT=5432
+
diff --git a/auditor/scripts/revert_encoding/requirements.txt b/auditor/scripts/revert_encoding/requirements.txt
new file mode 100644
index 000000000..a7b648a77
--- /dev/null
+++ b/auditor/scripts/revert_encoding/requirements.txt
@@ -0,0 +1,2 @@
+psycopg2==2.9.10
+python-dotenv==1.0.1
diff --git a/auditor/scripts/revert_encoding/revert_encodings.py b/auditor/scripts/revert_encoding/revert_encodings.py
new file mode 100644
index 000000000..87d373caa
--- /dev/null
+++ b/auditor/scripts/revert_encoding/revert_encodings.py
@@ -0,0 +1,83 @@
+import json
+import os
+from json.decoder import JSONDecodeError
+from urllib.parse import unquote
+
+import psycopg2
+from dotenv import load_dotenv
+
+# Load the database settings from the .env file that sits next to this script.
+load_dotenv(dotenv_path=os.path.join(os.path.dirname(__file__), ".env"))
+
+
+DB_CONFIG = {
+    "dbname": os.getenv("DB_NAME", "auditor"),
+    "user": os.getenv("DB_USER", "postgres"),
+    "password": os.getenv("DB_PASSWORD", "password"),
+    "host": os.getenv("DB_HOST", "localhost"),
+    "port": os.getenv("DB_PORT", "5432"),
+}
+
+
+def decode_record(record_id, meta):
+    """
+    Decode the record_id and meta values.
+    """
+    decoded_record_id = unquote(record_id)
+
+    try:
+        decoded_meta = {
+            key: [unquote(value) for value in values] for key, values in meta.items()
+        }
+    except Exception as e:
+        raise JSONDecodeError(f"Error decoding meta: {e}", str(meta), 0)
+    return decoded_record_id, json.dumps(decoded_meta)
+
+
+def main():
+    BATCH_SIZE = 1000
+    offset = 0
+    conn = None
+    cursor = None
+
+    try:
+        conn = psycopg2.connect(**DB_CONFIG)
+        while True:
+            cursor = conn.cursor()
+
+            fetch_query = f"SELECT id, record_id, meta FROM auditor_accounting ORDER BY id LIMIT {BATCH_SIZE} OFFSET {offset};"
+            cursor.execute(fetch_query)
+            rows = cursor.fetchall()
+
+            if not rows:
+                break
+
+            for row in rows:
+                record_id, meta = row[1], row[2]
+                decoded_record_id, decoded_meta = decode_record(record_id, meta)
+
+                update_query = """
+                UPDATE auditor_accounting
+                SET record_id = %s, meta = %s
+                WHERE id = %s;
+                """
+                cursor.execute(update_query, (decoded_record_id, decoded_meta, row[0]))
+
+            conn.commit()
+            offset += BATCH_SIZE
+
+        print("Database updated successfully!")
+
+    except Exception as e:
+        print(f"Error: {e}")
+        if conn:
+            conn.rollback()  # Rollback on error
+
+    finally:
+        if cursor:
+            cursor.close()
+        if conn:
+            conn.close()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/auditor/scripts/revert_encoding/test_script.py b/auditor/scripts/revert_encoding/test_script.py
new file mode 100644
index 000000000..880eaf287
--- /dev/null
+++ b/auditor/scripts/revert_encoding/test_script.py
@@ -0,0 +1,102 @@
+import json
+import unittest
+from unittest.mock import MagicMock, patch
+from urllib.parse import quote
+
+from revert_encodings import decode_record, main
+
+
+class TestDecodeRecord(unittest.TestCase):
+    def test_decode_record_success(self):
+        # Test for successful decoding
+        record_id = quote("test_record_id/", safe="")
+        meta = {
+            "key1": [quote("value1*"), quote("value2%")],
+            "key2": [quote("value3!")],
+        }
+
+        expected_record_id = "test_record_id/"
+        expected_meta = {"key1": ["value1*", "value2%"], "key2": ["value3!"]}
+
+        decoded_record_id, decoded_meta = decode_record(record_id, meta)
+
+        self.assertEqual(decoded_record_id, expected_record_id)
+        self.assertEqual(json.loads(decoded_meta), expected_meta)
+
+    def test_decode_record_failure(self):
+        # Test for failure in decoding meta
+        record_id = quote("test_record_id")
+        meta = "invalid_meta_format"  # Invalid meta format
+
+        with self.assertRaises(Exception) as context:
+            decode_record(record_id, meta)
+
+        self.assertIn("Error decoding meta", str(context.exception))
+
+
+class TestDatabaseUpdate(unittest.TestCase):
+    def setUp(self):
+        """Set up test cases"""
+        self.fetch_query = "SELECT id, record_id, meta FROM auditor_accounting ORDER BY id LIMIT 1000 OFFSET 0;"
+        self.update_query = """
+                UPDATE auditor_accounting
+                SET record_id = %s, meta = %s
+                WHERE id = %s;
+                """
+
+        # Sample encoded data
+        self.sample_id = 1
+        self.encoded_record_id = quote("test/record/1", safe="")
+        self.encoded_meta = {"key1": [quote("value1")]}
+
+        # Expected decoded data
+        self.expected_record_id = "test/record/1"
+        self.expected_meta = json.dumps({"key1": ["value1"]})
+
+    @patch("psycopg2.connect")
+    def test_main_success(self, mock_connect):
+        """Test successful execution of main function"""
+        # Set up mock connection and cursor
+        mock_cursor = MagicMock()
+        mock_conn = MagicMock()
+        mock_connect.return_value = mock_conn
+        mock_conn.cursor.return_value = mock_cursor
+
+        # Mock the database responses
+        mock_cursor.fetchall.side_effect = [
+            [
+                (self.sample_id, self.encoded_record_id, self.encoded_meta)
+            ],  # First batch
+            [],  # Empty result to end the loop
+        ]
+
+        # Run the main function
+        main()
+
+        # Verify the correct SQL queries were executed
+        mock_cursor.execute.assert_any_call(self.fetch_query)
+        mock_cursor.execute.assert_any_call(
+            self.update_query,
+            (self.expected_record_id, self.expected_meta, self.sample_id),
+        )
+
+        # Verify proper cleanup
+        mock_conn.commit.assert_called_once()
+        mock_cursor.close.assert_called_once()
+        mock_conn.close.assert_called_once()
+
+    @patch("psycopg2.connect")
+    def test_main_database_error(self, mock_connect):
+        """Test database error handling"""
+        # Configure the mock to raise an exception
+        mock_connect.side_effect = Exception("Database connection failed")
+
+        # Run the main function - should handle error gracefully
+        main()
+
+        # Verify the connection attempt was made
+        mock_connect.assert_called_once()
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/auditor/scripts/test_valid_names/test_slurm_decoding.py b/auditor/scripts/test_valid_names/test_slurm_decoding.py
new file mode 100644
index 000000000..686b2e0f4
--- /dev/null
+++ b/auditor/scripts/test_valid_names/test_slurm_decoding.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+
+import json
+import unittest
+from datetime import datetime
+
+import requests
+
+
+class TestSlurmEncodingReversal(unittest.TestCase):
+    def test_check_db_records(self):
+        record_1 = {
+            "record_id": "record1",
+            "meta": {
+                "site_id": ["site/1"],
+                "user_id": ["user/1"],
+                "group_id": ["group/1"],
+            },
+            "components": [
+                {
+                    "name": "NumCPUs",
+                    "amount": 31,
+                    "scores": [{"name": "HEPSPEC", "value": 1.2}],
+                }
+            ],
+            "start_time": "2022-06-27T15:00:00Z",
+            "stop_time": "2022-06-27T15:01:00Z",
+            "runtime": 6,
+        }
+
+        record_2 = {
+            "record_id": "record2",
+            "meta": {
+                "site_id": ["site/2"],
+                "user_id": ["user/2"],
+                "group_id": ["group/2"],
+            },
+            "components": [
+                {
+                    "name": "NumCPUs",
+                    "amount": 31,
+                    "scores": [{"name": "HEPSPEC", "value": 1.2}],
+                }
+            ],
+            "start_time": "2022-06-27T15:00:00Z",
+            "stop_time": "2022-06-27T15:01:00Z",
+            "runtime": 60,
+        }
+
+        response = requests.get("http://localhost:8000/records")
+
+        if response.status_code != 200:
+            print(f"Failed to get record: {response.status_code}, {response.text}")
+        else:
+            print("Successfully retrieved records ", len(response.json()))
+
+        records_json = response.json()
+        records = sorted(response.json(), key=lambda x: x.get("record_id"))
+
+        self.assertEqual(records[0].get("record_id"), record_1.get("record_id"))
+        self.assertEqual(records[0].get("meta"), record_1.get("meta"))
+
+        self.assertEqual(records[1].get("record_id"), record_2.get("record_id"))
+        self.assertEqual(records[1].get("meta"), record_2.get("meta"))
+
+
+# Call the function
+if __name__ == "__main__":
+    unittest.main()
diff --git a/auditor/scripts/test_valid_names/test_valid_names.py b/auditor/scripts/test_valid_names/test_valid_names.py
new file mode 100644
index 000000000..dc09f4d7f
--- /dev/null
+++ b/auditor/scripts/test_valid_names/test_valid_names.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+
+import json
+import unittest
+from datetime import datetime
+
+import requests
+
+
+class TestHTCondorEncodingReversal(unittest.TestCase):
+    def test_check_db_records(self):
+        record_1 = {
+            "record_id": "record-example/job id & 1",
+            "meta": {
+                "site_id": ["site 1"],
+                "user_id": ["user 1"],
+                "group_id": ["group/1"],
+            },
+            "components": [
+                {
+                    "name": "NumCPUs",
+                    "amount": 31,
+                    "scores": [{"name": "HEPSPEC", "value": 1.2}],
+                }
+            ],
+            "start_time": "2022-06-27T15:00:00Z",
+            "stop_time": "2022-06-27T15:01:00Z",
+            "runtime": 6,
+        }
+
+        record_2 = {
+            "record_id": "record-example/job id & 2",
+            "meta": {
+                "site_id": ["site 2"],
+                "user_id": ["user 2"],
+                "group_id": ["group/2"],
+            },
+            "components": [
+                {
+                    "name": "NumCPUs",
+                    "amount": 31,
+                    "scores": [{"name": "HEPSPEC", "value": 1.2}],
+                }
+            ],
+            "start_time": "2022-06-27T15:00:00Z",
+            "stop_time": "2022-06-27T15:01:00Z",
+            "runtime": 60,
+        }
+
+        response = requests.get("http://localhost:8000/records")
+
+        if response.status_code != 200:
+            print(f"Failed to get record: {response.status_code}, {response.text}")
+        else:
+            print("Successfully retrieved records ", len(response.json()))
+
+        records_json = response.json()
+        records = sorted(response.json(), key=lambda x: x.get("record_id"))
+
+        self.assertEqual(records[0].get("record_id"), record_1.get("record_id"))
+        self.assertEqual(records[0].get("meta"), record_1.get("meta"))
+
+        self.assertEqual(records[1].get("record_id"), record_2.get("record_id"))
+        self.assertEqual(records[1].get("meta"), record_2.get("meta"))
+
+
+# Call the function
+if __name__ == "__main__":
+    unittest.main()
diff --git a/auditor/src/constants.rs b/auditor/src/constants.rs
index 5d3ae3fa6..953d093f0 100644
--- a/auditor/src/constants.rs
+++ b/auditor/src/constants.rs
@@ -5,6 +5,5 @@
 // http://opensource.org/licenses/MIT>, at your option. This file may not be
 // copied, modified, or distributed except according to those terms.
 
-pub const FORBIDDEN_CHARACTERS: [char; 9] = ['/', '(', ')', '"', '<', '>', '\\', '{', '}'];
 pub const ERR_RECORD_EXISTS: &str = "RECORD_EXISTS";
 pub const ERR_UNEXPECTED_ERROR: &str = "UNEXPECTED_ERROR";
diff --git a/auditor/src/domain/record.rs b/auditor/src/domain/record.rs
index 729c66b7b..5f36d67c1 100644
--- a/auditor/src/domain/record.rs
+++ b/auditor/src/domain/record.rs
@@ -79,30 +79,6 @@ use serde::{Deserialize, Serialize};
 /// # Ok(())
 /// # }
 /// ```
-/// Create a record with an invalid ID:
-///
-/// ```
-/// # use auditor::domain::{Component, RecordAdd, Score};
-/// # use chrono::{DateTime, TimeZone, Utc};
-/// # use std::collections::HashMap;
-/// #
-/// # fn main() -> Result<(), anyhow::Error> {
-/// # let start_time: DateTime<Utc> = Utc.with_ymd_and_hms(2023, 1, 1, 0, 0, 0).unwrap();
-/// #
-/// # let component_cpu = Component::new("CPU", 10)?
-/// #     .with_score(Score::new("HEPSPEC06", 9.2)?);
-/// # let component_mem = Component::new("MEM", 32)?;
-/// # let components = vec![component_cpu, component_mem];
-/// #
-/// # let mut meta = HashMap::new();
-/// # meta.insert("site_id", vec!["site1"]);
-/// # meta.insert("features", vec!["ssd", "gpu"]);
-/// #
-/// let record = RecordAdd::new("123/456", meta, components, start_time);
-/// assert!(record.is_err());
-/// Ok(())
-/// # }
-/// ```
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct RecordAdd {
     /// Unique identifier of the record.
@@ -151,19 +127,6 @@ pub struct RecordAdd {
 /// # Ok(())
 /// # }
 /// ```
-/// Create a record with an invalid ID:
-///
-/// ```
-/// # use auditor::domain::{Component, RecordUpdate};
-/// # use chrono::{DateTime, TimeZone, Utc};
-/// # use std::collections::HashMap;
-/// #
-/// # fn main() {
-/// # let stop_time: DateTime<Utc> = Utc.with_ymd_and_hms(2023, 1, 1, 0, 0, 0).unwrap();
-/// let record = RecordUpdate::new("123/456", HashMap::new(), Vec::new(), stop_time);
-/// assert!(record.is_err());
-/// # }
-/// ```
 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct RecordUpdate {
diff --git a/auditor/src/domain/validname.rs b/auditor/src/domain/validname.rs
index 80a6efaa9..d9b7bf0ce 100644
--- a/auditor/src/domain/validname.rs
+++ b/auditor/src/domain/validname.rs
@@ -5,7 +5,6 @@
 // http://opensource.org/licenses/MIT>, at your option. This file may not be
 // copied, modified, or distributed except according to those terms.
 
-use crate::constants::FORBIDDEN_CHARACTERS;
 use crate::domain::ValidationError;
 use anyhow::Context;
 use std::fmt;
@@ -26,9 +25,7 @@ impl ValidName {
         let is_empty_or_whitespace = s.trim().is_empty();
         // count characters
         let is_too_long = s.graphemes(true).count() > 256;
-        // check for forbidden characters
-        let contains_forbidden_characters = s.chars().any(|g| FORBIDDEN_CHARACTERS.contains(&g));
-        if is_empty_or_whitespace || is_too_long || contains_forbidden_characters {
+        if is_empty_or_whitespace || is_too_long {
             Err(ValidationError(format!("Invalid Name: {s}")))
         } else {
             Ok(Self(s))
@@ -119,14 +116,6 @@ mod tests {
         assert_err!(ValidName::parse(name));
     }
 
-    #[test]
-    fn names_containing_an_invalid_character_are_rejected() {
-        for name in &['/', '(', ')', '"', '<', '>', '\\', '{', '}'] {
-            let name = name.to_string();
-            assert_err!(ValidName::parse(name));
-        }
-    }
-
     #[quickcheck]
     fn a_valid_name_is_parsed_successfully(name: ValidNameString) {
         assert_ok!(ValidName::parse(name.0));
diff --git a/auditor/tests/api/add.rs b/auditor/tests/api/add.rs
index 1913e74f6..3cd945e95 100644
--- a/auditor/tests/api/add.rs
+++ b/auditor/tests/api/add.rs
@@ -38,40 +38,6 @@ async fn add_returns_a_200_for_valid_json_data() {
     }
 }
 
-#[tokio::test]
-async fn add_returns_a_400_for_invalid_json_data() {
-    // Arrange
-    let app = spawn_app().await;
-
-    let forbidden_strings: Vec<String> = ['/', '(', ')', '"', '<', '>', '\\', '{', '}']
-        .into_iter()
-        .map(|s| format!("test{s}test"))
-        .collect();
-
-    for _field in ["record_id"] {
-        for fs in forbidden_strings.iter() {
-            // Act
-            let mut body: RecordTest = Faker.fake();
-            // match field {
-            //     "record_id" => body.record_id = Some(fs.clone()),
-            //     _ => (),
-            // }
-            body.record_id = Some(fs.clone());
-
-            let response = app.add_record(&body).await;
-
-            assert_eq!(400, response.status().as_u16());
-
-            let saved: Vec<_> = sqlx::query!(r#"SELECT record_id FROM auditor_accounting"#,)
-                .fetch_all(&app.db_pool)
-                .await
-                .expect("Failed to fetch data");
-
-            assert_eq!(saved.len(), 0);
-        }
-    }
-}
-
 #[tokio::test]
 async fn add_returns_a_400_when_data_is_missing() {
     // Arrange
@@ -164,39 +130,6 @@ async fn bulk_insert_records() {
     }
 }
 
-#[tokio::test]
-async fn bulk_insert_returns_a_400_for_invalid_json_data() {
-    let app = spawn_app().await;
-
-    let forbidden_strings: Vec<String> = ['/', '(', ')', '"', '<', '>', '\\', '{', '}']
-        .into_iter()
-        .map(|s| format!("test{s}test"))
-        .collect();
-
-    for _field in ["record_id"] {
-        for fs in forbidden_strings.iter() {
-            let records: Vec<RecordTest> = (0..100)
-                .map(|_| {
-                    let mut record: RecordTest = Faker.fake();
-                    record.record_id = Some(fs.clone());
-                    record
-                })
-                .collect();
-
-            let response = app.bulk_insert(&records).await;
-
-            assert_eq!(400, response.status().as_u16());
-
-            let saved: Vec<_> = sqlx::query!(r#"SELECT record_id FROM auditor_accounting"#,)
-                .fetch_all(&app.db_pool)
-                .await
-                .expect("Failed to fetch data");
-
-            assert_eq!(saved.len(), 0);
-        }
-    }
-}
-
 #[tokio::test]
 async fn bulk_insert_returns_a_400_when_data_is_missing() {
     let app = spawn_app().await;
diff --git a/collectors/htcondor/src/auditor_htcondor_collector/collector.py b/collectors/htcondor/src/auditor_htcondor_collector/collector.py
index f94c194b2..a2bac227f 100644
--- a/collectors/htcondor/src/auditor_htcondor_collector/collector.py
+++ b/collectors/htcondor/src/auditor_htcondor_collector/collector.py
@@ -6,7 +6,6 @@
 from datetime import datetime as dt
 from datetime import timezone
 from typing import List, Optional, Tuple
-from urllib.parse import quote
 
 from pyauditor import (
     AuditorClient,
@@ -231,7 +230,7 @@ def _get_meta(self, job: dict) -> Meta:
             for item in entry if isinstance(entry, list) else [entry]:
                 value = get_value(item, job)
                 if value is not None:
-                    values.append(quote(value, safe=""))
+                    values.append(value)
                 if key == "site":  # site is a special case
                     break
             if values:
@@ -270,9 +269,7 @@ def _generate_record(self, job: dict) -> Record:
         meta = self._get_meta(job)
 
         try:
-            record_id = (
-                f"{self.config.record_prefix}-{quote(job_id.encode('utf-8'), safe='')}"
-            )
+            record_id = f"{self.config.record_prefix}-{job_id}"
             record = Record(
                 record_id=record_id,
                 start_time=dt.fromtimestamp(start_time, tz=timezone.utc),
diff --git a/collectors/slurm-epilog/src/main.rs b/collectors/slurm-epilog/src/main.rs
index 15118c368..adb1164fd 100644
--- a/collectors/slurm-epilog/src/main.rs
+++ b/collectors/slurm-epilog/src/main.rs
@@ -6,7 +6,6 @@
 // copied, modified, or distributed except according to those terms.
 
 use anyhow::Error;
-use auditor::constants::FORBIDDEN_CHARACTERS;
 use auditor::domain::{Component, RecordAdd, Score};
 use auditor::telemetry::{get_subscriber, init_subscriber};
 use auditor_client::AuditorClientBuilder;
@@ -14,7 +13,6 @@ use chrono::{offset::FixedOffset, DateTime, Local, NaiveDateTime, Utc};
 use regex::Regex;
 use std::collections::HashMap;
 use std::env;
-use std::fmt;
 use std::process::Command;
 use tracing::{debug, info};
 use uuid::Uuid;
@@ -62,11 +60,6 @@ fn parse_slurm_timestamp<T: AsRef<str> + std::fmt::Debug>(
     ))
 }
 
-#[tracing::instrument(name = "Remove forbidden characters from string", level = "debug")]
-fn make_string_valid<T: AsRef<str> + fmt::Debug>(input: T) -> String {
-    input.as_ref().replace(&FORBIDDEN_CHARACTERS[..], "")
-}
-
 #[tracing::instrument(
     name = "Construct components from job info and configuration",
     level = "debug"
 )]
@@ -86,7 +79,7 @@ fn construct_components(config: &configuration::Settings, job: &Job) -> Vec Result<(), Error> {
     debug!(?job, "Acquired SLURM job info");
     let record = RecordAdd::new(
-        format!("{}-{job_id}", make_string_valid(&config.record_prefix)),
+        format!("{}-{job_id}", &config.record_prefix),
         HashMap::from([
-            (
-                "site_id".to_string(),
-                vec![make_string_valid(&config.site_id)],
-            ),
+            ("site_id".to_string(), vec![config.site_id.clone()]),
             (
                 "user_id".to_string(),
-                vec![make_string_valid(
-                    job["UserId"].split('(').take(1).collect::<Vec<_>>()[0],
-                )],
+                vec![job["UserId"].split('(').take(1).collect::<Vec<_>>()[0].to_string()],
             ),
             (
                 "group_id".to_string(),
-                vec![make_string_valid(
-                    job["GroupId"].split('(').take(1).collect::<Vec<_>>()[0],
-                )],
+                vec![job["GroupId"].split('(').take(1).collect::<Vec<_>>()[0].to_string()],
             ),
         ]),
         construct_components(&config, &job),
diff --git a/collectors/slurm/src/sacctcaller.rs b/collectors/slurm/src/sacctcaller.rs
index b37009def..b601817f7 100644
--- a/collectors/slurm/src/sacctcaller.rs
+++ b/collectors/slurm/src/sacctcaller.rs
@@ -5,13 +5,10 @@
 // http://opensource.org/licenses/MIT>, at your option. This file may not be
 // copied, modified, or distributed except according to those terms.
 
-use std::{collections::HashMap, fmt};
+use std::collections::HashMap;
 
 use anyhow::anyhow;
-use auditor::{
-    constants::FORBIDDEN_CHARACTERS,
-    domain::{Component, RecordAdd, Score},
-};
+use auditor::domain::{Component, RecordAdd, Score};
 use chrono::{DateTime, Local, Utc};
 use color_eyre::eyre::{eyre, Result};
 use itertools::Itertools;
@@ -307,7 +304,7 @@ fn construct_record(
         return Ok(None);
     };
 
-    let record_id = make_string_valid(format!("{}-{job_id}", &CONFIG.record_prefix));
+    let record_id = format!("{}-{job_id}", &CONFIG.record_prefix);
     // We don't want this record, we have already seen it in a previous run.
     if record_id == last_record_id {
         return Ok(None);
@@ -321,20 +318,14 @@ fn construct_record(
                 val.extract_map()?
                     .iter()
                     .map(|(k, v)| -> Result<(String, Vec<String>)> {
-                        Ok((
-                            make_string_valid(k.extract_string()?),
-                            vec![make_string_valid(v.extract_string()?)],
-                        ))
+                        Ok((k.extract_string()?, vec![v.extract_string()?]))
                     })
                     .collect::<Result<Vec<_>>>()?
             } else {
                 vec![]
             }
         } else {
-            vec![(
-                m.name.clone(),
-                vec![make_string_valid(map[&m.key].extract_as_string()?)],
-            )]
+            vec![(m.name.clone(), vec![map[&m.key].extract_as_string()?])]
         };
         Ok(map)
     })
@@ -346,15 +337,9 @@ fn construct_record(
         HashMap::new()
     };
 
-    meta.insert("site_id".to_string(), vec![make_string_valid(site)]);
-    meta.insert(
-        "user_id".to_string(),
-        vec![make_string_valid(map[USER].extract_string()?)],
-    );
-    meta.insert(
-        "group_id".to_string(),
-        vec![make_string_valid(map[GROUP].extract_string()?)],
-    );
+    meta.insert("site_id".to_string(), vec![site]);
+    meta.insert("user_id".to_string(), vec![map[USER].extract_string()?]);
+    meta.insert("group_id".to_string(), vec![map[GROUP].extract_string()?]);
 
     let components = if let Ok(components) = construct_components(map, &config.components) {
         components
@@ -373,11 +358,6 @@ fn construct_record(
     ))
 }
 
-#[tracing::instrument(name = "Remove forbidden characters from string", level = "debug")]
-fn make_string_valid<T: AsRef<str> + fmt::Debug>(input: T) -> String {
-    input.as_ref().replace(&FORBIDDEN_CHARACTERS[..], "")
-}
-
 #[tracing::instrument(name = "Obtain site from job info and configuration", level = "debug")]
 fn identify_site(job: &Job) -> Option<String> {
     CONFIG
@@ -394,7 +374,7 @@ fn identify_site(job: &Job) -> Option<String> {
             }
         })
         .cloned()
-        .map(|s| make_string_valid(s.name))
+        .map(|s| s.name)
        .collect::<Vec<_>>()
        .first()
        .cloned()
@@ -425,7 +405,7 @@ fn construct_components(
         .map(|c| {
             if !job.contains_key(&c.key) {
                 if let Some(default_value) = c.default_value {
-                    Ok(Component::new(make_string_valid(&c.name), default_value)
+                    Ok(Component::new(&c.name, default_value)
                         .expect("Cannot construct component")
                         .with_scores(construct_component_scores(job, &c)))
                 } else {
@@ -435,7 +415,7 @@ fn construct_components(
                 }
             } else {
                 Ok(Component::new(
-                    make_string_valid(&c.name),
+                    &c.name,
                     job[&c.key].extract_i64().unwrap_or_else(|_| {
                         panic!(
                             "Cannot parse key {} (value: {:?}) into i64.",
diff --git a/media/website/content/migration.md b/media/website/content/migration.md
index 87c2307c7..10fdb5412 100644
--- a/media/website/content/migration.md
+++ b/media/website/content/migration.md
@@ -6,6 +6,18 @@ weight = 3
 
 # From 0.6.3 to unreleased/0.7.0
 
+Please back up your database before applying any of the changes listed below.
+
+## Remove forbidden characters
+The following changes only apply to users of the HTCondor collector (v0.6.3 and earlier) or the Slurm collector (v0.6.3 and earlier). Follow these steps to revert the encodings in your database records:
+
+### HTCondor collector v0.6.3 and earlier or Slurm collector v0.6.3 and earlier
+- Clone the git repository [AUDITOR](https://github.com/ALU-Schumacher/AUDITOR).
+- The script for reverting the encodings is located at `auditor/scripts/revert_encoding`.
+- Install the required dependencies with `pip install -r requirements.txt`.
+- Replace the placeholder values in the `.env` file with the values corresponding to your database configuration.
+- Run `python revert_encodings.py` (see the example invocation below).
+
 ### New feature - TLS
 
 TLS is added to AUDITOR, all collectors and plugins. A new config section called tls_config is required by all config files. `use_tls` is a compulsory field of type bool that defines whether to use the TLS or not.
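+For example, assuming the repository has been cloned into the current working directory and your database matches the values in the `.env` template, reverting the encodings might look like this:
+
+```bash
+cd AUDITOR/auditor/scripts/revert_encoding
+pip install -r requirements.txt
+# edit .env so that DB_NAME, DB_USER, DB_PASSWORD, DB_HOST and DB_PORT
+# match the database used by your AUDITOR instance, then run:
+python revert_encodings.py
+```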
diff --git a/scripts/test_htcondor_deserialization.sh b/scripts/test_htcondor_deserialization.sh
new file mode 100755
index 000000000..8ceb96303
--- /dev/null
+++ b/scripts/test_htcondor_deserialization.sh
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+set -x
+set -eo pipefail
+
+RELEASE_MODE=${RELEASE_MODE:=false}
+ENV_DIR=${ENV_DIR:=".env_test"}
+
+function compile_auditor() {
+  if [ "$RELEASE_MODE" = true ]; then
+    cargo build --bin auditor --release
+  else
+    cargo build --bin auditor
+  fi
+}
+
+function start_auditor() {
+  if [ "$RELEASE_MODE" = true ]; then
+    AUDITOR_APPLICATION__ADDR=0.0.0.0 ./target/release/auditor &
+  else
+    AUDITOR_APPLICATION__ADDR=0.0.0.0 ./target/debug/auditor &
+  fi
+  AUDITOR_SERVER_PID=$!
+  COUNTER=0
+  until curl http://localhost:8000/health_check; do
+    echo >&2 "Auditor is still unavailable - sleeping"
+    ((COUNTER = COUNTER + 1))
+    if [ "$COUNTER" -gt "30" ]; then
+      echo >&2 "Auditor did not come up in time."
+      stop_auditor
+      echo >&2 "Exiting."
+      exit 1
+    fi
+    sleep 1
+  done
+}
+
+function stop_auditor() {
+  echo >&2 "Stopping Auditor"
+  kill $AUDITOR_SERVER_PID
+  wait $AUDITOR_SERVER_PID
+}
+
+function cleanup_exit() {
+  if [ -n "$AUDITOR_SERVER_PID" ]; then
+    echo >&2 "Stopping Auditor due to script exit"
+    stop_auditor
+  fi
+}
+
+function fill_auditor_db() {
+  curl -X POST --header "Content-Type: application/json" \
+    --data '{"record_id": "record-example%2Fjob%20id%20%26%201", "meta": {"site_id": ["site%201"], "user_id": ["user%201"], "group_id": ["group%2F1"]}, "components": [{"name": "NumCPUs", "amount": 31, "scores": [{"name": "HEPSPEC", "value": 1.2}]}], "start_time": "2022-06-27T15:00:00Z", "stop_time": "2022-06-27T15:01:00Z", "runtime": 60}' \
+    http://localhost:8000/record
+
+  curl -X POST --header "Content-Type: application/json" \
+    --data '{"record_id": "record-example%2Fjob%20id%20%26%202", "meta": {"site_id": ["site%202"], "user_id": ["user%202"], "group_id": ["group%2F2"]}, "components": [{"name": "NumCPUs", "amount": 31, "scores": [{"name": "HEPSPEC", "value": 1.2}]}], "start_time": "2022-06-27T15:00:00Z", "stop_time": "2022-06-27T15:01:00Z", "runtime": 60}' \
+    http://localhost:8000/record
+}
+
+function replace_encoded_string_in_db() {
+  python auditor/scripts/revert_encoding/revert_encodings.py
+}
+
+function check_if_records_are_correctly_reverted() {
+  python auditor/scripts/test_valid_names/test_valid_names.py
+}
+
+trap "cleanup_exit" SIGINT SIGQUIT SIGTERM EXIT
+
+start_auditor
+
+fill_auditor_db
+
+replace_encoded_string_in_db
+
+check_if_records_are_correctly_reverted
+
+stop_auditor
diff --git a/scripts/test_slurm_deserialization.sh b/scripts/test_slurm_deserialization.sh
new file mode 100755
index 000000000..11d5f53d7
--- /dev/null
+++ b/scripts/test_slurm_deserialization.sh
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+set -x
+set -eo pipefail
+
+RELEASE_MODE=${RELEASE_MODE:=false}
+ENV_DIR=${ENV_DIR:=".env_test"}
+
+function compile_auditor() {
+  if [ "$RELEASE_MODE" = true ]; then
+    cargo build --bin auditor --release
+  else
+    cargo build --bin auditor
+  fi
+}
+
+function start_auditor() {
+  if [ "$RELEASE_MODE" = true ]; then
+    AUDITOR_APPLICATION__ADDR=0.0.0.0 ./target/release/auditor &
+  else
+    AUDITOR_APPLICATION__ADDR=0.0.0.0 ./target/debug/auditor &
+  fi
+  AUDITOR_SERVER_PID=$!
+  COUNTER=0
+  until curl http://localhost:8000/health_check; do
+    echo >&2 "Auditor is still unavailable - sleeping"
+    ((COUNTER = COUNTER + 1))
+    if [ "$COUNTER" -gt "30" ]; then
+      echo >&2 "Auditor did not come up in time."
+      stop_auditor
+      echo >&2 "Exiting."
+      exit 1
+    fi
+    sleep 1
+  done
+}
+
+function stop_auditor() {
+  echo >&2 "Stopping Auditor"
+  kill $AUDITOR_SERVER_PID
+  wait $AUDITOR_SERVER_PID
+}
+
+function cleanup_exit() {
+  if [ -n "$AUDITOR_SERVER_PID" ]; then
+    echo >&2 "Stopping Auditor due to script exit"
+    stop_auditor
+  fi
+}
+
+function fill_auditor_db() {
+  curl -X POST --header "Content-Type: application/json" \
+    --data '{"record_id": "record1", "meta": {"site_id": ["site%2F1"], "user_id": ["user%2F1"], "group_id": ["group%2F1"]}, "components": [{"name": "NumCPUs", "amount": 31, "scores": [{"name": "HEPSPEC", "value": 1.2}]}], "start_time": "2022-06-27T15:00:00Z", "stop_time": "2022-06-27T15:01:00Z", "runtime": 60}' \
+    http://localhost:8000/record
+
+  curl -X POST --header "Content-Type: application/json" \
+    --data '{"record_id": "record2", "meta": {"site_id": ["site%2F2"], "user_id": ["user%2F2"], "group_id": ["group%2F2"]}, "components": [{"name": "NumCPUs", "amount": 31, "scores": [{"name": "HEPSPEC", "value": 1.2}]}], "start_time": "2022-06-27T15:00:00Z", "stop_time": "2022-06-27T15:01:00Z", "runtime": 60}' \
+    http://localhost:8000/record
+}
+
+function replace_encoded_string_in_db() {
+  python auditor/scripts/revert_encoding/revert_encodings.py
+}
+
+function check_if_records_are_correctly_reverted() {
+  python auditor/scripts/test_valid_names/test_slurm_decoding.py
+}
+
+trap "cleanup_exit" SIGINT SIGQUIT SIGTERM EXIT
+
+start_auditor
+
+fill_auditor_db
+
+replace_encoded_string_in_db
+
+check_if_records_are_correctly_reverted
+
+stop_auditor