Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: remove forbidden characters & add encoding reversal scripts for … #1084

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Breaking changes
- AUDITOR: Remove the `FORBIDDEN_CHARACTERS` list of forbidden characters ([@raghuvar-vijay](https://github.com/raghuvar-vijay))
- pyauditor + Apel plugin + HTCondor collector: drop support for Python 3.8 ([@dirksammel](https://github.com/dirksammel))

### Security
- [RUSTSEC-2024-0421]: Update idna from 0.5.0 to 1.0.3 ([@raghuvar-vijay](https://github.com/raghuvar-vijay))
- [RUSTSEC-2024-0363]: Update sqlx from 0.7.4 to 0.8.2 (missed some occurrences) ([@dirksammel](https://github.com/dirksammel))
- [RUSTSEC-2024-0399]: Update rustls from 0.23.16 to 0.23.19 ([@dirksammel](https://github.com/dirksammel))
- [RUSTSEC-2024-0402]: Update hashbrown from 0.15.0 to 0.15.2 ([@dirksammel](https://github.com/dirksammel))
Expand Down
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ members = [
"plugins/priority",
]

exclude = [
"auditor/scripts/slurm_revert_encoding",
]

[workspace.dependencies]
actix-web = "4.8.0"
actix-tls = "3.4.0"
Expand Down
6 changes: 6 additions & 0 deletions auditor/scripts/revert_encoding/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
DB_NAME=auditor
DB_USER=postgres
DB_PASSWORD=securepassword
DB_HOST=localhost
DB_PORT=5432

2 changes: 2 additions & 0 deletions auditor/scripts/revert_encoding/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
psycopg2==2.9.10
python-dotenv==1.0.1
83 changes: 83 additions & 0 deletions auditor/scripts/revert_encoding/revert_encodings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import json
import os
from json.decoder import JSONDecodeError
from urllib.parse import unquote

import psycopg2
from dotenv import load_dotenv

# Load the .env file that lives next to this script. The path is resolved from
# this file's own location so the script behaves the same regardless of the
# current working directory. (dotenv_path must name the .env file itself, not
# a directory.)
load_dotenv(
    dotenv_path=os.path.join(os.path.dirname(os.path.abspath(__file__)), ".env")
)


# Keyword arguments passed to psycopg2.connect(). Each value is read from the
# environment (populated by the .env file above when present); the second
# argument to os.getenv is the fallback used when the variable is unset.
DB_CONFIG = {
    "dbname": os.getenv("DB_NAME", "auditor"),
    "user": os.getenv("DB_USER", "postgres"),
    "password": os.getenv("DB_PASSWORD", "password"),
    "host": os.getenv("DB_HOST", "localhost"),
    "port": os.getenv("DB_PORT", "5432"),
}


def decode_record(record_id, meta):
    """
    Reverse the percent-encoding of a record's ``record_id`` and ``meta`` values.

    Args:
        record_id: Percent-encoded record identifier (a string).
        meta: Mapping of meta key to a list of percent-encoded strings. Keys
            are kept as-is; only the list values are decoded.

    Returns:
        A tuple ``(decoded_record_id, decoded_meta_json)``. The second element
        is the decoded mapping serialized to a JSON string with ``json.dumps``.

    Raises:
        JSONDecodeError: If ``meta`` cannot be walked as a mapping of lists of
            strings (for example when it is a plain string or ``None``). The
            underlying error is attached as ``__cause__``.
    """
    decoded_record_id = unquote(record_id)

    try:
        decoded_meta = {
            key: [unquote(value) for value in values] for key, values in meta.items()
        }
    except (AttributeError, TypeError) as e:
        # AttributeError: meta has no .items(); TypeError: a value is not
        # iterable or an element is not a string. Chain the cause so the
        # original traceback is not lost.
        raise JSONDecodeError(f"Error decoding meta: {e}", str(meta), 0) from e
    return decoded_record_id, json.dumps(decoded_meta)


def main():
    """
    Decode ``record_id`` and ``meta`` for every row of ``auditor_accounting``.

    Rows are read in batches of ``BATCH_SIZE`` ordered by ``id``; each row is
    passed through ``decode_record`` and written back, and every batch is
    committed separately. Any exception is printed and the current
    (uncommitted) batch is rolled back; the function does not re-raise.

    NOTE: the decoding is not idempotent. Running the script again decodes the
    stored values a second time, which would alter values that legitimately
    contain ``%XX`` sequences.
    """
    BATCH_SIZE = 1000
    offset = 0
    conn = None
    cursor = None

    # Loop-invariant UPDATE statement; values are bound as parameters.
    update_query = (
        "\nUPDATE auditor_accounting\n"
        "SET record_id = %s, meta = %s\n"
        "WHERE id = %s;\n"
    )

    try:
        conn = psycopg2.connect(**DB_CONFIG)
        # A single cursor is reused for all batches. Opening a new cursor per
        # batch would leave every cursor but the last one unclosed.
        cursor = conn.cursor()

        while True:
            # BATCH_SIZE and offset are local integers, so interpolating them
            # into the statement does not expose it to external input.
            fetch_query = f"SELECT id, record_id, meta FROM auditor_accounting ORDER BY id LIMIT {BATCH_SIZE} OFFSET {offset};"
            cursor.execute(fetch_query)
            rows = cursor.fetchall()

            if not rows:
                break

            for row_id, record_id, meta in rows:
                decoded_record_id, decoded_meta = decode_record(record_id, meta)
                cursor.execute(update_query, (decoded_record_id, decoded_meta, row_id))

            # Commit per batch so completed batches survive a later failure.
            conn.commit()
            offset += BATCH_SIZE

        print("Database updated successfully!")

    except Exception as e:
        print(f"Error: {e}")
        if conn:
            conn.rollback()  # Rollback on error

    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()


if __name__ == "__main__":
    main()
102 changes: 102 additions & 0 deletions auditor/scripts/revert_encoding/test_script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import json
import unittest
from unittest.mock import MagicMock, patch
from urllib.parse import quote

from revert_encodings import decode_record, main


class TestDecodeRecord(unittest.TestCase):
    """Unit tests for ``decode_record``."""

    def test_decode_record_success(self):
        """Percent-encoded record_id and meta values are decoded back."""
        record_id = quote("test_record_id/", safe="")
        meta = {
            "key1": [quote("value1*"), quote("value2%")],
            "key2": [quote("value3!")],
        }

        expected_record_id = "test_record_id/"
        expected_meta = {"key1": ["value1*", "value2%"], "key2": ["value3!"]}

        decoded_record_id, decoded_meta = decode_record(record_id, meta)

        self.assertEqual(decoded_record_id, expected_record_id)
        # decode_record returns meta as a JSON string, so parse it to compare.
        self.assertEqual(json.loads(decoded_meta), expected_meta)

    def test_decode_record_failure(self):
        """A meta value that is not a mapping raises JSONDecodeError."""
        record_id = quote("test_record_id")
        meta = "invalid_meta_format"  # Invalid meta format

        # Assert the specific exception type decode_record raises, so that an
        # unrelated error cannot make this test pass.
        with self.assertRaises(json.JSONDecodeError) as context:
            decode_record(record_id, meta)

        self.assertIn("Error decoding meta", str(context.exception))


class TestDatabaseUpdate(unittest.TestCase):
    """Tests for ``main`` with the psycopg2 connection replaced by mocks."""

    def setUp(self):
        """Prepare the expected SQL statements and one sample row."""
        self.fetch_query = "SELECT id, record_id, meta FROM auditor_accounting ORDER BY id LIMIT 1000 OFFSET 0;"
        self.update_query = (
            "\nUPDATE auditor_accounting\n"
            "SET record_id = %s, meta = %s\n"
            "WHERE id = %s;\n"
        )

        # Row as stored in the database: percent-encoded values.
        self.sample_id = 1
        self.encoded_record_id = quote("test/record/1", safe="")
        self.encoded_meta = {"key1": [quote("value1")]}

        # Values main() is expected to write back.
        self.expected_record_id = "test/record/1"
        self.expected_meta = json.dumps({"key1": ["value1"]})

    @patch("psycopg2.connect")
    def test_main_success(self, mock_connect):
        """main() fetches a batch, writes decoded values, commits and cleans up."""
        connection = MagicMock()
        db_cursor = MagicMock()
        connection.cursor.return_value = db_cursor
        mock_connect.return_value = connection

        # One batch holding the sample row, then an empty batch to stop the loop.
        sample_row = (self.sample_id, self.encoded_record_id, self.encoded_meta)
        db_cursor.fetchall.side_effect = [[sample_row], []]

        main()

        # Both the SELECT and the parameterized UPDATE must have been issued.
        expected_params = (
            self.expected_record_id,
            self.expected_meta,
            self.sample_id,
        )
        db_cursor.execute.assert_any_call(self.fetch_query)
        db_cursor.execute.assert_any_call(self.update_query, expected_params)

        # Exactly one commit, and both cursor and connection are closed.
        connection.commit.assert_called_once()
        db_cursor.close.assert_called_once()
        connection.close.assert_called_once()

    @patch("psycopg2.connect")
    def test_main_database_error(self, mock_connect):
        """A failing connection attempt is handled inside main()."""
        mock_connect.side_effect = Exception("Database connection failed")

        # main() is expected to return normally rather than propagate the error.
        main()

        mock_connect.assert_called_once()


if __name__ == "__main__":
    unittest.main()
69 changes: 69 additions & 0 deletions auditor/scripts/test_valid_names/test_slurm_decoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env python3

import json
import unittest
from datetime import datetime

import requests


class TestSlurmEncodingReversal(unittest.TestCase):
    """Integration test: needs a service answering on http://localhost:8000/records."""

    def test_check_db_records(self):
        """The two records returned first (by record_id) carry decoded ids and meta."""
        # Only "record_id" and "meta" are compared below; the remaining fields
        # describe the full records this test expects to exist.
        record_1 = {
            "record_id": "record1",
            "meta": {
                "site_id": ["site/1"],
                "user_id": ["user/1"],
                "group_id": ["group/1"],
            },
            "components": [
                {
                    "name": "NumCPUs",
                    "amount": 31,
                    "scores": [{"name": "HEPSPEC", "value": 1.2}],
                }
            ],
            "start_time": "2022-06-27T15:00:00Z",
            "stop_time": "2022-06-27T15:01:00Z",
            "runtime": 6,
        }

        record_2 = {
            "record_id": "record2",
            "meta": {
                "site_id": ["site/2"],
                "user_id": ["user/2"],
                "group_id": ["group/2"],
            },
            "components": [
                {
                    "name": "NumCPUs",
                    "amount": 31,
                    "scores": [{"name": "HEPSPEC", "value": 1.2}],
                }
            ],
            "start_time": "2022-06-27T15:00:00Z",
            "stop_time": "2022-06-27T15:01:00Z",
            "runtime": 60,
        }

        # A timeout keeps the test from hanging if the service is unreachable.
        response = requests.get("http://localhost:8000/records", timeout=10)

        # Fail here with a clear message instead of continuing on a bad response.
        self.assertEqual(
            response.status_code,
            200,
            f"Failed to get record: {response.status_code}, {response.text}",
        )

        # Parse the body once and reuse it.
        records_json = response.json()
        print("Successfully retrieved records ", len(records_json))

        # Guard the indexing below so a short result fails as an assertion.
        self.assertGreaterEqual(len(records_json), 2, "expected at least two records")
        records = sorted(records_json, key=lambda x: x.get("record_id"))

        self.assertEqual(records[0].get("record_id"), record_1.get("record_id"))
        self.assertEqual(records[0].get("meta"), record_1.get("meta"))

        self.assertEqual(records[1].get("record_id"), record_2.get("record_id"))
        self.assertEqual(records[1].get("meta"), record_2.get("meta"))


# Run the tests when this file is executed directly
if __name__ == "__main__":
    unittest.main()
69 changes: 69 additions & 0 deletions auditor/scripts/test_valid_names/test_valid_names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env python3

import json
import unittest
from datetime import datetime

import requests


class TestHTCondorEncodingReversal(unittest.TestCase):
    """Integration test: needs a service answering on http://localhost:8000/records."""

    def test_check_db_records(self):
        """The two records returned first (by record_id) carry unencoded ids and meta."""
        # Only "record_id" and "meta" are compared below; the remaining fields
        # describe the full records this test expects to exist.
        record_1 = {
            "record_id": "record-example/job id & 1",
            "meta": {
                "site_id": ["site 1"],
                "user_id": ["user 1"],
                "group_id": ["group/1"],
            },
            "components": [
                {
                    "name": "NumCPUs",
                    "amount": 31,
                    "scores": [{"name": "HEPSPEC", "value": 1.2}],
                }
            ],
            "start_time": "2022-06-27T15:00:00Z",
            "stop_time": "2022-06-27T15:01:00Z",
            "runtime": 6,
        }

        record_2 = {
            "record_id": "record-example/job id & 2",
            "meta": {
                "site_id": ["site 2"],
                "user_id": ["user 2"],
                "group_id": ["group/2"],
            },
            "components": [
                {
                    "name": "NumCPUs",
                    "amount": 31,
                    "scores": [{"name": "HEPSPEC", "value": 1.2}],
                }
            ],
            "start_time": "2022-06-27T15:00:00Z",
            "stop_time": "2022-06-27T15:01:00Z",
            "runtime": 60,
        }

        # A timeout keeps the test from hanging if the service is unreachable.
        response = requests.get("http://localhost:8000/records", timeout=10)

        # Fail here with a clear message instead of continuing on a bad response.
        self.assertEqual(
            response.status_code,
            200,
            f"Failed to get record: {response.status_code}, {response.text}",
        )

        # Parse the body once and reuse it.
        records_json = response.json()
        print("Successfully retrieved records ", len(records_json))

        # Guard the indexing below so a short result fails as an assertion.
        self.assertGreaterEqual(len(records_json), 2, "expected at least two records")
        records = sorted(records_json, key=lambda x: x.get("record_id"))

        self.assertEqual(records[0].get("record_id"), record_1.get("record_id"))
        self.assertEqual(records[0].get("meta"), record_1.get("meta"))

        self.assertEqual(records[1].get("record_id"), record_2.get("record_id"))
        self.assertEqual(records[1].get("meta"), record_2.get("meta"))


# Run the tests when this file is executed directly
if __name__ == "__main__":
    unittest.main()
1 change: 0 additions & 1 deletion auditor/src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,5 @@
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.

pub const FORBIDDEN_CHARACTERS: [char; 9] = ['/', '(', ')', '"', '<', '>', '\\', '{', '}'];
pub const ERR_RECORD_EXISTS: &str = "RECORD_EXISTS";
pub const ERR_UNEXPECTED_ERROR: &str = "UNEXPECTED_ERROR";
Loading
Loading