def change_timestamp_format(
    json_string,
    timestamp_field,
    *,
    input_format='%Y-%m-%d %H:%M:%S',
    output_format='%Y-%m-%d',
):
    """
    Reformat a timestamp field in every object of a JSON array.

    By default the value is converted from 'YYYY-MM-DD HH:MM:SS' to
    'YYYY-MM-DD'. The conversion is best-effort: records that lack the
    field, hold a non-string value (e.g. null), or hold a string that does
    not match ``input_format`` are left untouched.

    Args:
        json_string (str): JSON text whose top level is a list of objects.
        timestamp_field (str): Name of the field to reformat.
        input_format (str): ``strptime`` pattern the existing value must
            match. Keyword-only.
        output_format (str): ``strftime`` pattern used for the new value.
            Keyword-only.

    Returns:
        str: The re-serialized JSON string (non-ASCII characters are kept
        as-is rather than escaped).

    Raises:
        json.JSONDecodeError: If ``json_string`` is not valid JSON.
    """
    data_list = json.loads(json_string)

    for data in data_list:
        # Only JSON objects can carry the field. Without this guard a string
        # element would turn ``in`` into a substring test and then fail on
        # indexing, and a number would fail on ``in`` itself.
        if not isinstance(data, dict):
            continue

        original_timestamp = data.get(timestamp_field)
        # strptime raises TypeError (not ValueError) for null/numeric values,
        # so filter them out before parsing.
        if not isinstance(original_timestamp, str):
            continue

        try:
            parsed = datetime.strptime(original_timestamp, input_format)
        except ValueError:
            # Deliberate best-effort: keep values in an unexpected format.
            continue

        data[timestamp_field] = parsed.strftime(output_format)

    return json.dumps(data_list, ensure_ascii=False)
class TestExcelConverter(unittest.TestCase):
    """Round-trip a small DataFrame through an Excel file and ExcelConverter."""

    def setUp(self):
        # Build the fixture DataFrame that will be written to disk.
        self.data = {
            'Timestamp': [
                pd.Timestamp('2024-04-20 22:03:30'),
                pd.Timestamp('2024-04-10 22:49:39'),
            ],
            'Account': ['ABC123', 'XYZ789'],
            'Holdback': ['None', 'Test holdback message'],
            'Comments': [None, None],
            'Features': [None, None],
        }
        self.df = pd.DataFrame(self.data)

        # Persist it as the Excel workbook the converter reads.
        self.test_excel_file = 'test_excel_file.xlsx'
        self.df.to_excel(
            self.test_excel_file,
            index=False,
            engine='openpyxl',
        )

    def tearDown(self):
        # Remove the workbook created in setUp, if it is still there.
        workbook_path = self.test_excel_file
        if not os.path.exists(workbook_path):
            return
        os.remove(workbook_path)

    def test_excel_to_json(self):
        # Convert the workbook and inspect the single resulting chunk.
        chunks = ExcelConverter(self.test_excel_file).to_json(chunk_size=2)
        self.assertEqual(len(chunks), 1)

        rows = json.loads(chunks[0])
        self.assertEqual(len(rows), 2)

        first_row = rows[0]
        second_row = rows[1]
        self.assertEqual(first_row['Account'], 'ABC123')
        self.assertEqual(second_row['Account'], 'XYZ789')
        self.assertEqual(first_row['Timestamp'], '2024-04-20 22:03:30')
class TestJsonValidator(unittest.TestCase):
    """Exercise the JSON helper functions against a two-record sample."""

    def setUp(self):
        # Sample payload shared by every test in this class.
        self.json_string = '''
        [
            {"Timestamp": "2024-04-20 22:03:30", "Account": "ABC123", "Holdback": "None", "Comments": null, "Features": null},
            {"Timestamp": "2024-04-10 22:49:39", "Account": "XYZ789", "Holdback": "Test holdback message", "Comments": null, "Features": null}
        ]
        '''

    def test_remove_keys_from_json(self):
        # Both keys must be absent from every record after removal.
        keys_to_remove = [["Comments"], ["Features"]]
        records = json.loads(
            remove_keys_from_json(self.json_string, keys_to_remove)
        )

        for record in records:
            for dropped_key in ("Comments", "Features"):
                self.assertNotIn(dropped_key, record)

    def test_change_timestamp_format(self):
        # Each timestamp should be truncated to its date part.
        records = json.loads(
            change_timestamp_format(self.json_string, "Timestamp")
        )

        expected_dates = ['2024-04-20', '2024-04-10']
        for position, expected in enumerate(expected_dates):
            self.assertEqual(records[position]['Timestamp'], expected)


if __name__ == '__main__':
    unittest.main()