Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/update json validator #6

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion ai_commons/file_processing/json_validator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Json validator module

import json
from datetime import datetime

def remove_keys_from_json(json_string, keys_to_remove):
# 解析JSON字符串为列表
Expand All @@ -14,4 +15,31 @@ def remove_keys_from_json(json_string, keys_to_remove):
del data[key]

# 将列表转换回JSON字符串
return json.dumps(data_list, ensure_ascii=False)
return json.dumps(data_list, ensure_ascii=False)

import json
from datetime import datetime

def change_timestamp_format(json_string, timestamp_field):
    """Truncate a timestamp field to its date part in each record of a JSON array.

    Values of ``timestamp_field`` that match ``'YYYY-MM-DD HH:MM:SS'`` are
    rewritten as ``'YYYY-MM-DD'``. Records that lack the field, and values
    that are not strings in exactly that format (including ``null`` and
    numbers), are left unchanged.

    Args:
        json_string (str): JSON text, expected to decode to a list of objects.
        timestamp_field (str): Name of the field whose value is reformatted.

    Returns:
        str: The re-serialized JSON, with non-ASCII characters kept verbatim.

    Raises:
        json.JSONDecodeError: If ``json_string`` is not valid JSON.
    """
    records = json.loads(json_string)

    for record in records:
        # Only JSON objects can carry the field; skip scalars and nested lists.
        if not isinstance(record, dict) or timestamp_field not in record:
            continue
        try:
            parsed = datetime.strptime(record[timestamp_field], '%Y-%m-%d %H:%M:%S')
        except (ValueError, TypeError):
            # ValueError: a string in some other format.
            # TypeError: a non-string value such as null or a number.
            # Best effort: keep the original value instead of failing the batch.
            continue
        record[timestamp_field] = parsed.strftime('%Y-%m-%d')

    return json.dumps(records, ensure_ascii=False)
82 changes: 55 additions & 27 deletions tests/test_file_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,70 @@
import json
import os
import pandas as pd
from datetime import datetime
from ai_commons.file_processing.excel_parser import ExcelConverter
from ai_commons.file_processing.json_validator import remove_keys_from_json
from ai_commons.file_processing.json_validator import remove_keys_from_json, change_timestamp_format

class TestExcelConverter(unittest.TestCase):
    """Exercise ``ExcelConverter.to_json`` against two temporary workbooks."""

    def setUp(self):
        # Fixture 1: a two-row gift log written to 'test.xlsx'.
        self.test_file = 'test.xlsx'
        gift_frame = pd.DataFrame({
            'Timestamp': [pd.Timestamp('2024-04-07 20:50:30'), pd.Timestamp('2024-05-08 15:30:45')],
            'User ID': ['U12345', 'U67890'],
            'Gift': ['Flower', 'Chocolate']
        })
        gift_frame.to_excel(self.test_file, index=False)

        # Fixture 2: a two-row account sheet, kept on the instance as both
        # the raw dict and the DataFrame built from it.
        self.data = {
            'Timestamp': [pd.Timestamp('2024-04-20 22:03:30'), pd.Timestamp('2024-04-10 22:49:39')],
            'Account': ['ABC123', 'XYZ789'],
            'Holdback': ['None', 'Test holdback message'],
            'Comments': [None, None],
            'Features': [None, None]
        }
        self.df = pd.DataFrame(self.data)

        # Persist the second fixture as an Excel workbook via openpyxl.
        self.test_excel_file = 'test_excel_file.xlsx'
        self.df.to_excel(self.test_excel_file, index=False, engine='openpyxl')

    def tearDown(self):
        # Remove whichever fixture workbooks exist, in creation order.
        for workbook_path in (self.test_file, self.test_excel_file):
            if os.path.exists(workbook_path):
                os.remove(workbook_path)

    def test_to_json(self):
        # Both rows fit in one chunk, so a single JSON string is expected.
        expected_output = [
            '[{"Timestamp": "2024-04-07 20:50:30", "User ID": "U12345", "Gift": "Flower"}, {"Timestamp": "2024-05-08 15:30:45", "User ID": "U67890", "Gift": "Chocolate"}]'
        ]
        json_output = ExcelConverter(self.test_file).to_json(chunk_size=2)
        self.assertEqual(json_output, expected_output)

    def test_excel_to_json(self):
        # Convert the account workbook and inspect the decoded rows.
        chunks = ExcelConverter(self.test_excel_file).to_json(chunk_size=2)

        self.assertEqual(len(chunks), 1)
        rows = json.loads(chunks[0])

        self.assertEqual(len(rows), 2)
        first_row, second_row = rows[0], rows[1]
        self.assertEqual(first_row['Account'], 'ABC123')
        self.assertEqual(second_row['Account'], 'XYZ789')
        self.assertEqual(first_row['Timestamp'], '2024-04-20 22:03:30')

class TestJsonValidator(unittest.TestCase):
    """Tests for ``remove_keys_from_json`` and ``change_timestamp_format``."""

    def setUp(self):
        # Shared two-record JSON array used by the tests below.
        self.json_string = (
            '\n'
            '[\n'
            '{"Timestamp": "2024-04-20 22:03:30", "Account": "ABC123", "Holdback": "None", "Comments": null, "Features": null},\n'
            '{"Timestamp": "2024-04-10 22:49:39", "Account": "XYZ789", "Holdback": "Test holdback message", "Comments": null, "Features": null}\n'
            ']\n'
        )

    def test_remove_keys_from_json(self):
        # Part 1: exact-output check on an inline gift log.
        gift_json = '[{"Timestamp": "2024-04-07 20:50:30", "User ID": "U12345", "Gift": "Flower"}, {"Timestamp": "2024-05-08 15:30:45", "User ID": "U67890", "Gift": "Chocolate"}]'
        stripped_gift_json = remove_keys_from_json(gift_json, [["Timestamp"], ["User ID"]])
        self.assertEqual(stripped_gift_json, '[{"Gift": "Flower"}, {"Gift": "Chocolate"}]')

        # Part 2: the shared fixture must lose both null-valued columns.
        cleaned_json_string = remove_keys_from_json(self.json_string, [["Comments"], ["Features"]])
        for record in json.loads(cleaned_json_string):
            self.assertNotIn("Comments", record)
            self.assertNotIn("Features", record)

    def test_change_timestamp_format(self):
        # Each timestamp should be reduced to its date portion.
        converted = json.loads(change_timestamp_format(self.json_string, "Timestamp"))

        self.assertEqual(converted[0]['Timestamp'], '2024-04-20')
        self.assertEqual(converted[1]['Timestamp'], '2024-04-10')

if __name__ == '__main__':
unittest.main()
Loading