Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/add file processing tests #3

Merged
merged 3 commits into from
Jun 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions ai_commons/file_processing/excel_parser.py
Original file line number Diff line number Diff line change
@@ -1 +1,39 @@
# Excel parser module
import pandas as pd
import json
import os

class ExcelConverter:
    """Load an Excel workbook and serialize its rows as chunked JSON strings."""

    def __init__(self, file_path):
        """Read the workbook at ``file_path`` into ``self.data_frame``.

        Args:
            file_path: Path of the Excel file, read with the ``openpyxl`` engine.
        """
        self.file_path = file_path
        self.data_frame = pd.read_excel(file_path, engine='openpyxl')

    def to_json(self, chunk_size=5):
        """Serialize the rows as a list of JSON array strings.

        Each row becomes a JSON object keyed by column name. Rows are grouped
        in order into chunks of at most ``chunk_size`` rows, and every chunk
        is dumped as one JSON array string.

        Args:
            chunk_size: Maximum number of rows per JSON string; must be >= 1.

        Returns:
            A list of JSON strings, one per chunk (empty when there are no rows).

        Raises:
            ValueError: If ``chunk_size`` is less than 1.
        """
        # A zero step makes range() fail with an unrelated message, and a
        # negative step silently yields no chunks, so reject both up front.
        if chunk_size < 1:
            raise ValueError(
                "chunk_size must be a positive integer, got %r" % (chunk_size,)
            )

        json_list = [
            {key: self._to_json_value(value) for key, value in row.to_dict().items()}
            for _, row in self.data_frame.iterrows()
        ]

        chunked_list = [
            json_list[start:start + chunk_size]
            for start in range(0, len(json_list), chunk_size)
        ]
        return [json.dumps(chunk, ensure_ascii=False) for chunk in chunked_list]

    @staticmethod
    def _to_json_value(value):
        """Convert one cell value into something ``json.dumps`` can emit."""
        # Missing cells (NaN / NaT / None) become JSON null: NaN would be
        # written as the non-standard ``NaN`` token and NaT is not
        # serializable at all.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return None
        if isinstance(value, pd.Timestamp):
            return value.strftime('%Y-%m-%d %H:%M:%S')
        return value

# TODO: Implement to_yaml method
# def to_yaml(self):
# data = self.data_frame.to_dict(orient='records')
# return yaml.dump(data, allow_unicode=True)

# TODO: Implement to_xml method
# def to_xml(self):
# root = ET.Element("root")
# for _, row in self.data_frame.iterrows():
# item = ET.SubElement(root, "item")
# for key, value in row.items():
# if isinstance(value, pd.Timestamp):
# value = value.strftime('%Y-%m-%d %H:%M:%S')
# child = ET.SubElement(item, key)
# child.text = str(value)
# return ET.tostring(root, encoding='unicode')
18 changes: 17 additions & 1 deletion ai_commons/file_processing/json_validator.py
Original file line number Diff line number Diff line change
@@ -1 +1,17 @@
# JSON validator module
# Json validator module

import json

def remove_keys_from_json(json_string, keys_to_remove):
    """Return ``json_string`` re-serialized with the given keys removed.

    Args:
        json_string: JSON text holding a list of objects, or a single object.
        keys_to_remove: Keys to delete. May be a list of key lists such as
            ``[["a"], ["b", "c"]]``, a flat list of keys such as
            ``["a", "b"]``, or a single key string.

    Returns:
        The JSON text, dumped with ``ensure_ascii=False``, with the keys
        removed from every top-level object. List elements that are not
        objects are left untouched.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(keys_to_remove, str):
        keys_to_remove = [keys_to_remove]

    # Flatten the mixed flat/nested input into one set of keys.
    doomed_keys = set()
    for entry in keys_to_remove:
        if isinstance(entry, str):
            doomed_keys.add(entry)
        else:
            doomed_keys.update(entry)

    # Parse the JSON text.
    data_list = json.loads(json_string)

    if isinstance(data_list, dict):
        records = [data_list]
    elif isinstance(data_list, list):
        records = data_list
    else:
        # A scalar document has no keys to remove.
        records = []

    # Strip the keys from every object; non-object elements are skipped.
    for data in records:
        if not isinstance(data, dict):
            continue
        for key in doomed_keys:
            data.pop(key, None)

    # Serialize back to JSON text, keeping non-ASCII characters readable.
    return json.dumps(data_list, ensure_ascii=False)
44 changes: 43 additions & 1 deletion tests/test_file_processing.py
Original file line number Diff line number Diff line change
@@ -1 +1,43 @@
# Test file processing
import unittest
import json
import os
import pandas as pd
from ai_commons.file_processing.excel_parser import ExcelConverter
from ai_commons.file_processing.json_validator import remove_keys_from_json

class TestExcelConverter(unittest.TestCase):
    """Round-trip check of ``ExcelConverter.to_json`` against a generated workbook."""

    def setUp(self):
        # Write a small two-row sample workbook for the test to read back.
        self.test_file = 'test.xlsx'
        sample_columns = {
            'Timestamp': [pd.Timestamp('2024-04-07 20:50:30'), pd.Timestamp('2024-05-08 15:30:45')],
            'User ID': ['U12345', 'U67890'],
            'Gift': ['Flower', 'Chocolate'],
        }
        pd.DataFrame(sample_columns).to_excel(self.test_file, index=False)

    def tearDown(self):
        # Delete the sample workbook if it is still on disk.
        if not os.path.exists(self.test_file):
            return
        os.remove(self.test_file)

    def test_to_json(self):
        # Both rows fit in one chunk, so exactly one JSON string is expected.
        expected_output = [
            '[{"Timestamp": "2024-04-07 20:50:30", "User ID": "U12345", "Gift": "Flower"}, {"Timestamp": "2024-05-08 15:30:45", "User ID": "U67890", "Gift": "Chocolate"}]'
        ]
        json_output = ExcelConverter(self.test_file).to_json(chunk_size=2)
        self.assertEqual(json_output, expected_output)

class TestJsonValidator(unittest.TestCase):
    """Checks that ``remove_keys_from_json`` strips the requested keys from each record."""

    def test_remove_keys_from_json(self):
        # Two records with three keys each; only "Gift" should survive.
        source_json = '[{"Timestamp": "2024-04-07 20:50:30", "User ID": "U12345", "Gift": "Flower"}, {"Timestamp": "2024-05-08 15:30:45", "User ID": "U67890", "Gift": "Chocolate"}]'
        stripped_json = remove_keys_from_json(source_json, [["Timestamp"], ["User ID"]])
        self.assertEqual(stripped_json, '[{"Gift": "Flower"}, {"Gift": "Chocolate"}]')

# Run the unittest test runner when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
Loading