-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3 from ling6614/feature/add-file-processing-tests
Feature/add file processing tests
- Loading branch information
Showing
3 changed files
with
98 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,39 @@ | ||
# Excel parser module | ||
import pandas as pd | ||
import json | ||
import os | ||
|
||
class ExcelConverter:
    """Read an Excel workbook into a DataFrame and export its rows as JSON.

    The workbook is loaded eagerly in ``__init__`` with ``pandas.read_excel``
    (``openpyxl`` engine) and kept on ``self.data_frame``.
    """

    # Format applied to every pandas Timestamp cell before serialization.
    _TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'

    def __init__(self, file_path):
        """Load the workbook at ``file_path`` into ``self.data_frame``."""
        self.file_path = file_path
        self.data_frame = pd.read_excel(file_path, engine='openpyxl')

    @classmethod
    def _json_safe(cls, value):
        """Return ``value`` in a form ``json.dumps`` can emit as valid JSON."""
        # NaT is checked first: it is not a pd.Timestamp instance, so without
        # this branch json.dumps would raise TypeError on it.
        if value is pd.NaT:
            return None
        if isinstance(value, pd.Timestamp):
            return value.strftime(cls._TIMESTAMP_FORMAT)
        # NaN is the only float that differs from itself. json.dumps would
        # write it as the bare token ``NaN``, which is not valid JSON.
        if isinstance(value, float) and value != value:
            return None
        return value

    def to_json(self, chunk_size=5):
        """Serialize the rows as a list of JSON array strings.

        Each row becomes one JSON object keyed by column name. Timestamps are
        rendered as ``YYYY-MM-DD HH:MM:SS`` strings; missing cells (NaN/NaT)
        become ``null``. Rows are grouped, in order, into chunks of at most
        ``chunk_size`` rows and every chunk is dumped as one JSON array.

        Args:
            chunk_size: Maximum number of rows per JSON string; must be >= 1.

        Returns:
            A list of JSON strings, one per chunk (empty if there are no rows).

        Raises:
            ValueError: If ``chunk_size`` is smaller than 1.
        """
        # A negative step would make range() empty and silently drop all rows.
        if chunk_size < 1:
            raise ValueError(
                f"chunk_size must be a positive integer, got {chunk_size!r}"
            )

        records = [
            {
                column: self._json_safe(value)
                for column, value in row.to_dict().items()
            }
            for _, row in self.data_frame.iterrows()
        ]

        return [
            json.dumps(records[start:start + chunk_size], ensure_ascii=False)
            for start in range(0, len(records), chunk_size)
        ]

    # TODO: Implement to_yaml method
    # def to_yaml(self):
    #     data = self.data_frame.to_dict(orient='records')
    #     return yaml.dump(data, allow_unicode=True)

    # TODO: Implement to_xml method
    # def to_xml(self):
    #     root = ET.Element("root")
    #     for _, row in self.data_frame.iterrows():
    #         item = ET.SubElement(root, "item")
    #         for key, value in row.items():
    #             if isinstance(value, pd.Timestamp):
    #                 value = value.strftime('%Y-%m-%d %H:%M:%S')
    #             child = ET.SubElement(item, key)
    #             child.text = str(value)
    #     return ET.tostring(root, encoding='unicode')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,17 @@ | ||
# JSON validator module
|
||
import json | ||
|
||
def remove_keys_from_json(json_string, keys_to_remove):
    """Strip the given keys from every object in a JSON document.

    Args:
        json_string: JSON text holding either an array of objects or a single
            object. Array items that are not objects are left untouched, and
            any other top-level value is returned re-serialized as is.
        keys_to_remove: Iterable of key groups, e.g. ``[["a"], ["b", "c"]]``.
            A bare string entry is treated as one key rather than being
            iterated character by character.

    Returns:
        The modified document re-serialized with ``ensure_ascii=False``.

    Raises:
        json.JSONDecodeError: If ``json_string`` is not valid JSON.
    """
    # Flatten the key groups once so each record is processed in one pass.
    unwanted = set()
    for entry in keys_to_remove:
        if isinstance(entry, str):
            unwanted.add(entry)
        else:
            unwanted.update(entry)

    # Parse the JSON text into Python objects.
    payload = json.loads(json_string)

    # Treat a lone top-level object like a one-record array.
    if isinstance(payload, dict):
        records = [payload]
    elif isinstance(payload, list):
        records = payload
    else:
        records = []

    for record in records:
        # Only objects have keys; scalars and nested arrays pass through.
        if isinstance(record, dict):
            for key in unwanted:
                record.pop(key, None)

    # Serialize back to a JSON string, keeping non-ASCII text readable.
    return json.dumps(payload, ensure_ascii=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,43 @@ | ||
# Test file processing | ||
import json
import os
import tempfile
import unittest

import pandas as pd

from ai_commons.file_processing.excel_parser import ExcelConverter
from ai_commons.file_processing.json_validator import remove_keys_from_json
|
||
class TestExcelConverter(unittest.TestCase):
    """Round-trip test: write a small workbook, convert it back to JSON."""

    def setUp(self):
        # Build the sample workbook inside a private temporary directory so
        # the test never overwrites or deletes a real ``test.xlsx`` in the
        # working directory and parallel runs cannot collide.
        tmp_dir = tempfile.TemporaryDirectory()
        # Registered before any file is written, so the directory is removed
        # even if the rest of setUp fails.
        self.addCleanup(tmp_dir.cleanup)
        self.test_file = os.path.join(tmp_dir.name, 'test.xlsx')

        df = pd.DataFrame({
            'Timestamp': [pd.Timestamp('2024-04-07 20:50:30'), pd.Timestamp('2024-05-08 15:30:45')],
            'User ID': ['U12345', 'U67890'],
            'Gift': ['Flower', 'Chocolate'],
        })
        df.to_excel(self.test_file, index=False)

    def test_to_json(self):
        """Both rows fit in one chunk and timestamps are rendered as text."""
        converter = ExcelConverter(self.test_file)
        json_output = converter.to_json(chunk_size=2)
        expected_output = [
            '[{"Timestamp": "2024-04-07 20:50:30", "User ID": "U12345", "Gift": "Flower"}, {"Timestamp": "2024-05-08 15:30:45", "User ID": "U67890", "Gift": "Chocolate"}]'
        ]
        self.assertEqual(json_output, expected_output)
|
||
class TestJsonValidator(unittest.TestCase):
    """Checks for ``remove_keys_from_json``."""

    def test_remove_keys_from_json(self):
        """Keys listed in any group are dropped from every record."""
        source = '[{"Timestamp": "2024-04-07 20:50:30", "User ID": "U12345", "Gift": "Flower"}, {"Timestamp": "2024-05-08 15:30:45", "User ID": "U67890", "Gift": "Chocolate"}]'
        wanted = '[{"Gift": "Flower"}, {"Gift": "Chocolate"}]'

        # Each inner list is one group of keys to strip.
        stripped = remove_keys_from_json(source, [["Timestamp"], ["User ID"]])

        self.assertEqual(stripped, wanted)
|
||
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()