Merge pull request #3 from ling6614/feature/add-file-processing-tests

Feature/add file processing tests
telepace · Jun 8, 2024 · ce5ec74 · ce5ec74
2 parents 81b6400 + c0d4047
commit ce5ec74
Show file tree

Hide file tree

Showing 3 changed files with 98 additions and 2 deletions.
diff --git a/ai_commons/file_processing/excel_parser.py b/ai_commons/file_processing/excel_parser.py
@@ -1 +1,39 @@
 # Excel parser module
+import pandas as pd
+import json
+import os
+
+class ExcelConverter:
+    """Load an Excel workbook into a DataFrame and export it as chunked JSON.
+
+    The workbook at ``file_path`` is read eagerly in the constructor using the
+    ``openpyxl`` engine; the parsed sheet is kept in ``data_frame``.
+    """
+
+    def __init__(self, file_path):
+        self.file_path = file_path
+        self.data_frame = pd.read_excel(file_path, engine='openpyxl')
+
+    def to_json(self, chunk_size=5):
+        """Serialize the sheet row by row into a list of JSON array strings.
+
+        Each row becomes one JSON object keyed by column name, and rows are
+        grouped into arrays of at most ``chunk_size`` objects.
+
+        Args:
+            chunk_size: Maximum number of rows per JSON string; must be >= 1.
+
+        Returns:
+            A list of JSON strings (non-ASCII characters are kept verbatim).
+            An empty sheet yields an empty list.
+
+        Raises:
+            ValueError: If ``chunk_size`` is smaller than 1.
+        """
+        # Fail loudly: 0 would otherwise surface as an opaque range() error
+        # and a negative value would silently produce no output at all.
+        if chunk_size < 1:
+            raise ValueError(
+                f"chunk_size must be a positive integer, got {chunk_size!r}"
+            )
+
+        records = [
+            {key: self._to_json_value(value) for key, value in row.to_dict().items()}
+            for _, row in self.data_frame.iterrows()
+        ]
+        return [
+            json.dumps(records[start:start + chunk_size], ensure_ascii=False)
+            for start in range(0, len(records), chunk_size)
+        ]
+
+    @staticmethod
+    def _to_json_value(value):
+        """Convert one cell value into something ``json.dumps`` can encode."""
+        # Empty cells arrive as NaN / NaT / None. NaT makes json.dumps raise
+        # and NaN is emitted as the non-standard token ``NaN``, so map every
+        # missing scalar to JSON null instead.
+        if pd.api.types.is_scalar(value) and pd.isna(value):
+            return None
+        if isinstance(value, pd.Timestamp):
+            return value.strftime('%Y-%m-%d %H:%M:%S')
+        return value
+
+    # TODO: Implement to_yaml method
+    # def to_yaml(self):
+    #     data = self.data_frame.to_dict(orient='records')
+    #     return yaml.dump(data, allow_unicode=True)
+
+    # TODO: Implement to_xml method
+    # def to_xml(self):
+    #     root = ET.Element("root")
+    #     for _, row in self.data_frame.iterrows():
+    #         item = ET.SubElement(root, "item")
+    #         for key, value in row.items():
+    #             if isinstance(value, pd.Timestamp):
+    #                 value = value.strftime('%Y-%m-%d %H:%M:%S')
+    #             child = ET.SubElement(item, key)
+    #             child.text = str(value)
+    #     return ET.tostring(root, encoding='unicode')
diff --git a/ai_commons/file_processing/json_validator.py b/ai_commons/file_processing/json_validator.py
@@ -1 +1,17 @@
-# JSON validator module
+# Json validator module
+
+import json
+
+def remove_keys_from_json(json_string, keys_to_remove):
+    """Strip the given keys from the objects in a JSON document.
+
+    Args:
+        json_string: JSON text holding an array of objects. A single top-level
+            object is also accepted and is processed on its own.
+        keys_to_remove: Keys to delete. May be a list of key lists
+            (``[["a"], ["b", "c"]]``), a flat list of keys (``["a", "b"]``),
+            a mix of both, or a single key string.
+
+    Returns:
+        The document re-serialized as a JSON string (non-ASCII characters are
+        kept verbatim). Keys that are absent are ignored; array elements that
+        are not objects are passed through untouched.
+
+    Raises:
+        json.JSONDecodeError: If ``json_string`` is not valid JSON.
+    """
+    # A bare string would otherwise be iterated character by character.
+    if isinstance(keys_to_remove, str):
+        keys_to_remove = [keys_to_remove]
+
+    # Flatten once into a set so each record is scanned a single time and a
+    # plain string entry is treated as one key, not as a sequence of letters.
+    doomed_keys = set()
+    for entry in keys_to_remove:
+        if isinstance(entry, str):
+            doomed_keys.add(entry)
+        else:
+            doomed_keys.update(entry)
+
+    # Parse the JSON text; a lone top-level object is handled like a
+    # one-element array but serialized back in its original shape.
+    payload = json.loads(json_string)
+    records = payload if isinstance(payload, list) else [payload]
+
+    for record in records:
+        # Only JSON objects have keys to strip.
+        if isinstance(record, dict):
+            for key in doomed_keys:
+                record.pop(key, None)
+
+    # Convert the (mutated) document back into a JSON string.
+    return json.dumps(payload, ensure_ascii=False)
diff --git a/tests/test_file_processing.py b/tests/test_file_processing.py
@@ -1 +1,43 @@
-# Test file processing
+import unittest
+import json
+import os
+import pandas as pd
+from ai_commons.file_processing.excel_parser import ExcelConverter
+from ai_commons.file_processing.json_validator import remove_keys_from_json
+
+class TestExcelConverter(unittest.TestCase):
+    """Round-trip check: write a small workbook, convert it, compare the JSON."""
+
+    def setUp(self):
+        # Imported here so this test case does not rely on the module's
+        # top-level import list.
+        import tempfile
+
+        # Build the sample workbook inside a private temporary directory
+        # instead of the current working directory, so the test can neither
+        # overwrite nor delete an unrelated ``test.xlsx`` and parallel runs do
+        # not collide. addCleanup also runs when setUp itself fails part-way,
+        # which a tearDown method would not.
+        self._tmp_dir = tempfile.TemporaryDirectory()
+        self.addCleanup(self._tmp_dir.cleanup)
+        self.test_file = os.path.join(self._tmp_dir.name, 'test.xlsx')
+
+        # Create a sample Excel file.
+        df = pd.DataFrame({
+            'Timestamp': [pd.Timestamp('2024-04-07 20:50:30'), pd.Timestamp('2024-05-08 15:30:45')],
+            'User ID': ['U12345', 'U67890'],
+            'Gift': ['Flower', 'Chocolate']
+        })
+        df.to_excel(self.test_file, index=False)
+
+    def test_to_json(self):
+        # Two rows with chunk_size=2 must come back as a single JSON array
+        # string with timestamps rendered as 'YYYY-MM-DD HH:MM:SS'.
+        converter = ExcelConverter(self.test_file)
+        json_output = converter.to_json(chunk_size=2)
+        expected_output = [
+            '[{"Timestamp": "2024-04-07 20:50:30", "User ID": "U12345", "Gift": "Flower"}, {"Timestamp": "2024-05-08 15:30:45", "User ID": "U67890", "Gift": "Chocolate"}]'
+        ]
+        self.assertEqual(json_output, expected_output)
+
+class TestJsonValidator(unittest.TestCase):
+    # Exercises remove_keys_from_json on a two-record JSON array.
+
+    def test_remove_keys_from_json(self):
+        # Input: two records, each carrying three fields.
+        source = '[{"Timestamp": "2024-04-07 20:50:30", "User ID": "U12345", "Gift": "Flower"}, {"Timestamp": "2024-05-08 15:30:45", "User ID": "U67890", "Gift": "Chocolate"}]'
+
+        # Dropping both key groups must leave only the "Gift" field.
+        actual = remove_keys_from_json(source, [["Timestamp"], ["User ID"]])
+
+        self.assertEqual(actual, '[{"Gift": "Flower"}, {"Gift": "Chocolate"}]')
+
+# Run every test case in this module when the file is executed as a script.
+if __name__ == '__main__':
+    unittest.main()