Skip to content

Commit 99900e1

Browse files
committed
Add ExcelConverter class to parse and convert Excel files to JSON
1 parent f0b55c4 commit 99900e1

File tree

1 file changed

+38
-0
lines changed

1 file changed

+38
-0
lines changed
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,39 @@
11
# Excel parser module
2+
import pandas as pd
3+
import json
4+
import os
5+
6+
class ExcelConverter:
    """Load an Excel workbook into a DataFrame and export it as chunked JSON.

    The workbook is read once, in ``__init__``, via
    ``pd.read_excel(file_path, engine='openpyxl')``; the result is kept on
    ``self.data_frame`` and reused by the export methods.
    """

    # Format applied to every pandas Timestamp cell during export.
    _TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'

    def __init__(self, file_path):
        """Read ``file_path`` with the openpyxl engine and store the frame.

        Args:
            file_path: Passed straight through to ``pd.read_excel``; also
                stored unchanged on ``self.file_path``.
        """
        self.file_path = file_path
        self.data_frame = pd.read_excel(file_path, engine='openpyxl')

    def to_json(self, chunk_size=5):
        """Serialize the rows as a list of JSON array strings.

        Each row becomes one JSON object keyed by column label. Rows are
        grouped, in order, into arrays of at most ``chunk_size`` objects and
        every group is dumped to its own string (non-ASCII characters are
        kept as-is).

        Cell conversions:
            * ``pd.Timestamp`` -> ``'YYYY-MM-DD HH:MM:SS'`` string.
            * NaN / ``NaT`` (empty cells) -> JSON ``null``. Without this,
              NaN would be emitted as the non-standard token ``NaN`` and
              ``NaT`` would make ``json.dumps`` raise ``TypeError``.

        Args:
            chunk_size: Maximum number of rows per JSON string; must be >= 1.

        Returns:
            A list of JSON strings; empty when the frame has no rows.

        Raises:
            ValueError: If ``chunk_size`` is smaller than 1. A negative value
                would otherwise silently yield an empty result.
        """
        if chunk_size < 1:
            raise ValueError(
                f"chunk_size must be a positive integer, got {chunk_size!r}"
            )

        # orient='records' converts column by column, so an integer column
        # stays integer; iterrows() would upcast each row to one common dtype
        # and turn e.g. 1 into 1.0 whenever a float column is present.
        records = [
            {key: self._to_json_value(value) for key, value in record.items()}
            for record in self.data_frame.to_dict(orient='records')
        ]

        return [
            json.dumps(records[start:start + chunk_size], ensure_ascii=False)
            for start in range(0, len(records), chunk_size)
        ]

    @classmethod
    def _to_json_value(cls, value):
        """Map one cell value to something ``json.dumps`` can emit as valid JSON."""
        # NaT is checked first: it marks a missing datetime and is not a
        # pd.Timestamp instance, so the strftime branch would never see it.
        if value is pd.NaT:
            return None
        if isinstance(value, pd.Timestamp):
            return value.strftime(cls._TIMESTAMP_FORMAT)
        # NaN is the only float that is not equal to itself.
        if isinstance(value, float) and value != value:
            return None
        return value
23+
24+
# TODO: Implement to_yaml method
25+
# def to_yaml(self):
26+
# data = self.data_frame.to_dict(orient='records')
27+
# return yaml.dump(data, allow_unicode=True)
28+
29+
# TODO: Implement to_xml method
30+
# def to_xml(self):
31+
# root = ET.Element("root")
32+
# for _, row in self.data_frame.iterrows():
33+
# item = ET.SubElement(root, "item")
34+
# for key, value in row.items():
35+
# if isinstance(value, pd.Timestamp):
36+
# value = value.strftime('%Y-%m-%d %H:%M:%S')
37+
# child = ET.SubElement(item, key)
38+
# child.text = str(value)
39+
# return ET.tostring(root, encoding='unicode')

0 commit comments

Comments
 (0)