|
1 | 1 | # Excel parser module
|
| 2 | +import pandas as pd |
| 3 | +import json |
| 4 | +import os |
| 5 | + |
class ExcelConverter:
    """Load an Excel sheet into a DataFrame and export its rows as JSON chunks."""

    # Format used when a timestamp cell is rendered as a JSON string.
    _TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'

    def __init__(self, file_path):
        """Read the workbook at *file_path* using the openpyxl engine.

        Args:
            file_path: Location of the Excel file; passed straight to
                ``pd.read_excel``.
        """
        self.file_path = file_path
        self.data_frame = pd.read_excel(file_path, engine='openpyxl')

    @classmethod
    def _to_json_value(cls, value):
        """Return *value* in a form that ``json.dumps`` can serialize."""
        # NaT must be checked explicitly: it is not a pd.Timestamp instance
        # and json.dumps raises TypeError on it.
        if value is pd.NaT:
            return None
        if isinstance(value, pd.Timestamp):
            return value.strftime(cls._TIMESTAMP_FORMAT)
        return value

    def to_json(self, chunk_size=5):
        """Serialize the rows as JSON arrays of at most *chunk_size* rows each.

        Each row becomes a JSON object keyed by column label. Timestamp cells
        are written as ``'YYYY-MM-DD HH:MM:SS'`` strings and missing
        timestamps (``NaT``) as ``null``.

        NOTE: float ``NaN`` cells are passed through unchanged, so
        ``json.dumps`` renders them as the non-standard ``NaN`` token.

        Args:
            chunk_size: Maximum number of rows per JSON string.

        Returns:
            A list of JSON strings, one per chunk, in row order. The list is
            empty when the DataFrame has no rows.

        Raises:
            ValueError: If *chunk_size* is less than 1.
        """
        # A negative step would make range() empty and silently drop every
        # row; zero already raised ValueError, so reject both the same way.
        if chunk_size < 1:
            raise ValueError(
                f"chunk_size must be a positive integer, got {chunk_size!r}"
            )

        records = [
            {
                key: self._to_json_value(value)
                for key, value in row.to_dict().items()
            }
            for _, row in self.data_frame.iterrows()
        ]

        # ensure_ascii=False keeps non-ASCII text readable instead of \u-escaped.
        return [
            json.dumps(records[start:start + chunk_size], ensure_ascii=False)
            for start in range(0, len(records), chunk_size)
        ]

    # TODO: Implement to_yaml method
    # def to_yaml(self):
    #     data = self.data_frame.to_dict(orient='records')
    #     return yaml.dump(data, allow_unicode=True)

    # TODO: Implement to_xml method
    # def to_xml(self):
    #     root = ET.Element("root")
    #     for _, row in self.data_frame.iterrows():
    #         item = ET.SubElement(root, "item")
    #         for key, value in row.items():
    #             if isinstance(value, pd.Timestamp):
    #                 value = value.strftime('%Y-%m-%d %H:%M:%S')
    #             child = ET.SubElement(item, key)
    #             child.text = str(value)
    #     return ET.tostring(root, encoding='unicode')
0 commit comments