Initial Commit

jameno · Oct 5, 2019 · f9b9da0 · f9b9da0
1 parent 156354b
commit f9b9da0
Show file tree

Hide file tree

Showing 4 changed files with 149 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -1 +1,46 @@
-Simple-Apple-Health-XML-to-CSV
+# Simple Apple Health XML to CSV
+
+A simple script to convert Apple Health's export.xml file to an easy-to-use CSV.
+
+
+
+## How to Run
+
+1. #### Verify you have pandas installed on your machine or environment
+
+`python -c "import pandas"` should print nothing when run from the command line
+
+If you get a _**ModuleNotFoundError: No module named 'pandas'**_ error, install pandas:
+
+`pip install pandas`
+
+
+
+2. #### Export your Apple Health Data
+
+   Go to your health home screen and click on the profile icon
+
+<img style="float: left;" src="health_home.PNG" width=300>
+
+On the next page, click the "Export Health Data" button
+
+<img style="float: left;" src="export_data_button.jpg" width = 300 >
+
+Your data will be prepared, and then you can transfer the export.zip file to your machine.
+
+3. Unzip the file, which should contain:
+
+   * apple_health_export
+     * export.xml
+     * export_cda.xml
+
+4. Place **apple_health_xml_convert.py** in the folder alongside these files
+
+5. Run the script with `python apple_health_xml_convert.py`
+
+
+
+The converted data will be written to a CSV file named in the format:
+
+* **apple_health_export_YYYY-MM-DD.csv**
+
diff --git a/apple_health_xml_convert.py b/apple_health_xml_convert.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Simple Apple Health XML to CSV
+==============================
+:File: apple_health_xml_convert.py
+:Description: Convert Apple Health "export.xml" file into a csv
+:Version: 0.0.1
+:Created: 2019-10-04
+:Authors: Jason Meno (jam)
+:Dependencies: pandas; an export.xml file from Apple Health
+:License: BSD-2-Clause
+"""
+
+# %% Imports
+import pandas as pd
+import xml.etree.ElementTree as ET
+import datetime as dt
+
+
+# %% Function Definitions
+def pre_process():
+    """Pre-processes the XML file by replacing specific bits that would
+    normally result in a ParseError
+    """
+
+    print("Pre-processing...", end="")
+    with open("export.xml") as f:
+        newText = f.read().replace("\x0b", "")
+
+    # with open("apple_health_export_2/new_export.xml", "w") as f:
+    with open("processed_export.xml", "w") as f:
+        f.write(newText)
+
+    print("done!")
+
+    return
+
+
+def convert_xml():
+    """Loops through the element tree, retrieving all objects, and then
+    combining them together into a dataframe
+    """
+
+    print("Converting XML File...", end="")
+    etree = ET.parse("processed_export.xml")
+
+    attribute_list = []
+
+    for child in etree.getroot():
+        attribute_list.append(child.attrib)
+
+    health_df = pd.DataFrame(attribute_list)
+
+    # Every health data type and some columns have a long identifer
+    # Removing these for readability
+    health_df.type = health_df.type.str.replace('HKQuantityTypeIdentifier', "")
+    health_df.type = health_df.type.str.replace('HKCategoryTypeIdentifier', "")
+    health_df.columns = \
+        health_df.columns.str.replace("HKCharacteristicTypeIdentifier", "")
+
+    # Reorder some of the columns for easier visual data review
+    original_cols = list(health_df)
+    shifted_cols = ['type',
+                    'sourceName',
+                    'value',
+                    'unit',
+                    'startDate',
+                    'endDate',
+                    'creationDate']
+
+    remaining_cols = list(set(original_cols) - set(shifted_cols))
+    reordered_cols = shifted_cols + remaining_cols
+    health_df = health_df.reindex(labels=reordered_cols, axis='columns')
+
+    # Sort by newest data first
+    health_df.sort_values(by='creationDate', ascending=False, inplace=True)
+
+    print("done!")
+
+    return health_df
+
+
+def save_to_csv(health_df):
+    print("Saving CSV file...", end="")
+    today = dt.datetime.now().strftime('%Y-%m-%d')
+    health_df.to_csv("apple_health_export_" + today + ".csv", index=False)
+    print("done!")
+
+    return
+
+
+def main():
+    pre_process()
+    health_df = convert_xml()
+    save_to_csv(health_df)
+
+    return
+
+
+# %%
+# Run the conversion only when executed as a script, not when imported.
+if __name__ == '__main__':
+    main()
diff --git a/export_data_button.jpg b/export_data_button.jpg
diff --git a/health_home.PNG b/health_home.PNG