upload utility usage files and import them live

ericoc · ericoc · commit c901ba6d2311 · 2025-04-19T18:45:39.000-04:00
diff --git a/apps/core/forms.py b/apps/core/forms.py
@@ -0,0 +1,193 @@
+from csv import DictReader
+from datetime import date, datetime, time
+from django.conf import settings
+from django.core.validators import FileExtensionValidator
+from django.forms import Form, FileField, ValidationError
+from django.utils import timezone
+from openpyxl import load_workbook
+from pathlib import Path
+from warnings import catch_warnings, filterwarnings
+
+
+class UploadUsageDataForm(Form):
+    """
+    Form to handle upload of utility usage data file.
+    """
+    file = FileField(
+        validators=(
+            FileExtensionValidator(
+                allowed_extensions=settings.USAGE_FILE_SUFFIXES
+            ),
+        )
+    )
+
+    def clean_file(self):
+        """
+        Validate various uploaded utility usage data files.
+        """
+        file = self.cleaned_data.get("file")
+        if not file:
+            raise ValidationError("Missing file!")
+
+        encoding = "utf-8"
+        file_path = Path(settings.MEDIA_ROOT, file.name)
+        usage = []
+        utility = None
+
+        _valid_units = {
+            "electric": ("TYPE", "Electric usage", "kWh"),
+            "water": (" Units", " Gallons"),
+        }
+
+        # Get uploaded file name suffix, and validate MIME type.
+        suffix = file_path.suffix.lower()
+
+        # Write uploaded file locally.
+        with open(file_path, mode="wb+") as fh_w:
+            for chunk in file.chunks():
+                fh_w.write(chunk)
+
+        """
+        Handle the uploaded file (which was just written) uniquely per utility.
+        """
+
+        """Electric or Water usage: comma-separated values (.csv)"""
+        if suffix == ".csv":
+
+            # Different encoding for electric usage CSV file.
+            if file.name == settings.WATER_FILENAME:
+                utility = "water"
+
+            # Different encoding for electric usage CSV file.
+            if file.name.startswith(settings.ELECTRIC_PREFIX):
+                encoding += "-sig"
+                utility = "electric"
+
+            # Open CSV file that we just wrote, with proper encoding.
+            with open(file_path, mode="r", encoding=encoding) as read_fh:
+                csv_lines = read_fh.readlines()
+                to_read = csv_lines
+
+                if utility == "electric":
+
+                    # Ensure electric usage (7th row, 5th column) is "kWh".
+                    uf = csv_lines[6].split(",")[4]
+                    unit_found = uf.split("(")[1].split(")")[0]
+                    assert unit_found ==  _valid_units[utility][2], (
+                        f"Invalid {utility} unit column!"
+                    )
+
+                    # Skip header of electric usage file.
+                    to_read = csv_lines[6:]
+
+                # Get valid unit for electric or water usage CSV rows.
+                utility_unit = _valid_units[utility][1]
+
+                # Read CSV of electric or water usage data, iterating rows.
+                reader = DictReader(to_read)
+                for row in reader:
+
+                    # Electric or water CSV file rows must be valid units.
+                    row_unit = row[_valid_units[utility][0]]
+                    assert row_unit == utility_unit, (
+                        f"Invalid {utility} unit ({row_unit})!"
+                    )
+
+                    """
+                    Parse electric usage row.
+                    """
+                    if utility == "electric":
+
+                        # Parse date/time columns for each electric usage row.
+                        time_pcs = row["START TIME"].split(':')
+
+                        # Map hour to electricity usage in floating point kWh.
+                        usage.append({
+                            "hour": timezone.make_aware(
+                                value=datetime.combine(
+                                    date=date.fromisoformat(row["DATE"]),
+                                    time=time(
+                                        hour=int(time_pcs[0]),
+                                        minute=int(time_pcs[1])
+                                    )
+                                )
+                            ),
+                            "kwh": float(row["USAGE (kWh)"])
+                        })
+
+                    """
+                    Parse water usage row.
+                    """
+                    if utility == "water":
+
+                        # Parse date column for each water usage row.
+                        date_pcs = row[" Time Interval"].strip().split('/')
+                        date_iso = f"{date_pcs[2]}-{date_pcs[0]}-{date_pcs[1]}"
+
+                        # Map each day to water usage floating point gallons.
+                        usage.append({
+                            "day": date.fromisoformat(date_iso),
+                            "gallons": float(row[" Consumption"].strip())
+                        })
+
+        """Natural Gas usage: Microsoft Excel (.xlsx)"""
+        xlsx_prefix = settings.NATURAL_GAS_PREFIX
+        if suffix == ".xlsx" and file.name.startswith(xlsx_prefix):
+            utility = "natural_gas"
+
+            # Filter warnings about spreadsheet style.
+            with catch_warnings():
+                filterwarnings(
+                    action="ignore",
+                    category=UserWarning,
+                    module="openpyxl.styles.stylesheet"
+                )
+
+                # Load/read the spreadsheet workbook.
+                xlsx_wb = load_workbook(filename=file_path, read_only=True)
+                book_obj = xlsx_wb.active
+                sheet_obj = book_obj
+                sheet_title = sheet_obj.title or ""
+
+            # Confirm worksheet title.
+            assert sheet_title == xlsx_prefix, (
+                f"Invalid worksheet ({sheet_title})!"
+            )
+
+            # Gather all worksheet rows into a list.
+            rows = list(sheet_obj.iter_rows())
+
+            # Ensure natural gas unit is "CCF" (5th row, 2nd column).
+            unit_found = rows[4][1].value.split('(')[1].split(')')[0]
+            assert unit_found == "CCF", f"Invalid {utility} unit!"
+
+            # Skip header rows to parse columns of natural gas usage data.
+            #   Bill Month, Units Consumed (CCF), Period Start, Period End
+            for row in rows[5:]:
+                """
+                Parse natural gas usage row.
+                """
+                row_month, row_ccf, row_start, row_end = row
+
+                # Parse "Bill Month" column from each natural gas usage row.
+                #    Example: "Mar, 2025"
+                row_dt = datetime.strptime(
+                    row_month.value, "%b, %Y"
+                ).date()
+
+                # Map each month to natural gas usage in CCF as floating point.
+                usage.append({
+                    "month": row_dt,
+                    "ccf": float(row_ccf.value)
+                })
+
+        if usage:
+            self.cleaned_data["usage"] = usage
+
+        if utility:
+            self.cleaned_data["utility"] = utility
+
+        # Delete the local copy of the uploaded usage data file.
+        if file_path and file_path.is_file() and file_path.exists():
+            if file_path.unlink():
+                return file
diff --git a/apps/core/forms/__init__.py b/apps/core/forms/__init__.py
diff --git a/apps/core/forms/upload.py b/apps/core/forms/upload.py
diff --git a/apps/core/templates/add.html b/apps/core/templates/add.html
@@ -1,12 +1,41 @@
 {% extends 'base.html' %}
 {% block main %}
-    <div class="border m-3 p-3 rounded">
-        <form action="{% url 'add' %}" method="post">
-            {{ form.as_p }}
+    <div id="add">
+    {% if form.errors %}
+        <div class="alert alert-danger border list-group m-3 p-3 errors rounded">
+        {% for error in form.errors.values %}
+            <div class="list-group-item list-group-item-danger">{{ error }}</div>
+        {% endfor %}
+        </div>
+    {% endif %}
+    {% if messages %}
+        <div class="alert border list-group m-3 p-3 messages rounded">
+        {% for message in messages %}
+            <div class="list-group-item list-group-item-{% if message.tags %}{{ message.tags }}{% else %}success{% endif %}">
+                {{ message }}
+            </div>
+        {% endfor %}
+        </div>
+    {% endif %}
+    {% if form %}
+        <form action="{% url 'add' %}" class="border m-3 p-3 rounded" enctype="multipart/form-data" method="post">
+            <h3 title="{{ title }}">
+                {{ title }}
+            </h3>
+            <p class="form-text">
+                Please upload a comma-separated values (<span class="font-monospace">csv</span> - electric or water)
+                file, or Microsoft Excel (<span class="font-monospace">xlsx</span> - natural gas) spreadsheet
+                to add new data.
+            </p>
+            <p>
+                {{ form.file }}
+            </p>
+            {% csrf_token %}
             <p>
-                <input class="btn btn-outline-primary" type="submit" value="Upload!">
+                <button class="btn btn-outline-primary" type="submit">{{ title }}</button>
             </p>
         </form>
+    {% endif %}
     </div>
 {% endblock main %}
 {% block scripts %}{% endblock scripts %}
diff --git a/apps/core/views/add.py b/apps/core/views/add.py
@@ -1,13 +1,98 @@
 from django.contrib.auth.mixins import LoginRequiredMixin
+from django.contrib import messages
+from django.forms import ValidationError
+from django.utils.html import format_html
+from django.utils.translation import ngettext
 from django.views.generic.edit import FormView
 
-from ..forms.upload import UploadUsageDataForm
+from apps.electric.models import ElectricUsage
+from apps.natural_gas.models import NaturalGasUsage
+from apps.water.models import WaterUsage
+
 from .base import BaseView
+from ..forms import UploadUsageDataForm
+
 
 class AddView(LoginRequiredMixin, BaseView, FormView):
     """Add data via upload form view."""
     color = "var(--bs-success)"
     form_class = UploadUsageDataForm
+    http_method_names = ("get", "post")
+    model = None
     success_url = "/add/"
     template_name = "add.html"
     title = "Add"
+
+    def form_valid(self, form):
+
+        model = None
+        num_created = 0
+        utility = form.cleaned_data.get("utility")
+        if not utility:
+            raise ValidationError("Missing utility!")
+
+        # Choose model uniquely per utility.
+        if utility == "electric":
+            model = ElectricUsage
+        if utility == "natural_gas":
+            model = NaturalGasUsage
+        if utility == "water":
+            model = WaterUsage
+
+        # Ensure that a model is chosen.
+        if not model:
+            raise ValidationError(f"Missing model (for {utility})!")
+
+        # Ensure that data was found from the uploaded utility usage file.
+        usage = form.cleaned_data.get("usage")
+        if not usage:
+            raise ValidationError(f"Missing usage (for {utility}!")
+
+        # Count the number of usage records found in the uploaded file.
+        num_found = len(usage)
+        utility_title = utility.replace("_", " ")
+
+        # Message the utility that was detected based upon the file.
+        messages.add_message(
+            request=self.request,
+            level=messages.INFO,
+            message=format_html(f"<b>Utility</b>: {utility_title.title()}")
+        )
+
+        # Message the count of usage events that were found in the file.
+        found_msg = ngettext(
+            singular=f"%d {model._meta.verbose_name.title()}",
+            plural=f"%d {model._meta.verbose_name_plural.title()}",
+            number=num_found,
+        ) % num_found
+        messages.add_message(
+            request=self.request,
+            level=messages.INFO,
+            message=format_html(f"<b>Found</b>: {found_msg}")
+        )
+
+        # Add each usage item to database.
+        for usage_item in usage:
+            obj, created = model.objects.update_or_create(
+                **usage_item,
+                defaults=usage_item
+            )
+            if created:
+                num_created += 1
+
+        # Message count of usage events that were created from uploaded file.
+        create_level = messages.INFO
+        if num_created > 0:
+            create_level = messages.SUCCESS
+        create_msg = ngettext(
+            singular=f"%d {model._meta.verbose_name.title()}",
+            plural=f"%d {model._meta.verbose_name_plural.title()}",
+            number=num_created,
+        ) % num_created
+        messages.add_message(
+            request=self.request,
+            level=create_level,
+            message=format_html(f"<b>Created</b>: {create_msg}")
+        )
+
+        return super().form_valid(form)
diff --git a/settings.example.py b/settings.example.py
@@ -116,6 +116,8 @@
 NATURAL_GAS_PREFIX = "UsageData"
 WATER_FILENAME = "ChartData.csv"
 
+USAGE_FILE_SUFFIXES = ("csv", "xlsx")
+
 WEBSITE_TITLE = "Utilities"
 
 JAZZMIN_SETTINGS = {