Issue #6 - lots of todos

M3Works · Nov 6, 2024 · e36259b · e36259b
1 parent 56c96b6
commit e36259b
Show file tree

Hide file tree

Showing 4 changed files with 80 additions and 8 deletions.
diff --git a/insitupy/campaigns/campaign.py b/insitupy/campaigns/campaign.py
@@ -10,6 +10,7 @@
 import pandas as pd
 
 from insitupy.io.metadata import MetaDataParser, ProfileMetaData
+from insitupy.profiles.base import ProfileData
 from insitupy.variables import (
     BasePrimaryVariables, BaseMetadataVariables, MeasurementDescription,
     ExtendableVariables
@@ -78,10 +79,11 @@ class ProfileDataCollection:
     """
     This could be a collection of pits, profiles, etc
     """
+    META_PARSER = MetaDataParser
+    PROFILE_DATA_CLASS = ProfileData
 
-    def __init__(self, df):
-        self._df = df
-        pass
+    def __init__(self, profiles: List[ProfileData]):
+        self._profiles = profiles
 
     @property
     def SWE(self):
@@ -125,7 +127,38 @@ def points_from_geometry(
         pass
 
     @classmethod
-    def from_files(cls):
+    def _read(cls, fname, columns, header_pos, metadata):
+        # return a list of ProfileData
+        # TODO: read in the df
+        # TODO: split into individual datasets
+        # TODO: return a list of profile data from those datasets
+        # Iterate columns
+        # TODO: rename to standard names for multi sample measurements
+        # TODO: share reading logic with ProfileData
+        # cls.PROFILE_DATA_CLASS.some_shared_reading_logic
+        result = [
+            ProfileData(
+                df, metadata, variable,  # variable is a MeasurementDescription
+                original_file=fname
+            )
+        ]
+        return None
+
+    @classmethod
+    def from_csv(cls, fname):
+        # TODO: timezone here (mapped from site?)
         # parse mlutiple files and create an iterable of ProfileData
-        pass
+        # TODO: if this is multisample or multi variables,
+        #   we should split into n dataframes contained in n objects
+        #   (n being sample or variables). This means that we could return
+        #   multiple SnowExProfileData instantiated classes for one read
+        meta_parser = cls.META_PARSER(fname, "US/Mountain")
+        # Parse the metadata and column info
+        metadata, columns, header_pos = meta_parser.parse()
+        # read in the actual data
+        profiles = cls._read(fname, columns, header_pos, metadata)
+
+        # TODO: return a list of classes always
+
+        return cls(profiles, metadata)
 
diff --git a/insitupy/campaigns/snowex/snowex_campaign.py b/insitupy/campaigns/snowex/snowex_campaign.py
@@ -1,3 +1,5 @@
+import logging
+
 import pandas as pd
 from pathlib import Path
 
@@ -7,16 +9,39 @@
 from insitupy.profiles.base import ProfileData, standardize_depth
 
 
+LOG = logging.getLogger(__name__)
+
+
 class SnowExMetadataParser(MetaDataParser):
     METADATA_VARIABLE_CLASS = SnowExMetadataVariables
     PRIMARY_VARIABLES_CLASS = SnowExPrimaryVariables
 
 
+# class SingleProfile():
+#     def __init__(self, not sure):
+#
+#     @classmethod
+#     def from_file(cls, fname, varname=None):
+#         # get columns and headers
+#         header, columns = find_header_info()
+#
+#         if varname is None:
+#             primary = columns[0]
+#         else:
+#             for c in columns:
+#                 if c == varname:
+#                     prime = c
+#                     break
+#         return cls(primary, ....)
+
+
 class SnowExProfileData(ProfileData):
     META_PARSER = SnowExMetadataParser
 
     @classmethod
-    def from_file(cls, fname, variable: MeasurementDescription):
+    def from_csv(
+        cls, fname, variable: MeasurementDescription
+    ):
         # TODO: timezone here (mapped from site?)
         meta_parser = cls.META_PARSER(fname, "US/Mountain")
         # Parse the metadata and column info
@@ -26,6 +51,12 @@ def from_file(cls, fname, variable: MeasurementDescription):
 
         return cls(data, metadata, variable)
 
+    @classmethod
+    def from_dataframe(cls, df, metadata):
+        # Instantiate from a read in file
+        pass
+
+
     @staticmethod
     def _read(profile_filename, columns, header_position):
         """
@@ -80,4 +111,8 @@ def _read(profile_filename, columns, header_position):
                 f'File contains a profile with'
                 f' with {len(df)} layers across {delta:0.2f} cm'
             )
+
+        # TODO: if this is multisample or multivariable, we are just returning
+        #   the requested variable OR the first variable
+
         return df
diff --git a/insitupy/io/metadata.py b/insitupy/io/metadata.py
@@ -366,7 +366,7 @@ def _parse_columns(self, str_line):
         standard_cols = [StringManager.standardize_key(c) for c in raw_cols]
         final_cols = []
         for c in standard_cols:
-            mapped_col, col_map = self.VARIABLES_CLASS.from_mapping(c)
+            mapped_col, col_map = self.PRIMARY_VARIABLES_CLASS.from_mapping(c)
             final_cols.append(mapped_col)
 
         return final_cols
@@ -404,6 +404,7 @@ def find_header_info(self, filename=None):
         # Find the column names and where it is in the file
         else:
             header_pos, header_indicator = self._find_header_position(lines)
+            # TODO: identify columns, map columns,
             columns = self._parse_columns(lines[header_pos])
             LOG.debug(
                 f'Column Data found to be {len(columns)} columns based on'

diff --git a/insitupy/profiles/base.py b/insitupy/profiles/base.py
@@ -21,6 +21,7 @@ class ProfileData:
 
     def __init__(
         self, input_df, metadata: ProfileMetaData, variable: MeasurementDescription,
+        original_file=None
     ):
         """
         Take df of layered data (SMP, pit, etc)
@@ -30,6 +31,7 @@ def __init__(
                 Should include sample or sample_a, sample_b, etc
 
         """
+        self._original_file = None
         self._depth_layer = self.META_PARSER.PRIMARY_VARIABLES_CLASS.VARIABLES.DEPTH
         self._lower_depth_layer = self.META_PARSER.PRIMARY_VARIABLES_CLASS.BOTTOM_DEPTH
         self._metadata = metadata
@@ -180,10 +182,11 @@ def get_profile(self, snow_datum="ground"):
         return df.loc[:, columns_of_interest]
 
     @classmethod
-    def from_file(self, fname, variable: ExtendableVariables):
+    def from_csv(self, fname, variable: ExtendableVariables):
         raise NotImplementedError("Not implemented")
 
 
+
 def standardize_depth(depths, desired_format='snow_height', is_smp=False):
     """
     Data that is a function of depth comes in 2 formats. Sometimes 0 is