Merge pull request #9 from k-kramer/feature/detect_column_ancillary

Feature/detect column ancillary
TUW-GEO · Jan 23, 2025 · cabada5 · cabada5
2 parents b6eb439 + 30d7441
commit cabada5
Show file tree

Hide file tree

Showing 8 changed files with 56 additions and 26 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -9,18 +9,19 @@ name: Automated Tests
 on:
   push:
   pull_request:
-  workflow_dispatch:
+  schedule:
+    - cron: "0 11 * * 1" # Every Monday at 11 UTC
 
 jobs:
   build:
     name: Build py${{ matrix.python-version }} @ ${{ matrix.os }} 🐍
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: ['3.8', '3.9', '3.10']
+        python-version: ['3.9', '3.13']
         os: ["ubuntu-latest"]
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
         with:
           submodules: true
           fetch-depth: 0
@@ -67,7 +68,7 @@ jobs:
           python setup.py sdist --dist-dir .artifacts/dist
           ls .artifacts/dist
       - name: Upload Artifacts
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v4
         with:
           name: Artifacts
           path: .artifacts/*
@@ -93,7 +94,7 @@ jobs:
           echo "GITHUB_REF = $GITHUB_REF"
           echo "GITHUB_REPOSITORY = $GITHUB_REPOSITORY"
       - name: Download Artifacts
-        uses: actions/download-artifact@v2
+        uses: actions/download-artifact@v4
       - name: Display downloaded files
         run: ls -aR
       - name: Upload to PyPI

diff --git a/docs/example.rst b/docs/example.rst
@@ -41,12 +41,12 @@ As Input a pandas.DataFrame of the following format is required:
 
     # initialize interface and run all flagging procedures
     flag = flagit.Interface(df)
-    result_df = flag.run(sat_point = 42.7)
+    result_df = flag.run(sat_point = 42.7) # Saturation Point in % vol
 
-    # alternatively: choose only specific procedures by providing a list or string as name:
+    # alternatively: choose only specific procedures by providing a list as name:
     flag = flagit.Interface(df)
     result_df = flag.run(name = ['D06', 'D07', 'D09'])
-    result_df = flag.run(name = 'C01')
+    result_df = flag.run(name = ['C01'])
 
 .. code:: python
 

diff --git a/environment.yml b/environment.yml
@@ -4,8 +4,9 @@ channels:
 dependencies:
   - numpy
   - scipy
-  - pandas
+  - pandas<3
   - pip
   - pip:
     - pytest-cov
     - pytest
+    - setuptools
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,2 @@
+[tool.pytest.ini_options]
+pythonpath = ["src"]
diff --git a/setup.cfg b/setup.cfg
@@ -26,7 +26,11 @@ package_dir =
 # DON'T CHANGE THE FOLLOWING LINE! IT WILL BE UPDATED BY PYSCAFFOLD!
 setup_requires = pyscaffold>=3.2a0,<3.3a0
 # Add here dependencies of your project (semicolon/line-separated), e.g.
-install_requires = numpy; scipy; pandas;
+install_requires =
+    setuptools
+    numpy
+    scipy
+    pandas
 # The usage of test_requires is discouraged, see `Dependency Management` docs
 # tests_require = pytest; pytest-cov
 # Require a specific Python version, e.g. Python 2.7 or >= 3.4

diff --git a/src/flagit/flagit.py b/src/flagit/flagit.py
@@ -31,6 +31,8 @@
 class FormatError(Exception):
     pass
 
+class VariableNotKnown(Exception):
+    """Raised when no column of the input DataFrame is a known variable name."""
 
 t = Variables()
 
@@ -57,7 +59,7 @@ class provides interface to apply ISMN quality control procedures to in situ soi
     data : pandas.DataFrame
         Input for Interface Object containing in situ soil moisture measurements
     sat_point : float
-            Saturation Point for soil at the respective location.
+            Saturation Point in % vol for soil at the respective location.
             At ISMN the saturation point is calculated from Harmonized World Soil Database (HWSD) sand, clay and organic
             content for each station using Equations [2,3,5] from Saxton & Rawls (2006).
             (Saxton, K. E., & Rawls, W. J. (2006). Soil water characteristic estimates by texture and organic matter for
@@ -90,7 +92,7 @@ def __init__(self, data, sat_point=None, depth_from=None):
             raise FormatError('Please provide pandas.DataFrame as data.')
 
         if 'soil_moisture' not in self.data.columns:
-            self.variable = self.data.keys()[0]
+            self.variable = self.get_variable_from_data()
             self.data['qflag'] = data[self.variable].apply(lambda x: set())
 
         else:
@@ -106,10 +108,10 @@ def run(
 
         Parameters
         ----------
-        name : string or list, optional
-            provide name of flag or list of flags to only apply these flags
+        name : list, optional
+            list of flag names; if provided, only these flags are applied
         sat_point : float
-                Saturation Point for soil at the respective location.
+                Saturation Point in % vol for soil at the respective location.
                 At ISMN the saturation point is calculated from Harmonized World Soil Database (HWSD) sand, clay
                 and organic content for each station using Equations [2,3,5] from Saxton & Rawls (2006).
                 (Saxton, K. E., & Rawls, W. J. (2006). Soil water characteristic estimates by texture and organic matter
@@ -127,6 +129,10 @@ def run(
             DataFrame including ISMN quality flags in column "qflag".
         """
         keys = self.data.keys()
+
+        if name:
+            assert isinstance(name, (list)), "If 'name' is provided then it must be a list"
+
         if not self.sat_point:
             self.sat_point = sat_point
         if not self.depth_from:
@@ -200,6 +206,19 @@ def apply_savgol(self) -> None:
         self.data['deriv1'] = savgol(self.data.soil_moisture, 3, 2, 1, mode='nearest')
         self.data['deriv2'] = savgol(self.data.soil_moisture, 3, 2, 2, mode='nearest')
 
+
+    def get_variable_from_data(self) -> str:
+        """
+        Return the first column of ``self.data`` that is in ``t.variable_list``.
+
+        Raises ``VariableNotKnown`` when no column name is a known variable.
+        """
+        for column in self.data.keys():
+            if column in t.variable_list:
+                return column
+        # Name the offending columns so the failure is diagnosable.
+        raise VariableNotKnown(f"No known variable in columns: {list(self.data.keys())}")
+
     def flag_C01(self, tag):
         """
         Soil moisture below threshold:

diff --git a/src/flagit/settings.py b/src/flagit/settings.py
@@ -6,6 +6,9 @@ class Variables():
     ancillary_ts_lower = 0
     ancillary_p_min = 0.2
     plateau_count = 0
+
+    variable_list = ['soil_moisture','soil_temperature','air_temperature','precipitation',
+                         'surface_temperature', 'soil_suction', 'snow_water_equivalent','snow_depth']
 
     def low_boundary(self, var):
         """

diff --git a/tests/test_flagit.py b/tests/test_flagit.py
@@ -49,47 +49,47 @@ def test_check_C01(self) -> None:
         """
         Test flag C01
         """
-        self.iface.run(name='C01')
+        self.iface.run(name=['C01'])
         assert self.data.qflag[30] == {'C01'}
         assert self.data.qflag[31] == set()
 
     def test_check_C02(self) -> None:
         """
         Test flag C02
         """
-        self.iface.run(name='C02')
+        self.iface.run(name=['C02'])
         assert self.data.qflag[70] == {'C02'}
         assert self.data.qflag[69] == set()
 
     def test_check_C03(self) -> None:
         """
         Test flag C03
         """
-        self.iface.run(name='C03')
+        self.iface.run(name=['C03'])
         assert self.data.qflag[80] == {'C03'}
         assert self.data.qflag[79] == set()
 
     def test_check_D01(self) -> None:
         """
         Test flag D01
         """
-        self.iface.run(name='D01')
+        self.iface.run(name=['D01'])
         assert self.data.qflag[35] == {'D01'}
         assert self.data.qflag[136] == set()
 
     def test_check_D02(self) -> None:
         """
         Test flag D02
         """
-        self.iface.run(name='D02')
+        self.iface.run(name=['D02'])
         assert self.data.qflag[2] == {'D02'}
         assert self.data.qflag[62] == set()
 
     def test_check_D03(self) -> None:
         """
         Test flag D03
         """
-        self.iface.run(name='D03')
+        self.iface.run(name=['D03'])
         assert self.data.qflag[70] == {'D03'}
         assert self.data.qflag[636] == set()
         assert self.data.qflag[0] == {'D03'}
@@ -98,23 +98,23 @@ def test_check_D04(self) -> None:
         """
         Test flag D04
         """
-        self.iface.run(name='D04')
+        self.iface.run(name=['D04'])
         assert self.data.qflag[70] == {'D04'}
         assert self.data.qflag[71] == set()
 
     def test_check_D05(self) -> None:
         """
         Test flag D05
         """
-        self.iface.run(name='D05')
+        self.iface.run(name=['D05'])
         assert self.data.qflag[70] == {'D05'}
         assert self.data.qflag[636] == set()
 
     def test_check_D06(self) -> None:
         """
         Test flag D06
         """
-        self.iface.run(name='D06')
+        self.iface.run(name=['D06'])
         np.testing.assert_almost_equal(self.data.deriv1[58], -5.551115123125783e-17)
         np.testing.assert_almost_equal(self.data.deriv2[29], -6.200000000000003)
         assert self.data.qflag[30] == {'D06'}
@@ -135,15 +135,15 @@ def test_check_D10(self) -> None:
         """
         Test flag D10
         """
-        self.iface.run(name='D10')
+        self.iface.run(name=['D10'])
         assert self.data.qflag[99] == {'D10'}
         assert self.data.qflag[75] == set()
 
     def test_check_good(self) -> None:
         """
         Test flag "good"
         """
-        self.iface.run(name='G')
+        self.iface.run(name=['G'])
         assert self.data.qflag[3] == {'G'}
         assert len(np.unique(self.data.qflag)) == 1
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		[tool.pytest.ini_options]
		pythonpath = ["src"]