2475 bug: Hi L1C needs to allow for arbitrary calibration product numbers (#2477)

subagonsouth · web-flow · commit 3fe84244317b · 2025-12-04T13:00:43.000-07:00
* Add property to CalibrationProductConfig for getting sorted, unique calibration product numbers

* Allow for arbitrary calibration product numbers in Hi L1C

* Test that out-of-order calibration products get sorted
diff --git a/imap_processing/hi/hi_l1c.py b/imap_processing/hi/hi_l1c.py
@@ -104,7 +104,7 @@ def generate_pset_dataset(
     pset_dataset = empty_pset_dataset(
         de_dataset.ccsds_met.data.mean(),
         de_dataset.esa_energy_step,
-        config_df.cal_prod_config.number_of_products,
+        config_df.cal_prod_config.calibration_product_numbers,
         logical_source_parts["sensor"],
     )
     # Calculate and add despun_z, hae_latitude, and hae_longitude variables to
@@ -124,7 +124,10 @@ def generate_pset_dataset(
 
 
 def empty_pset_dataset(
-    l1b_met: float, l1b_energy_steps: xr.DataArray, n_cal_prods: int, sensor_str: str
+    l1b_met: float,
+    l1b_energy_steps: xr.DataArray,
+    cal_prod_numbers: npt.NDArray[np.int_],
+    sensor_str: str,
 ) -> xr.Dataset:
     """
     Allocate an empty xarray.Dataset with appropriate pset coordinates.
@@ -136,8 +139,9 @@ def empty_pset_dataset(
         repoint-table data to get the start and end times of the pointing.
     l1b_energy_steps : xarray.DataArray
         The array of esa_energy_step data from the L1B DE product.
-    n_cal_prods : int
-        Number of calibration products to allocate.
+    cal_prod_numbers : numpy.ndarray
+        Array of calibration product numbers from the configuration file.
+        These can be arbitrary integers, not necessarily starting at 0.
     sensor_str : str
         '45sensor' or '90sensor'.
 
@@ -191,7 +195,7 @@ def empty_pset_dataset(
     ).copy()
     dtype = attrs.pop("dtype")
     coords["calibration_prod"] = xr.DataArray(
-        np.arange(n_cal_prods, dtype=dtype),
+        cal_prod_numbers.astype(dtype),
         name="calibration_prod",
         dims=["calibration_prod"],
         attrs=attrs,
@@ -349,6 +353,12 @@ def pset_counts(
         fill_value=0,
     )
 
+    # Create mapping from calibration product numbers to array indices
+    cal_prod_to_index = {
+        cal_prod: idx
+        for idx, cal_prod in enumerate(pset_coords["calibration_prod"].values)
+    }
+
     # Drop events with FILLVAL for trigger_id. This should only occur for a
     # pointing with no events that gets a single fill event
     de_ds = l1b_de_dataset.drop_dims("epoch")
@@ -406,9 +416,10 @@ def pset_counts(
             # When iterating over rows of a dataframe, the names of the multi-index
             # are not preserved. Below, `config_row.Index[0]` gets the
             # calibration_prod value from the namedtuple representing the
-            # dataframe row.
+            # dataframe row. We map this to the array index using cal_prod_to_index.
+            i_cal_prod = cal_prod_to_index[config_row.Index[0]]
             np.add.at(
-                counts_var["counts"].data[0, i_esa, config_row.Index[0]],
+                counts_var["counts"].data[0, i_esa, i_cal_prod],
                 spin_bin_indices,
                 1,
             )
diff --git a/imap_processing/hi/utils.py b/imap_processing/hi/utils.py
@@ -11,6 +11,7 @@
 import numpy as np
 import pandas as pd
 import xarray as xr
+from numpy import typing as npt
 
 from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes
 
@@ -501,3 +502,21 @@ def number_of_products(self) -> int:
             calibration product definitions.
         """
         return len(self._obj.index.unique(level="calibration_prod"))
+
+    @property
+    def calibration_product_numbers(self) -> npt.NDArray[np.int_]:
+        """
+        Get the calibration product numbers from the current configuration.
+
+        Returns
+        -------
+        cal_prod_numbers : numpy.ndarray
+            Array of unique calibration product numbers from the configuration.
+            These are sorted in ascending order and can be arbitrary integers.
+        """
+        return (
+            self._obj.index.get_level_values("calibration_prod")
+            .unique()
+            .sort_values()
+            .values
+        )
diff --git a/imap_processing/tests/hi/test_hi_l1c.py b/imap_processing/tests/hi/test_hi_l1c.py
@@ -1,5 +1,6 @@
 """Test coverage for imap_processing.hi.l1c.hi_l1c.py"""
 
+import io
 from collections import namedtuple
 from unittest import mock
 from unittest.mock import MagicMock
@@ -146,15 +147,16 @@ def test_empty_pset_dataset(use_fake_repoint_data_for_time):
         data=np.concat((np.arange(n_energy_steps + 1).repeat(2), np.array([255, 255]))),
         attrs={"FILLVAL": 255},
     )
-    n_calibration_prods = 5
+    # Create calibration product numbers array (0, 1, 2, 3, 4)
+    cal_prod_numbers = np.arange(5)
     sensor_str = HIAPID.H90_SCI_DE.sensor
     l1b_met = 482373065
     use_fake_repoint_data_for_time(
         np.asarray([l1b_met - 15 * 60, l1b_met + 24 * 60 * 60])
     )
 
     dataset = hi_l1c.empty_pset_dataset(
-        l1b_met, l1b_esa_energy_steps, n_calibration_prods, sensor_str
+        l1b_met, l1b_esa_energy_steps, cal_prod_numbers, sensor_str
     )
 
     assert dataset.epoch.size == 1
@@ -164,7 +166,8 @@ def test_empty_pset_dataset(use_fake_repoint_data_for_time):
     np.testing.assert_array_equal(
         dataset.esa_energy_step.data, np.arange(n_energy_steps) + 1
     )
-    assert dataset.calibration_prod.size == n_calibration_prods
+    assert dataset.calibration_prod.size == len(cal_prod_numbers)
+    np.testing.assert_array_equal(dataset.calibration_prod.data, cal_prod_numbers)
 
     # verify that attrs defined in hi_pset_epoch have overwritten default
     # epoch attributes
@@ -229,7 +232,7 @@ def test_pset_counts(
     empty_pset = hi_l1c.empty_pset_dataset(
         100,
         l1b_dataset.esa_energy_step,
-        cal_config_df.cal_prod_config.number_of_products,
+        cal_config_df.cal_prod_config.calibration_product_numbers,
         HIAPID.H90_SCI_DE.sensor,
     )
     counts_var = hi_l1c.pset_counts(empty_pset.coords, cal_config_df, l1b_dataset)
@@ -255,7 +258,7 @@ def test_pset_counts_empty_l1b(
     empty_pset = hi_l1c.empty_pset_dataset(
         100,
         l1b_dataset.esa_energy_step,
-        cal_config_df.cal_prod_config.number_of_products,
+        cal_config_df.cal_prod_config.calibration_product_numbers,
         HIAPID.H90_SCI_DE.sensor,
     )
     counts_var = hi_l1c.pset_counts(empty_pset.coords, cal_config_df, l1b_dataset)
@@ -325,6 +328,103 @@ def test_get_tof_window_mask():
     np.testing.assert_array_equal(expected_mask, window_mask)
 
 
+def test_empty_pset_dataset_arbitrary_cal_prod_numbers(use_fake_repoint_data_for_time):
+    """Test empty_pset_dataset with non-sequential calibration product numbers."""
+    n_energy_steps = 3
+    l1b_esa_energy_steps = xr.DataArray(
+        data=np.concat((np.arange(n_energy_steps + 1).repeat(2), np.array([255, 255]))),
+        attrs={"FILLVAL": 255},
+    )
+    # Use non-sequential calibration product numbers
+    cal_prod_numbers = np.array([5, 10, 100])
+    sensor_str = HIAPID.H45_SCI_DE.sensor
+    l1b_met = 482373065
+    use_fake_repoint_data_for_time(
+        np.asarray([l1b_met - 15 * 60, l1b_met + 24 * 60 * 60])
+    )
+
+    dataset = hi_l1c.empty_pset_dataset(
+        l1b_met, l1b_esa_energy_steps, cal_prod_numbers, sensor_str
+    )
+
+    # Verify calibration_prod coordinate has the correct non-sequential values
+    assert dataset.calibration_prod.size == len(cal_prod_numbers)
+    np.testing.assert_array_equal(dataset.calibration_prod.data, cal_prod_numbers)
+    # Verify the calibration_prod_label reflects the actual numbers
+    expected_labels = np.array(["5", "10", "100"])
+    np.testing.assert_array_equal(dataset.calibration_prod_label.data, expected_labels)
+
+
+@pytest.mark.external_test_data
+def test_pset_counts_arbitrary_cal_prod_numbers(
+    hi_l1_test_data_path, use_fake_repoint_data_for_time
+):
+    """Test pset_counts with non-sequential calibration product numbers."""
+    # Create a test calibration product config with non-sequential numbers
+    csv_content = """\
+calibration_prod,esa_energy_step,geometric_factor,coincidence_type_list,tof_ab_low,tof_ab_high,tof_ac1_low,tof_ac1_high,tof_bc1_low,tof_bc1_high,tof_c1c2_low,tof_c1c2_high
+5,1,0.00055,ABC1C2,0,1023,-1023,1023,-1023,1023,0,1023
+5,2,0.00085,ABC1C2,0,1023,-1023,1023,-1023,1023,0,1023
+10,1,0.00055,BC1C2,0,1023,-1023,1023,-1023,1023,0,1023
+10,2,0.00085,BC1C2,0,1023,-1023,1023,-1023,1023,0,1023
+    """
+
+    l1b_de_path = hi_l1_test_data_path / "imap_hi_l1b_45sensor-de_20250415_v999.cdf"
+    l1b_dataset = load_cdf(l1b_de_path)
+
+    cal_config_df = imap_processing.hi.utils.CalibrationProductConfig.from_csv(
+        io.StringIO(csv_content)
+    )
+
+    # Create PSET with non-sequential calibration product numbers
+    l1b_met = 482373065
+    use_fake_repoint_data_for_time(
+        np.asarray([l1b_met - 15 * 60, l1b_met + 24 * 60 * 60])
+    )
+
+    empty_pset = hi_l1c.empty_pset_dataset(
+        l1b_met,
+        l1b_dataset.esa_energy_step,
+        cal_config_df.cal_prod_config.calibration_product_numbers,
+        HIAPID.H90_SCI_DE.sensor,
+    )
+
+    # Verify the calibration_prod coordinate has non-sequential values
+    np.testing.assert_array_equal(empty_pset.calibration_prod.data, np.array([5, 10]))
+
+    # Mock get_pointing_times to avoid SPICE kernel requirements
+    with mock.patch(
+        "imap_processing.hi.hi_l1c.get_pointing_times", return_value=(100, 200)
+    ):
+        counts_var = hi_l1c.pset_counts(empty_pset.coords, cal_config_df, l1b_dataset)
+
+    # Verify counts array has correct shape based on coordinates
+    assert "counts" in counts_var
+    # Shape should be (n_epoch, n_esa_energy, n_cal_prod, n_spin_bins)
+    # where n_cal_prod is 2 (for products 5 and 10)
+    expected_shape = (
+        1,
+        empty_pset.esa_energy_step.size,
+        2,  # Two calibration products: 5 and 10
+        3600,
+    )
+    assert counts_var["counts"].data.shape == expected_shape
+    # Check that total number of expected counts is correct
+    # ABC1C2 is coincidence type 15
+    esa_1_2_mask = (l1b_dataset["esa_step"][l1b_dataset["ccsds_index"]] < 3).values
+    coincidence_15_mask = (l1b_dataset["coincidence_type"] == 15).values
+    np.testing.assert_equal(
+        np.sum(counts_var["counts"].data[:, :, 0]),
+        np.sum(coincidence_15_mask & esa_1_2_mask),
+    )
+    # BC1C2 is coincidence type 7
+    coincidence_7_mask = (l1b_dataset["coincidence_type"] == 7).values
+    np.testing.assert_equal(
+        np.sum(counts_var["counts"].data[:, :, 1]),
+        np.sum(coincidence_7_mask & esa_1_2_mask),
+    )
+
+
 def test_pset_backgrounds():
     """Test coverage for pset_backgrounds function."""
     # Create some fake coordinates to use
@@ -369,7 +469,7 @@ def test_pset_exposure(
         attrs={"FILLVAL": 255},
     )
     empty_pset = hi_l1c.empty_pset_dataset(
-        100, l1b_energy_steps, 2, HIAPID.H90_SCI_DE.sensor
+        100, l1b_energy_steps, np.array([0, 1]), HIAPID.H90_SCI_DE.sensor
     )
     # Set the mock of find_second_de_packet_data to return a xr.Dataset
     # with some dummy data. ESA 1 will get binned data once, ESA 2 will get
diff --git a/imap_processing/tests/hi/test_utils.py b/imap_processing/tests/hi/test_utils.py
@@ -1,5 +1,7 @@
 """Test coverage for imap_processing.hi.utils.py"""
 
+import io
+
 import numpy as np
 import pandas as pd
 import pytest
@@ -372,3 +374,35 @@ def test_number_of_products(self, hi_test_cal_prod_config_path):
             hi_test_cal_prod_config_path
         )
         assert df.cal_prod_config.number_of_products == 2
+
+    def test_calibration_product_numbers(self, hi_test_cal_prod_config_path):
+        """Test coverage for calibration_product_numbers accessor."""
+        df = imap_processing.hi.utils.CalibrationProductConfig.from_csv(
+            hi_test_cal_prod_config_path
+        )
+        cal_prod_numbers = df.cal_prod_config.calibration_product_numbers
+        # The test config file has calibration products 0 and 1
+        np.testing.assert_array_equal(cal_prod_numbers, np.array([0, 1]))
+        # Verify it's a numpy array of integers
+        assert isinstance(cal_prod_numbers, np.ndarray)
+        assert cal_prod_numbers.dtype in [np.int32, np.int64]
+
+    def test_calibration_product_numbers_arbitrary_values(self):
+        """Test calibration_product_numbers with arbitrary non-sequential values."""
+        # In-memory CSV with non-sequential, out-of-order calibration product numbers
+        csv_content = """\
+calibration_prod,esa_energy_step,geometric_factor,coincidence_type_list,tof_ab_low,tof_ab_high,tof_ac1_low,tof_ac1_high,tof_bc1_low,tof_bc1_high,tof_c1c2_low,tof_c1c2_high
+10,1,0.00055,BC1C2,15,55,0,70,-50,10,5,25
+10,2,0.00085,BC1C2,15,55,0,70,-50,10,5,25
+5,1,0.00055,ABC1C2,15,55,0,70,-50,10,5,25
+5,2,0.00085,ABC1C2,15,55,0,70,-50,10,5,25
+100,1,0.00055,AC1,15,55,0,70,-50,10,5,25
+100,2,0.00085,AC1,15,55,0,70,-50,10,5,25
+        """
+
+        df = CalibrationProductConfig.from_csv(io.StringIO(csv_content))
+        cal_prod_numbers = df.cal_prod_config.calibration_product_numbers
+
+        # Should return sorted unique calibration product numbers
+        np.testing.assert_array_equal(cal_prod_numbers, np.array([5, 10, 100]))
+        assert isinstance(cal_prod_numbers, np.ndarray)