Skip to content

Commit 3fe8424

Browse files
authored
2475 bug hi l1c need to allow for arbitrary calibration product numbers (#2477)
* Add method to CalibrationProductConfig for getting sorted calibration product numbers * Allow for arbitrary calibration product numbers in Hi L1C * Test that out of order calibration products get sorted
1 parent 7f3ac25 commit 3fe8424

File tree

4 files changed

+177
-13
lines changed

4 files changed

+177
-13
lines changed

imap_processing/hi/hi_l1c.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def generate_pset_dataset(
104104
pset_dataset = empty_pset_dataset(
105105
de_dataset.ccsds_met.data.mean(),
106106
de_dataset.esa_energy_step,
107-
config_df.cal_prod_config.number_of_products,
107+
config_df.cal_prod_config.calibration_product_numbers,
108108
logical_source_parts["sensor"],
109109
)
110110
# Calculate and add despun_z, hae_latitude, and hae_longitude variables to
@@ -124,7 +124,10 @@ def generate_pset_dataset(
124124

125125

126126
def empty_pset_dataset(
127-
l1b_met: float, l1b_energy_steps: xr.DataArray, n_cal_prods: int, sensor_str: str
127+
l1b_met: float,
128+
l1b_energy_steps: xr.DataArray,
129+
cal_prod_numbers: npt.NDArray[np.int_],
130+
sensor_str: str,
128131
) -> xr.Dataset:
129132
"""
130133
Allocate an empty xarray.Dataset with appropriate pset coordinates.
@@ -136,8 +139,9 @@ def empty_pset_dataset(
136139
repoint-table data to get the start and end times of the pointing.
137140
l1b_energy_steps : xarray.DataArray
138141
The array of esa_energy_step data from the L1B DE product.
139-
n_cal_prods : int
140-
Number of calibration products to allocate.
142+
cal_prod_numbers : numpy.ndarray
143+
Array of calibration product numbers from the configuration file.
144+
These can be arbitrary integers, not necessarily starting at 0.
141145
sensor_str : str
142146
'45sensor' or '90sensor'.
143147
@@ -191,7 +195,7 @@ def empty_pset_dataset(
191195
).copy()
192196
dtype = attrs.pop("dtype")
193197
coords["calibration_prod"] = xr.DataArray(
194-
np.arange(n_cal_prods, dtype=dtype),
198+
cal_prod_numbers.astype(dtype),
195199
name="calibration_prod",
196200
dims=["calibration_prod"],
197201
attrs=attrs,
@@ -349,6 +353,12 @@ def pset_counts(
349353
fill_value=0,
350354
)
351355

356+
# Create mapping from calibration product numbers to array indices
357+
cal_prod_to_index = {
358+
cal_prod: idx
359+
for idx, cal_prod in enumerate(pset_coords["calibration_prod"].values)
360+
}
361+
352362
# Drop events with FILLVAL for trigger_id. This should only occur for a
353363
# pointing with no events that gets a single fill event
354364
de_ds = l1b_de_dataset.drop_dims("epoch")
@@ -406,9 +416,10 @@ def pset_counts(
406416
# When iterating over rows of a dataframe, the names of the multi-index
407417
# are not preserved. Below, `config_row.Index[0]` gets the
408418
# calibration_prod value from the namedtuple representing the
409-
# dataframe row.
419+
# dataframe row. We map this to the array index using cal_prod_to_index.
420+
i_cal_prod = cal_prod_to_index[config_row.Index[0]]
410421
np.add.at(
411-
counts_var["counts"].data[0, i_esa, config_row.Index[0]],
422+
counts_var["counts"].data[0, i_esa, i_cal_prod],
412423
spin_bin_indices,
413424
1,
414425
)

imap_processing/hi/utils.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import numpy as np
1212
import pandas as pd
1313
import xarray as xr
14+
from numpy import typing as npt
1415

1516
from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes
1617

@@ -501,3 +502,21 @@ def number_of_products(self) -> int:
501502
calibration product definitions.
502503
"""
503504
return len(self._obj.index.unique(level="calibration_prod"))
505+
506+
@property
507+
def calibration_product_numbers(self) -> npt.NDArray[np.int_]:
508+
"""
509+
Get the calibration product numbers from the current configuration.
510+
511+
Returns
512+
-------
513+
cal_prod_numbers : numpy.ndarray
514+
Array of calibration product numbers from the configuration.
515+
These are sorted in ascending order and can be arbitrary integers.
516+
"""
517+
return (
518+
self._obj.index.get_level_values("calibration_prod")
519+
.unique()
520+
.sort_values()
521+
.values
522+
)

imap_processing/tests/hi/test_hi_l1c.py

Lines changed: 106 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Test coverage for imap_processing.hi.l1c.hi_l1c.py"""
22

3+
import io
34
from collections import namedtuple
45
from unittest import mock
56
from unittest.mock import MagicMock
@@ -146,15 +147,16 @@ def test_empty_pset_dataset(use_fake_repoint_data_for_time):
146147
data=np.concat((np.arange(n_energy_steps + 1).repeat(2), np.array([255, 255]))),
147148
attrs={"FILLVAL": 255},
148149
)
149-
n_calibration_prods = 5
150+
# Create calibration product numbers array (0, 1, 2, 3, 4)
151+
cal_prod_numbers = np.arange(5)
150152
sensor_str = HIAPID.H90_SCI_DE.sensor
151153
l1b_met = 482373065
152154
use_fake_repoint_data_for_time(
153155
np.asarray([l1b_met - 15 * 60, l1b_met + 24 * 60 * 60])
154156
)
155157

156158
dataset = hi_l1c.empty_pset_dataset(
157-
l1b_met, l1b_esa_energy_steps, n_calibration_prods, sensor_str
159+
l1b_met, l1b_esa_energy_steps, cal_prod_numbers, sensor_str
158160
)
159161

160162
assert dataset.epoch.size == 1
@@ -164,7 +166,8 @@ def test_empty_pset_dataset(use_fake_repoint_data_for_time):
164166
np.testing.assert_array_equal(
165167
dataset.esa_energy_step.data, np.arange(n_energy_steps) + 1
166168
)
167-
assert dataset.calibration_prod.size == n_calibration_prods
169+
assert dataset.calibration_prod.size == len(cal_prod_numbers)
170+
np.testing.assert_array_equal(dataset.calibration_prod.data, cal_prod_numbers)
168171

169172
# verify that attrs defined in hi_pset_epoch have overwritten default
170173
# epoch attributes
@@ -229,7 +232,7 @@ def test_pset_counts(
229232
empty_pset = hi_l1c.empty_pset_dataset(
230233
100,
231234
l1b_dataset.esa_energy_step,
232-
cal_config_df.cal_prod_config.number_of_products,
235+
cal_config_df.cal_prod_config.calibration_product_numbers,
233236
HIAPID.H90_SCI_DE.sensor,
234237
)
235238
counts_var = hi_l1c.pset_counts(empty_pset.coords, cal_config_df, l1b_dataset)
@@ -255,7 +258,7 @@ def test_pset_counts_empty_l1b(
255258
empty_pset = hi_l1c.empty_pset_dataset(
256259
100,
257260
l1b_dataset.esa_energy_step,
258-
cal_config_df.cal_prod_config.number_of_products,
261+
cal_config_df.cal_prod_config.calibration_product_numbers,
259262
HIAPID.H90_SCI_DE.sensor,
260263
)
261264
counts_var = hi_l1c.pset_counts(empty_pset.coords, cal_config_df, l1b_dataset)
@@ -325,6 +328,103 @@ def test_get_tof_window_mask():
325328
np.testing.assert_array_equal(expected_mask, window_mask)
326329

327330

331+
def test_empty_pset_dataset_arbitrary_cal_prod_numbers(use_fake_repoint_data_for_time):
332+
"""Test empty_pset_dataset with non-sequential calibration product numbers."""
333+
n_energy_steps = 3
334+
l1b_esa_energy_steps = xr.DataArray(
335+
data=np.concat((np.arange(n_energy_steps + 1).repeat(2), np.array([255, 255]))),
336+
attrs={"FILLVAL": 255},
337+
)
338+
# Use non-sequential calibration product numbers
339+
cal_prod_numbers = np.array([5, 10, 100])
340+
sensor_str = HIAPID.H45_SCI_DE.sensor
341+
l1b_met = 482373065
342+
use_fake_repoint_data_for_time(
343+
np.asarray([l1b_met - 15 * 60, l1b_met + 24 * 60 * 60])
344+
)
345+
346+
dataset = hi_l1c.empty_pset_dataset(
347+
l1b_met, l1b_esa_energy_steps, cal_prod_numbers, sensor_str
348+
)
349+
350+
# Verify calibration_prod coordinate has the correct non-sequential values
351+
assert dataset.calibration_prod.size == len(cal_prod_numbers)
352+
np.testing.assert_array_equal(dataset.calibration_prod.data, cal_prod_numbers)
353+
# Verify the calibration_prod_label reflects the actual numbers
354+
expected_labels = np.array(["5", "10", "100"])
355+
np.testing.assert_array_equal(dataset.calibration_prod_label.data, expected_labels)
356+
357+
358+
@pytest.mark.external_test_data
359+
def test_pset_counts_arbitrary_cal_prod_numbers(
360+
hi_l1_test_data_path, use_fake_repoint_data_for_time
361+
):
362+
"""Test pset_counts with non-sequential calibration product numbers."""
363+
# Create a test calibration product config with non-sequential numbers
364+
csv_content = """\
365+
calibration_prod,esa_energy_step,geometric_factor,coincidence_type_list,tof_ab_low,tof_ab_high,tof_ac1_low,tof_ac1_high,tof_bc1_low,tof_bc1_high,tof_c1c2_low,tof_c1c2_high
366+
5,1,0.00055,ABC1C2,0,1023,-1023,1023,-1023,1023,0,1023
367+
5,2,0.00085,ABC1C2,0,1023,-1023,1023,-1023,1023,0,1023
368+
10,1,0.00055,BC1C2,0,1023,-1023,1023,-1023,1023,0,1023
369+
10,2,0.00085,BC1C2,0,1023,-1023,1023,-1023,1023,0,1023
370+
"""
371+
372+
l1b_de_path = hi_l1_test_data_path / "imap_hi_l1b_45sensor-de_20250415_v999.cdf"
373+
l1b_dataset = load_cdf(l1b_de_path)
374+
375+
cal_config_df = imap_processing.hi.utils.CalibrationProductConfig.from_csv(
376+
io.StringIO(csv_content)
377+
)
378+
379+
# Create PSET with non-sequential calibration product numbers
380+
l1b_met = 482373065
381+
use_fake_repoint_data_for_time(
382+
np.asarray([l1b_met - 15 * 60, l1b_met + 24 * 60 * 60])
383+
)
384+
385+
empty_pset = hi_l1c.empty_pset_dataset(
386+
l1b_met,
387+
l1b_dataset.esa_energy_step,
388+
cal_config_df.cal_prod_config.calibration_product_numbers,
389+
HIAPID.H90_SCI_DE.sensor,
390+
)
391+
392+
# Verify the calibration_prod coordinate has non-sequential values
393+
np.testing.assert_array_equal(empty_pset.calibration_prod.data, np.array([5, 10]))
394+
395+
# Mock get_pointing_times to avoid SPICE kernel requirements
396+
with mock.patch(
397+
"imap_processing.hi.hi_l1c.get_pointing_times", return_value=(100, 200)
398+
):
399+
counts_var = hi_l1c.pset_counts(empty_pset.coords, cal_config_df, l1b_dataset)
400+
401+
# Verify counts array has correct shape based on coordinates
402+
assert "counts" in counts_var
403+
# Shape should be (n_epoch, n_esa_energy, n_cal_prod, n_spin_bins)
404+
# where n_cal_prod is 2 (for products 5 and 10)
405+
expected_shape = (
406+
1,
407+
empty_pset.esa_energy_step.size,
408+
2, # Two calibration products: 5 and 10
409+
3600,
410+
)
411+
assert counts_var["counts"].data.shape == expected_shape
412+
# Check that total number of expected counts is correct
413+
# ABC1C2 is coincidence type 15
414+
esa_1_2_mask = (l1b_dataset["esa_step"][l1b_dataset["ccsds_index"]] < 3).values
415+
coincidence_15_mask = (l1b_dataset["coincidence_type"] == 15).values
416+
np.testing.assert_equal(
417+
np.sum(counts_var["counts"].data[:, :, 0]),
418+
np.sum(coincidence_15_mask & esa_1_2_mask),
419+
)
420+
# BC1C2 is coincidence type 7
421+
coincidence_7_mask = (l1b_dataset["coincidence_type"] == 7).values
422+
np.testing.assert_equal(
423+
np.sum(counts_var["counts"].data[:, :, 1]),
424+
np.sum(coincidence_7_mask & esa_1_2_mask),
425+
)
426+
427+
328428
def test_pset_backgrounds():
329429
"""Test coverage for pset_backgrounds function."""
330430
# Create some fake coordinates to use
@@ -369,7 +469,7 @@ def test_pset_exposure(
369469
attrs={"FILLVAL": 255},
370470
)
371471
empty_pset = hi_l1c.empty_pset_dataset(
372-
100, l1b_energy_steps, 2, HIAPID.H90_SCI_DE.sensor
472+
100, l1b_energy_steps, np.array([0, 1]), HIAPID.H90_SCI_DE.sensor
373473
)
374474
# Set the mock of find_second_de_packet_data to return a xr.Dataset
375475
# with some dummy data. ESA 1 will get binned data once, ESA 2 will get

imap_processing/tests/hi/test_utils.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""Test coverage for imap_processing.hi.utils.py"""
22

3+
import io
4+
35
import numpy as np
46
import pandas as pd
57
import pytest
@@ -372,3 +374,35 @@ def test_number_of_products(self, hi_test_cal_prod_config_path):
372374
hi_test_cal_prod_config_path
373375
)
374376
assert df.cal_prod_config.number_of_products == 2
377+
378+
def test_calibration_product_numbers(self, hi_test_cal_prod_config_path):
379+
"""Test coverage for calibration_product_numbers accessor."""
380+
df = imap_processing.hi.utils.CalibrationProductConfig.from_csv(
381+
hi_test_cal_prod_config_path
382+
)
383+
cal_prod_numbers = df.cal_prod_config.calibration_product_numbers
384+
# The test config file has calibration products 0 and 1
385+
np.testing.assert_array_equal(cal_prod_numbers, np.array([0, 1]))
386+
# Verify it's a numpy array of integers
387+
assert isinstance(cal_prod_numbers, np.ndarray)
388+
assert cal_prod_numbers.dtype in [np.int32, np.int64]
389+
390+
def test_calibration_product_numbers_arbitrary_values(self):
391+
"""Test calibration_product_numbers with arbitrary non-sequential values."""
392+
# Create a temporary CSV with non-sequential calibration product numbers
393+
csv_content = """\
394+
calibration_prod,esa_energy_step,geometric_factor,coincidence_type_list,tof_ab_low,tof_ab_high,tof_ac1_low,tof_ac1_high,tof_bc1_low,tof_bc1_high,tof_c1c2_low,tof_c1c2_high
395+
10,1,0.00055,BC1C2,15,55,0,70,-50,10,5,25
396+
10,2,0.00085,BC1C2,15,55,0,70,-50,10,5,25
397+
5,1,0.00055,ABC1C2,15,55,0,70,-50,10,5,25
398+
5,2,0.00085,ABC1C2,15,55,0,70,-50,10,5,25
399+
100,1,0.00055,AC1,15,55,0,70,-50,10,5,25
400+
100,2,0.00085,AC1,15,55,0,70,-50,10,5,25
401+
"""
402+
403+
df = CalibrationProductConfig.from_csv(io.StringIO(csv_content))
404+
cal_prod_numbers = df.cal_prod_config.calibration_product_numbers
405+
406+
# Should return sorted unique calibration product numbers
407+
np.testing.assert_array_equal(cal_prod_numbers, np.array([5, 10, 100]))
408+
assert isinstance(cal_prod_numbers, np.ndarray)

0 commit comments

Comments
 (0)