diff --git a/docs/changes/2659.bugfix.rst b/docs/changes/2659.bugfix.rst new file mode 100644 index 00000000000..1d82b35517a --- /dev/null +++ b/docs/changes/2659.bugfix.rst @@ -0,0 +1,10 @@ +Fix error in ``ctapipe-process`` when in the middle of a simtel file +that has true images available, a telescope event is missing the true image. +This can happen rarely in case a telescope triggered on pure NSB or +is oversaturated to the point where the true pe didn't fit into memory constraints. + +The error was due to the ``DataWriter`` trying to write a ``None`` into an +already setup table for the true images. + +The ``SimTelEventSource`` will now create an invalid true image filled with ``-1`` +for such events. diff --git a/src/ctapipe/io/simteleventsource.py b/src/ctapipe/io/simteleventsource.py index 45d35b03b51..4c98e864aac 100644 --- a/src/ctapipe/io/simteleventsource.py +++ b/src/ctapipe/io/simteleventsource.py @@ -83,6 +83,11 @@ "MirrorClass", ] +# default for MAX_PHOTOELECTRONS in simtel_array is 2_500_000 +# so this should be a safe limit to distinguish "dim image true missing" +# from "extremely bright true image missing", see issue #2344 +MISSING_IMAGE_BRIGHTNESS_LIMIT = 1_000_000 + # Mapping of SimTelArray Calibration trigger types to EventType: # from simtelarray: type Dark (0), pedestal (1), in-lid LED (2) or laser/LED (3+) data. SIMTEL_TO_CTA_EVENT_TYPE = { @@ -570,6 +575,7 @@ def __init__(self, input_url=Undefined, config=None, parent=None, **kwargs): self.file_, kind=self.atmosphere_profile_choice ) + self._has_true_image = None self.log.debug(f"Using gain selector {self.gain_selector}") def __exit__(self, exc_type, exc_val, exc_tb): @@ -861,6 +867,20 @@ def _generate_events(self): impact_distances = np.full(len(self.subarray), np.nan) * u.m for tel_id, telescope_event in telescope_events.items(): + if tel_id not in trigger.tels_with_trigger: + # skip additional telescopes that have data but did not + # participate in the subarray trigger decision for this stereo event + # see #2660 for details + self.log.warning( + "Encountered telescope event not present in" + " stereo trigger information, skipping." + " event_id = %d, tel_id = %d, tels_with_trigger: %s", + event_id, + tel_id, + trigger.tels_with_trigger, + ) + continue + adc_samples = telescope_event.get("adc_samples") if adc_samples is None: adc_samples = telescope_event["adc_sums"][:, :, np.newaxis] @@ -871,6 +891,30 @@ def _generate_events(self): .get(tel_id - 1, {}) .get("photoelectrons", None) ) + true_image_sum = true_image_sums[ + self.telescope_indices_original[tel_id] + ] + + if self._has_true_image is None: + self._has_true_image = true_image is not None + + if self._has_true_image and true_image is None: + if true_image_sum > MISSING_IMAGE_BRIGHTNESS_LIMIT: + self.log.warning( + "Encountered extremely bright telescope event with missing true_image in" + "file that has true images: event_id = %d, tel_id = %d." + "event might be truncated, skipping telescope event", + event_id, + tel_id, + ) + continue + else: + self.log.info( + "telescope event event_id = %d, tel_id = %d is missing true image", + event_id, + tel_id, + ) + true_image = np.full(n_pixels, -1, dtype=np.int32) if data.simulation is not None: if data.simulation.shower is not None: @@ -887,9 +931,7 @@ def _generate_events(self): ) data.simulation.tel[tel_id] = SimulatedCameraContainer( - true_image_sum=true_image_sums[ - self.telescope_indices_original[tel_id] - ], + true_image_sum=true_image_sum, true_image=true_image, impact=impact_container, ) diff --git a/src/ctapipe/io/tableloader.py b/src/ctapipe/io/tableloader.py index 2b10ad96506..041f68f5151 100644 --- a/src/ctapipe/io/tableloader.py +++ b/src/ctapipe/io/tableloader.py @@ -125,7 +125,9 @@ def _join_telescope_events(table1, table2): def _merge_table_same_index(table1, table2, index_keys, fallback_join_type="left"): """Merge two tables assuming their primary keys are identical""" if len(table1) != len(table2): - raise ValueError("Tables must have identical length") + raise ValueError( + f"Tables must have identical length, {len(table1)} != {len(table2)}" + ) if len(table1) == 0: return table1 diff --git a/src/ctapipe/io/tests/test_datawriter.py b/src/ctapipe/io/tests/test_datawriter.py index c42dcca6858..f4924b4c3dd 100644 --- a/src/ctapipe/io/tests/test_datawriter.py +++ b/src/ctapipe/io/tests/test_datawriter.py @@ -201,15 +201,16 @@ def test_roundtrip(tmpdir: Path): # make sure it is readable by the event source and matches the images - for event in EventSource(output_path): - for tel_id, dl1 in event.dl1.tel.items(): - original_image = events[event.count].dl1.tel[tel_id].image - read_image = dl1.image - assert np.allclose(original_image, read_image, atol=0.1) - - original_peaktime = events[event.count].dl1.tel[tel_id].peak_time - read_peaktime = dl1.peak_time - assert np.allclose(original_peaktime, read_peaktime, atol=0.01) + with EventSource(output_path) as source: + for event in source: + for tel_id, dl1 in event.dl1.tel.items(): + original_image = events[event.count].dl1.tel[tel_id].image + read_image = dl1.image + assert np.allclose(original_image, read_image, atol=0.1) + + original_peaktime = events[event.count].dl1.tel[tel_id].peak_time + read_peaktime = dl1.peak_time + assert np.allclose(original_peaktime, read_peaktime, atol=0.01) def test_dl1writer_no_events(tmpdir: Path): diff --git a/src/ctapipe/io/tests/test_hdf5.py b/src/ctapipe/io/tests/test_hdf5.py index 681567068a6..65b9e9088da 100644 --- a/src/ctapipe/io/tests/test_hdf5.py +++ b/src/ctapipe/io/tests/test_hdf5.py @@ -988,19 +988,19 @@ class Container1(Container): # But when explicitly giving the prefixes, this works and order # should not be important - reader = HDF5TableReader(path) - generator = reader.read( - "/values", - [Container1, Container1], - prefixes=["bar", "foo"], - ) + with HDF5TableReader(path) as reader: + generator = reader.read( + "/values", + [Container1, Container1], + prefixes=["bar", "foo"], + ) - for value in (1, 2, 3): - c1, c2 = next(generator) - assert c1.value == 5 * value - assert c1.prefix == "bar" - assert c2.value == value - assert c2.prefix == "foo" + for value in (1, 2, 3): + c1, c2 = next(generator) + assert c1.value == 5 * value + assert c1.prefix == "bar" + assert c2.value == value + assert c2.prefix == "foo" @pytest.mark.parametrize("input_type", (str, Path, tables.File)) diff --git a/src/ctapipe/io/tests/test_simteleventsource.py b/src/ctapipe/io/tests/test_simteleventsource.py index 7296fb7def5..a285160c056 100644 --- a/src/ctapipe/io/tests/test_simteleventsource.py +++ b/src/ctapipe/io/tests/test_simteleventsource.py @@ -653,3 +653,38 @@ def test_provenance(provenance, prod5_gamma_simtel_path): assert len(inputs) == 1 assert inputs[0]["url"] == str(prod5_gamma_simtel_path) assert inputs[0]["reference_meta"] is None + + +def test_prod6_issues(): + """Test behavior of source on file from prod6, see issues #2344 and #2660""" + input_url = "dataset://prod6_issues.simtel.zst" + + events_checked_trigger = set() + events_checked_image = set() + + # events with two telescope events but only one in stereo trigger in simtel + strange_trigger_events = { + 1548602: 3, + 2247909: 32, + 3974908: 2, + 4839806: 1, + } + missing_true_images = {1664106: 32} + + with SimTelEventSource(input_url) as source: + for e in source: + event_id = e.index.event_id + if event_id in strange_trigger_events: + expected_tel_id = strange_trigger_events[event_id] + np.testing.assert_equal(e.trigger.tels_with_trigger, [expected_tel_id]) + assert e.trigger.tel.keys() == {expected_tel_id} + assert e.r1.tel.keys() == {expected_tel_id} + events_checked_trigger.add(event_id) + + if event_id in missing_true_images: + tel_id = missing_true_images[event_id] + np.testing.assert_equal(e.simulation.tel[tel_id].true_image, -1) + events_checked_image.add(event_id) + + assert strange_trigger_events.keys() == events_checked_trigger + assert missing_true_images.keys() == events_checked_image diff --git a/src/ctapipe/tools/tests/test_process.py b/src/ctapipe/tools/tests/test_process.py index 49e70144602..24a4a4f4ef1 100644 --- a/src/ctapipe/tools/tests/test_process.py +++ b/src/ctapipe/tools/tests/test_process.py @@ -535,3 +535,36 @@ def test_on_old_file(input_url, args, tmp_path): np.testing.assert_array_equal( finite_reco, events["HillasReconstructor_is_valid"] ) + + +def test_prod6_issues(tmp_path): + """Test behavior of source on file from prod6, see issues #2344 and #2660""" + input_url = "dataset://prod6_issues.simtel.zst" + output_path = tmp_path / "test.dl1.h5" + + run_tool( + ProcessorTool(), + argv=[ + f"--input={input_url}", + f"--output={output_path}", + "--write-images", + "--write-showers", + "--overwrite", + ], + cwd=tmp_path, + raises=True, + ) + + with TableLoader(output_path) as loader: + tel_events = loader.read_telescope_events() + subarray_events = loader.read_subarray_events() + + trigger_counts = np.count_nonzero(subarray_events["tels_with_trigger"], axis=0) + _, tel_event_counts = np.unique(tel_events["tel_id"], return_counts=True) + + mask = trigger_counts > 0 + np.testing.assert_equal(trigger_counts[mask], tel_event_counts) + + images = loader.read_telescope_events([32], true_images=True) + images.add_index("event_id") + np.testing.assert_array_equal(images.loc[1664106]["true_image"], -1)