Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A time clustering algorithm for image cleaning #2694

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 117 additions & 0 deletions src/ctapipe/image/cleaning.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

__all__ = [
"tailcuts_clean",
"time_clustering",
"bright_cleaning",
"dilate",
"mars_cleaning_1st_pass",
Expand All @@ -24,6 +25,7 @@
"nsb_image_cleaning",
"ImageCleaner",
"TailcutsImageCleaner",
"TimeCleaner",
"NSBImageCleaner",
"MARSImageCleaner",
"FACTImageCleaner",
Expand All @@ -33,6 +35,7 @@
from abc import abstractmethod

import numpy as np
from sklearn.cluster import DBSCAN

from ctapipe.image.statistics import n_largest

Expand Down Expand Up @@ -119,6 +122,78 @@ def tailcuts_clean(
)


def time_clustering(
geom,
image,
time,
minpts=5,
eps=1.0,
time_scale_ns=4.0,
space_scale_m=0.25,
hard_cut_pe=4,
image_add_pe=4,
):
"""
Clean an image by selecting pixels which pass a time clustering algorithm using DBSCAN.
Previously used for HESS [timecleaning]_.
As a neighbor-based image extractor algorithm can lead to biases in the time reconstruction of noise pixels,
specially those next to the shower, a cut in the minimum signal image is applied.
DBSCAN runs with the reconstructed times and pixel positions after scaling. Scaling is needed because eps
is not dimension dependent. If scaling is performed properly, eps can be set to 1. DBSCAN returns the
cluster IDs of each point. Pixels associated to cluster ID -1 are classified as noise.
At the end, all pixels above image_add_pe cut (even those that did not pass dbscan) pass also the cleaning
if they have at least one neighbour above the same threshold.
Parameters
----------
geom: `ctapipe.instrument.CameraGeometry`
Camera geometry information
image: array
pixel charge information
time: array
pixel timing information
minpts: int
Minimum number of points to consider a cluster
eps: float
Minimum distance in dbscan
time_scale_ns: float
Time scale in ns
space_scale: float
Space scale in m
hard_cut_pe: float
Hard cut in the number of signal pe
image_add_pe: float
Cut in pe above which a pixel will pass even if dbscan did not find it.
Returns
-------
A boolean mask of *clean* pixels.
"""
precut_mask = image > hard_cut_pe
arr = np.zeros(len(image), dtype=float)
arr[~precut_mask] = -1

pix_x = geom.pix_x.value[precut_mask] / space_scale_m
pix_y = geom.pix_y.value[precut_mask] / space_scale_m

if len(time[precut_mask]) == 0:
mask = np.zeros(len(time)) != 0
else:
X = np.column_stack((time[precut_mask] / time_scale_ns, pix_x, pix_y))
labels = DBSCAN(eps=eps, min_samples=minpts).fit_predict(X)

y = np.array(arr[(arr == 0)])
y[(labels == -1)] = -1
arr[arr == 0] = y
mask = arr == 0 # we keep these events

high_charge = image_add_pe
neighs = 1
number_of_neighbors = geom.neighbor_matrix_sparse.dot((image >= high_charge))

mask = mask | ((image >= high_charge) & (number_of_neighbors >= neighs))

return mask


def bright_cleaning(image, threshold, fraction, n_pixels=3):
"""
Clean an image by removing pixels below a fraction of the mean charge
Expand Down Expand Up @@ -725,6 +800,48 @@ def __call__(
)


class TimeCleaner(ImageCleaner):
"""
Clean images using the time clustering cleaning method
"""

space_scale_m = FloatTelescopeParameter(
default_value=0.25, help="Pixel space scaling parameter in m"
).tag(config=True)
time_scale_ns = FloatTelescopeParameter(
default_value=4.0, help="Time scale parameter in ns"
).tag(config=True)
minpts = IntTelescopeParameter(
default_value=5, help="minimum number of points to form a cluster"
).tag(config=True)
eps = FloatTelescopeParameter(
default_value=1.0, help="minimum distance in DBSCAN"
).tag(config=True)
hard_cut_pe = FloatTelescopeParameter(
default_value=2.5, help="Hard cut in the number of pe"
).tag(config=True)
image_add_pe = FloatTelescopeParameter(
default_value=4,
help="Add all pixels with signal above this cut and with at least one neighbour above the same threshold",
).tag(config=True)

def __call__(
self, tel_id: int, image: np.ndarray, arrival_times=None
) -> np.ndarray:
"""Apply HESS image cleaning. see ImageCleaner.__call__()"""
return time_clustering(
geom=self.subarray.tel[tel_id].camera.geometry,
image=image,
time=arrival_times,
eps=self.eps.tel[tel_id],
space_scale_m=self.space_scale_m.tel[tel_id],
time_scale_ns=self.time_scale_ns.tel[tel_id],
minpts=self.minpts.tel[tel_id],
hard_cut_pe=self.hard_cut_pe.tel[tel_id],
image_add_pe=self.image_add_pe.tel[tel_id],
)


class NSBImageCleaner(TailcutsImageCleaner):
"""
Clean images based on lstchains image cleaning technique described in
Expand Down
87 changes: 86 additions & 1 deletion src/ctapipe/image/reducer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
TelescopeParameter,
)
from ctapipe.image import TailcutsImageCleaner
from ctapipe.image.cleaning import dilate
from ctapipe.image.cleaning import TimeCleaner, dilate
from ctapipe.image.extractor import ImageExtractor

__all__ = ["DataVolumeReducer", "NullDataVolumeReducer", "TailCutsDataVolumeReducer"]
Expand Down Expand Up @@ -214,3 +214,88 @@ def select_pixels(self, waveforms, tel_id=None, selected_gain_channel=None):
mask = dilate(camera_geom, mask)

return mask


class TimeDataVolumeReducer(DataVolumeReducer):
"""
Reduce the time integrated shower image in 3 Steps:
1) Select pixels with tailcuts_clean.
2) Add iteratively all pixels with Signal S >= boundary_thresh
with ctapipe module dilate until no new pixels were added.
3) Adding new pixels with dilate to get more conservative.
Attributes
----------
image_extractor_type: String
Name of the image_extractor to be used.
n_end_dilates: IntTelescopeParameter
Number of how many times to dilate at the end.
"""

image_extractor_type = TelescopeParameter(
trait=ComponentName(ImageExtractor, default_value="NeighborPeakWindowSum"),
default_value="NeighborPeakWindowSum",
help="Name of the ImageExtractor subclass to be used.",
).tag(config=True)
n_end_dilates = IntTelescopeParameter(
default_value=0, help="Number of how many times to dilate at the end."
).tag(config=True)

def __init__(
self,
subarray,
config=None,
parent=None,
cleaner=None,
image_extractor=None,
**kwargs,
):
"""
Parameters
----------
subarray: ctapipe.instrument.SubarrayDescription
Description of the subarray
config: traitlets.loader.Config
Configuration specified by config file or cmdline arguments.
Used to set traitlet values.
Set to None if no configuration to pass.
kwargs
"""
super().__init__(config=config, parent=parent, subarray=subarray, **kwargs)

if cleaner is None:
self.cleaner = TimeCleaner(parent=self, subarray=self.subarray)
else:
self.cleaner = cleaner

self.image_extractors = {}
if image_extractor is None:
for _, _, name in self.image_extractor_type:
self.image_extractors[name] = ImageExtractor.from_name(
name, subarray=self.subarray, parent=self
)
else:
name = image_extractor.__class__.__name__
self.image_extractor_type = [("type", "*", name)]
self.image_extractors[name] = image_extractor

def select_pixels(self, waveforms, tel_id=None, selected_gain_channel=None):
camera_geom = self.subarray.tel[tel_id].camera.geometry
# Pulse-integrate waveforms
extractor = self.image_extractors[self.image_extractor_type.tel[tel_id]]
# do not treat broken pixels in data volume reduction
# broken_pixels = np.zeros(camera_geom.n_pixels, dtype=bool)
broken_pixels = np.zeros(camera_geom.n_pixels, dtype=bool)
dl1: DL1CameraContainer = extractor(
waveforms,
tel_id=tel_id,
selected_gain_channel=selected_gain_channel,
broken_pixels=broken_pixels,
)

mask = self.cleaner(tel_id, dl1.image, dl1.peak_time)

for _ in range(self.n_end_dilates.tel[tel_id]):
mask = dilate(camera_geom, mask)
mask = dilate(camera_geom, mask)

return mask
Loading