From 29fbebd6fe1901b06ff75928a95eda50dd53353e Mon Sep 17 00:00:00 2001 From: Nathan Prouvost Date: Fri, 15 Nov 2024 14:16:59 +0100 Subject: [PATCH 1/7] add first version of selection of dataset depending on channel --- hbt/selection/lepton.py | 141 ++++++++++++++++++++-------------------- 1 file changed, 72 insertions(+), 69 deletions(-) diff --git a/hbt/selection/lepton.py b/hbt/selection/lepton.py index 5a22a61..69ba301 100644 --- a/hbt/selection/lepton.py +++ b/hbt/selection/lepton.py @@ -462,79 +462,82 @@ def lepton_selection( # lepton pair selecton per trigger via lepton counting if trigger.has_tag({"single_e", "cross_e_tau"}): - # expect 1 electron, 1 veto electron (the same one), 0 veto muons, and at least one tau - is_etau = ( - trigger_fired & - (ak.num(electron_indices, axis=1) == 1) & - (ak.num(electron_veto_indices, axis=1) == 1) & - (ak.num(muon_veto_indices, axis=1) == 0) & - (ak.num(tau_indices, axis=1) >= 1) - ) - is_iso = ak.sum(tau_iso_mask, axis=1) >= 1 - # determine the os/ss charge sign relation - e_charge = ak.firsts(events.Electron[electron_indices].charge, axis=1) - tau_charge = ak.firsts(events.Tau[tau_indices].charge, axis=1) - is_os = e_charge == -tau_charge - # store global variables - channel_id = update_channel_ids(events, channel_id, ch_etau.id, is_etau) - tau2_isolated = ak.where(is_etau, is_iso, tau2_isolated) - leptons_os = ak.where(is_etau, is_os, leptons_os) - single_triggered = ak.where(is_etau & is_single, True, single_triggered) - cross_triggered = ak.where(is_etau & is_cross, True, cross_triggered) - sel_electron_indices = ak.where(is_etau, electron_indices, sel_electron_indices) - sel_tau_indices = ak.where(is_etau, tau_indices, sel_tau_indices) + if (self.dataset_inst.is_mc) or ("data_e_" in self.dataset_inst.name): + # expect 1 electron, 1 veto electron (the same one), 0 veto muons, and at least one tau + is_etau = ( + trigger_fired & + (ak.num(electron_indices, axis=1) == 1) & + (ak.num(electron_veto_indices, axis=1) == 1) & + (ak.num(muon_veto_indices, axis=1) == 0) & + (ak.num(tau_indices, axis=1) >= 1) + ) + is_iso = ak.sum(tau_iso_mask, axis=1) >= 1 + # determine the os/ss charge sign relation + e_charge = ak.firsts(events.Electron[electron_indices].charge, axis=1) + tau_charge = ak.firsts(events.Tau[tau_indices].charge, axis=1) + is_os = e_charge == -tau_charge + # store global variables + channel_id = update_channel_ids(events, channel_id, ch_etau.id, is_etau) + tau2_isolated = ak.where(is_etau, is_iso, tau2_isolated) + leptons_os = ak.where(is_etau, is_os, leptons_os) + single_triggered = ak.where(is_etau & is_single, True, single_triggered) + cross_triggered = ak.where(is_etau & is_cross, True, cross_triggered) + sel_electron_indices = ak.where(is_etau, electron_indices, sel_electron_indices) + sel_tau_indices = ak.where(is_etau, tau_indices, sel_tau_indices) elif trigger.has_tag({"single_mu", "cross_mu_tau"}): - # expect 1 muon, 1 veto muon (the same one), 0 veto electrons, and at least one tau - is_mutau = ( - trigger_fired & - (ak.num(muon_indices, axis=1) == 1) & - (ak.num(muon_veto_indices, axis=1) == 1) & - (ak.num(electron_veto_indices, axis=1) == 0) & - (ak.num(tau_indices, axis=1) >= 1) - ) - is_iso = ak.sum(tau_iso_mask, axis=1) >= 1 - # determine the os/ss charge sign relation - mu_charge = ak.firsts(events.Muon[muon_indices].charge, axis=1) - tau_charge = ak.firsts(events.Tau[tau_indices].charge, axis=1) - is_os = mu_charge == -tau_charge - # store global variables - channel_id = update_channel_ids(events, channel_id, 
ch_mutau.id, is_mutau) - tau2_isolated = ak.where(is_mutau, is_iso, tau2_isolated) - leptons_os = ak.where(is_mutau, is_os, leptons_os) - single_triggered = ak.where(is_mutau & is_single, True, single_triggered) - cross_triggered = ak.where(is_mutau & is_cross, True, cross_triggered) - sel_muon_indices = ak.where(is_mutau, muon_indices, sel_muon_indices) - sel_tau_indices = ak.where(is_mutau, tau_indices, sel_tau_indices) + if (self.dataset_inst.is_mc) or ("data_mu_" in self.dataset_inst.name): + # expect 1 muon, 1 veto muon (the same one), 0 veto electrons, and at least one tau + is_mutau = ( + trigger_fired & + (ak.num(muon_indices, axis=1) == 1) & + (ak.num(muon_veto_indices, axis=1) == 1) & + (ak.num(electron_veto_indices, axis=1) == 0) & + (ak.num(tau_indices, axis=1) >= 1) + ) + is_iso = ak.sum(tau_iso_mask, axis=1) >= 1 + # determine the os/ss charge sign relation + mu_charge = ak.firsts(events.Muon[muon_indices].charge, axis=1) + tau_charge = ak.firsts(events.Tau[tau_indices].charge, axis=1) + is_os = mu_charge == -tau_charge + # store global variables + channel_id = update_channel_ids(events, channel_id, ch_mutau.id, is_mutau) + tau2_isolated = ak.where(is_mutau, is_iso, tau2_isolated) + leptons_os = ak.where(is_mutau, is_os, leptons_os) + single_triggered = ak.where(is_mutau & is_single, True, single_triggered) + cross_triggered = ak.where(is_mutau & is_cross, True, cross_triggered) + sel_muon_indices = ak.where(is_mutau, muon_indices, sel_muon_indices) + sel_tau_indices = ak.where(is_mutau, tau_indices, sel_tau_indices) elif trigger.has_tag({"cross_tau_tau", "cross_tau_tau_vbf", "cross_tau_tau_jet"}): - # expect 0 veto electrons, 0 veto muons and at least two taus of which one is isolated - is_tautau = ( - trigger_fired & - (ak.num(electron_veto_indices, axis=1) == 0) & - (ak.num(muon_veto_indices, axis=1) == 0) & - (ak.num(tau_indices, axis=1) >= 2) & - (ak.sum(tau_iso_mask, axis=1) >= 1) - ) - # special case for cross tau vbf trigger: - # to avoid overlap, with non-vbf triggers, only one tau is allowed to have pt > 40 - if trigger.has_tag("cross_tau_tau_vbf"): - is_tautau = is_tautau & (ak.sum(events.Tau[tau_indices].pt > 40, axis=1) <= 1) - is_iso = ak.sum(tau_iso_mask, axis=1) >= 2 - # tau_indices are sorted by highest isolation as cond. 1 and highest pt as cond. 
2, so - # the first two indices are exactly those selected by the full-blown pairing algorithm - # and there is no need here to apply it again :) - # determine the os/ss charge sign relation - tau1_charge = ak.firsts(events.Tau[tau_indices].charge, axis=1) - tau2_charge = ak.firsts(events.Tau[tau_indices].charge[..., 1:], axis=1) - is_os = tau1_charge == -tau2_charge - # store global variables - channel_id = update_channel_ids(events, channel_id, ch_tautau.id, is_tautau) - tau2_isolated = ak.where(is_tautau, is_iso, tau2_isolated) - leptons_os = ak.where(is_tautau, is_os, leptons_os) - single_triggered = ak.where(is_tautau & is_single, True, single_triggered) - cross_triggered = ak.where(is_tautau & is_cross, True, cross_triggered) - sel_tau_indices = ak.where(is_tautau, tau_indices, sel_tau_indices) + if (self.dataset_inst.is_mc) or ("data_tau_" in self.dataset_inst.name): + # expect 0 veto electrons, 0 veto muons and at least two taus of which one is isolated + is_tautau = ( + trigger_fired & + (ak.num(electron_veto_indices, axis=1) == 0) & + (ak.num(muon_veto_indices, axis=1) == 0) & + (ak.num(tau_indices, axis=1) >= 2) & + (ak.sum(tau_iso_mask, axis=1) >= 1) + ) + # special case for cross tau vbf trigger: + # to avoid overlap, with non-vbf triggers, only one tau is allowed to have pt > 40 + if trigger.has_tag("cross_tau_tau_vbf"): + is_tautau = is_tautau & (ak.sum(events.Tau[tau_indices].pt > 40, axis=1) <= 1) + is_iso = ak.sum(tau_iso_mask, axis=1) >= 2 + # tau_indices are sorted by highest isolation as cond. 1 and highest pt as cond. 2, so + # the first two indices are exactly those selected by the full-blown pairing algorithm + # and there is no need here to apply it again :) + # determine the os/ss charge sign relation + tau1_charge = ak.firsts(events.Tau[tau_indices].charge, axis=1) + tau2_charge = ak.firsts(events.Tau[tau_indices].charge[..., 1:], axis=1) + is_os = tau1_charge == -tau2_charge + # store global variables + channel_id = update_channel_ids(events, channel_id, ch_tautau.id, is_tautau) + tau2_isolated = ak.where(is_tautau, is_iso, tau2_isolated) + leptons_os = ak.where(is_tautau, is_os, leptons_os) + single_triggered = ak.where(is_tautau & is_single, True, single_triggered) + cross_triggered = ak.where(is_tautau & is_cross, True, cross_triggered) + sel_tau_indices = ak.where(is_tautau, tau_indices, sel_tau_indices) # some final type conversions channel_id = ak.values_astype(channel_id, np.uint8) From 0d34199f56ce2e9fb62398a825227c8e22d27269 Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Tue, 26 Nov 2024 14:28:38 +0100 Subject: [PATCH 2/7] Update upstream cf. 
--- law.cfg | 3 ++- modules/columnflow | 2 +- sandboxes/columnar_tf.txt | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/law.cfg b/law.cfg index 7e80b77..365058a 100644 --- a/law.cfg +++ b/law.cfg @@ -38,7 +38,7 @@ job_file_dir_mkdtemp: sub_{{task_id}}_XXX [analysis] default_analysis: hbt.config.analysis_hbt.analysis_hbt -default_config: run3_2022_postEE +default_config: run3_2022_preEE default_dataset: hh_ggf_hbb_htt_kl1_kt1_powheg calibration_modules: columnflow.calibration.cms.{jets,met,tau}, hbt.calibration.{default,fake_triggers} @@ -450,3 +450,4 @@ naf_wiedersb: 5000 naf_prouvost: 5000 naf_haddadan: 5000 naf_nguyenth: 5000 +naf_wardrobe: 5000 diff --git a/modules/columnflow b/modules/columnflow index 273b0ba..545a918 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit 273b0ba7049e4999b742910ab53a67ad94526233 +Subproject commit 545a918d94aa3ddb6c67f558d875023ec8c2308b diff --git a/sandboxes/columnar_tf.txt b/sandboxes/columnar_tf.txt index 869daec..7c7aab3 100644 --- a/sandboxes/columnar_tf.txt +++ b/sandboxes/columnar_tf.txt @@ -1,4 +1,4 @@ -# version 8 +# version 9 -r ../modules/columnflow/sandboxes/columnar.txt From afce61c07617331fb225b99b31c018174d1efc64 Mon Sep 17 00:00:00 2001 From: Bogdan-Wiederspan <79155113+Bogdan-Wiederspan@users.noreply.github.com> Date: Wed, 27 Nov 2024 13:50:26 +0100 Subject: [PATCH 3/7] Add hash function to find duplicate events (#49) * Add overlap checking task. * add a simple hash function that calculates a unique number using 'event', 'luminosityBlock' and 'run' information of a NanoAOD * linting * Completed the task to find overlapping events between our custom NanoAOD and the centrally produced NanoAOD. Due to different compressions between our and the central NanoAODs, we need to find out which events both have in common and filter them out. This task finds the overlapping events and saves the unique event identifiers as tuples in a json. This json also contains the relative number of overlapping events as information. * make it clearer why the value is padded to this specific value * rearrange the order of fields to the one used in the hash * add an assertion to check if the unique identifier columns exceed a specific value, which is given by data (and may be exceeded in the far future) * moved output of unique overlap identifiers from json to parquet file, use this file to create the filter mask in the sync csv task * linting * moved type cast to hash function, refactor names * move imports into hash function * linting and add maybe import for numpy * overlapping events should not be chunk dependent * add new check so that people will not create an int64 overflow * previous overlap finding compared chunkwise, but chunks are not always of the same size. Changed this to a global comparison. * Changed default value of file path to empty string, since None is resolved by law to `/None` * Added padding handling for arrays of dim < 2. Some array variables, e.g. MET.covXX, are of numpy type and not ListNumpy, meaning slicing is not possible. * added more variables for the sync * swapped the order of the components for the algorithm to get more headroom towards the limit of uint64 numbers --------- Co-authored-by: Marcel R. 
Co-authored-by: Marcel Rieger --- hbt/util.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hbt/util.py b/hbt/util.py index 5b50c02..5df01c0 100644 --- a/hbt/util.py +++ b/hbt/util.py @@ -77,18 +77,18 @@ def assert_value(arr: np.ndarray, field: str, max_value: int) -> None: assert digits <= max_value, f"{field} digit count is {digits} and exceed max value {max_value}" max_digits_run = 6 - max_digits_luminosityBlock = 5 + max_digits_luminosityBlock = 6 max_digits_event = 8 assert_value(arr, "run", max_digits_run) assert_value(arr, "luminosityBlock", max_digits_luminosityBlock) assert_value(arr, "event", max_digits_event) max_digits_hash = max_digits_event + max_digits_luminosityBlock + max_digits_run - assert max_digits_hash <= 19, "sum of digits exceeds int64" + assert max_digits_hash <= 20, "sum of digits exceeds int64" # upcast to int64 to avoid overflow return ( - ak.values_astype(arr.event, np.int64) * 10**(max_digits_luminosityBlock + max_digits_run) + - ak.values_astype(arr.luminosityBlock, np.int64) * 10**max_digits_run + - ak.values_astype(arr.run, np.int64) + ak.values_astype(arr.run, np.int64) * 10**(max_digits_luminosityBlock + max_digits_event) + + ak.values_astype(arr.luminosityBlock, np.int64) * 10**max_digits_event + + ak.values_astype(arr.event, np.int64) ) From dcfe264af561338fdece8aff707d68f338b6d16e Mon Sep 17 00:00:00 2001 From: Nathan Prouvost Date: Wed, 27 Nov 2024 14:15:48 +0100 Subject: [PATCH 4/7] update cmsdb to add single muon dataset to run c 2022 --- modules/cmsdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cmsdb b/modules/cmsdb index 2731b76..0403c5f 160000 --- a/modules/cmsdb +++ b/modules/cmsdb @@ -1 +1 @@ -Subproject commit 2731b76dc166fb5cad377e38958fb3ae77caa51c +Subproject commit 0403c5f981887b87f03e5d9673071629a1c1cec4 From 99f4ce0a32c784fa6f826a47a40a4987a023f57e Mon Sep 17 00:00:00 2001 From: "Marcel R." Date: Wed, 27 Nov 2024 14:23:54 +0100 Subject: [PATCH 5/7] Use deterministic jer in default calibration. 
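A note on the motivation, with a standalone sketch: JER smearing draws random numbers, so repeated runs or a different chunking of the input could smear the same jet differently. Deriving deterministic_jer with deterministic_seed_index=0 ties the smearing to the first random number generated from the per-event deterministic seeds instead. The snippet below only illustrates this idea of event-seeded, reproducible smearing with plain numpy; it is not columnflow code, and the seed, pt and resolution values are made up:

    import numpy as np

    def smear_pt(event_seed: int, pt: float, resolution: float = 0.1) -> float:
        # one RNG stream per event, seeded deterministically, so the result is
        # identical no matter how often or in which chunking the task runs
        rng = np.random.default_rng(event_seed)
        # consume the first random number of the stream, analogous to
        # deterministic_seed_index=0
        return pt * (1.0 + resolution * rng.standard_normal())

    # the same event seed always yields the same smeared value
    print(smear_pt(123456789, 40.0))
    print(smear_pt(123456789, 40.0))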
--- hbt/calibration/default.py | 9 ++++++--- modules/columnflow | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/hbt/calibration/default.py b/hbt/calibration/default.py index 98ed60f..6f1b567 100644 --- a/hbt/calibration/default.py +++ b/hbt/calibration/default.py @@ -20,14 +20,17 @@ # derive calibrators to add settings jec_full = jec.derive("jec_full", cls_dict={"mc_only": True, "nominal_only": True}) +# version of jer that uses the first random number from deterministic_seeds +deterministic_jer = jer.derive("deterministic_jer", cls_dict={"deterministic_seed_index": 0}) + @calibrator( uses={ - mc_weight, jec_nominal, jec_full, jer, tec_nominal, tec, deterministic_seeds, + mc_weight, deterministic_seeds, jec_nominal, jec_full, deterministic_jer, tec_nominal, tec, IF_RUN_2(met_phi), }, produces={ - mc_weight, jec_nominal, jec_full, jer, tec_nominal, tec, deterministic_seeds, + mc_weight, deterministic_seeds, jec_nominal, jec_full, deterministic_jer, tec_nominal, tec, IF_RUN_2(met_phi), }, ) @@ -40,7 +43,7 @@ def default(self: Calibrator, events: ak.Array, **kwargs) -> ak.Array: events = self[jec_nominal](events, **kwargs) else: events = self[jec_full](events, **kwargs) - events = self[jer](events, **kwargs) + events = self[deterministic_jer](events, **kwargs) if self.config_inst.campaign.x.run == 2: events = self[met_phi](events, **kwargs) diff --git a/modules/columnflow b/modules/columnflow index 545a918..ea1e234 160000 --- a/modules/columnflow +++ b/modules/columnflow @@ -1 +1 @@ -Subproject commit 545a918d94aa3ddb6c67f558d875023ec8c2308b +Subproject commit ea1e234b5adf0bb0ba3d9c889bae71e7f3fa9ded From ee378760a9bbd3f8c2a6e8e27760e32efe5ca392 Mon Sep 17 00:00:00 2001 From: Nathan Prouvost Date: Wed, 27 Nov 2024 18:29:07 +0100 Subject: [PATCH 6/7] correct double counting, add check for applies_to_dataset --- hbt/config/configs_hbt.py | 6 ++ hbt/config/triggers.py | 215 ++++++++++++-------------------------- hbt/config/util.py | 2 + hbt/selection/lepton.py | 165 +++++++++++++++-------------- 4 files changed, 163 insertions(+), 225 deletions(-) diff --git a/hbt/config/configs_hbt.py b/hbt/config/configs_hbt.py index 5d15f62..500324d 100644 --- a/hbt/config/configs_hbt.py +++ b/hbt/config/configs_hbt.py @@ -333,6 +333,12 @@ def if_era( dataset = cfg.add_dataset(campaign.get_dataset(dataset_name)) # add tags to datasets + if dataset.name.startswith("data_e_"): + dataset.add_tag({"etau", "emu"}) + if dataset.name.startswith("data_mu_"): + dataset.add_tag({"mutau", "emu", "mumu"}) + if dataset.name.startswith("data_tau_"): + dataset.add_tag({"tautau"}) if dataset.name.startswith("tt_"): dataset.add_tag({"has_top", "ttbar", "tt"}) if dataset.name.startswith("st_"): diff --git a/hbt/config/triggers.py b/hbt/config/triggers.py index 8f74848..a786376 100644 --- a/hbt/config/triggers.py +++ b/hbt/config/triggers.py @@ -809,23 +809,13 @@ def add_triggers_2022(config: od.Config) -> None: trigger_bits=2, ), ], + applies_to_dataset=( + lambda dataset_inst: dataset_inst.is_mc or + dataset_inst.has_tag("etau") or + dataset_inst.has_tag("emu") + ), tags={"single_trigger", "single_e", "channel_e_tau"}, ), - # Currently disabled - # Trigger( - # name="HLT_Ele35_WPTight_Gsf", - # id=203, - # legs=[ - # TriggerLeg( - # pdg_id=11, - # # min_pt=36.0, - # # filter names: - # # hltEle35noerWPTightGsfTrackIsoFilter (Single e WPTight) - # trigger_bits=2, - # ), - # ], - # tags={"single_trigger", "single_e", "channel_e_tau"}, - # ), # # single muon @@ -842,23 +832,14 @@ def 
add_triggers_2022(config: od.Config) -> None: trigger_bits=2 + 8, ), ], + applies_to_dataset=( + lambda dataset_inst: dataset_inst.is_mc or + dataset_inst.has_tag("mutau") or + dataset_inst.has_tag("emu") or + dataset_inst.has_tag("mumu") + ), tags={"single_trigger", "single_mu", "channel_mu_tau"}, ), - # Currently disabled - # Trigger( - # name="HLT_IsoMu27", - # id=102, - # legs=[ - # TriggerLeg( - # pdg_id=13, - # # min_pt=28.0, - # # filter names: - # # hltL3crIsoL1sMu22Or25L1f0L2f10QL3f27QL3trkIsoFiltered0p08 (1mu + Iso) - # trigger_bits=2 + 8, - # ), - # ], - # tags={"single_trigger", "single_mu", "channel_mu_tau"}, - # ), # # e tauh @@ -884,6 +865,11 @@ def add_triggers_2022(config: od.Config) -> None: trigger_bits=8 + 32, ), ], + applies_to_dataset=( + lambda dataset_inst: dataset_inst.is_mc or + dataset_inst.has_tag("etau") or + dataset_inst.has_tag("emu") + ), tags={"cross_trigger", "cross_e_tau", "channel_e_tau"}, ), @@ -911,6 +897,12 @@ def add_triggers_2022(config: od.Config) -> None: trigger_bits=8 + 32, ), ], + applies_to_dataset=( + lambda dataset_inst: dataset_inst.is_mc or + dataset_inst.has_tag("mutau") or + dataset_inst.has_tag("emu") or + dataset_inst.has_tag("mumu") + ), tags={"cross_trigger", "cross_mu_tau", "channel_mu_tau"}, ), @@ -936,52 +928,12 @@ def add_triggers_2022(config: od.Config) -> None: trigger_bits=8 + 32, ), ], + applies_to_dataset=( + lambda dataset_inst: dataset_inst.is_mc or + dataset_inst.has_tag("tautau") + ), tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, ), - # Currently disabled - # Trigger( - # name="HLT_DoubleMediumChargedIsoPFTauHPS40_Trk1_eta2p1", - # id=506, - # legs=[ - # TriggerLeg( - # pdg_id=15, - # # min_pt=45.0, - # # filter names: - # trigger_bits=2 + 32 + 64, - # ), - # TriggerLeg( - # pdg_id=15, - # # min_pt=45.0, - # # filter names: - # trigger_bits=2 + 32 + 64, - # ), - # ], - # applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc or dataset_inst.x.era >= "E"), - # tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, - # ), - - # Trigger( - # name="HLT_DoubleMediumChargedIsoDisplacedPFTauHPS32_Trk1_eta2p1", - # id=507, - # legs=[ - # TriggerLeg( - # pdg_id=15, - # # min_pt=45.0, - # # filter names: - # # hltHpsDoubleMediumChargedIsoDisplPFTau32Dxy0p005 - # trigger_bits=2 + 32 + 64 + 32768, - # ), - # TriggerLeg( - # pdg_id=15, - # # min_pt=45.0, - # # filter names: - # # hltHpsDoubleMediumChargedIsoDisplPFTau32Dxy0p005 - # trigger_bits=2 + 32 + 64 + 32768, - # ), - # ], - # applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data and dataset_inst.x.era < "E"), - # tags={"cross_trigger", "cross_tau_tau", "channel_tau_tau"}, - # ), # # vbf @@ -1020,49 +972,13 @@ def add_triggers_2022(config: od.Config) -> None: # trigger_bits=1, # ), ], - # applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data and config.has_tag("pre")), + applies_to_dataset=( + lambda dataset_inst: dataset_inst.is_mc or + dataset_inst.has_tag("tautau") + ), tags={"cross_trigger", "cross_tau_tau_vbf", "channel_tau_tau"}, ), - # currently disabled - # Trigger( - # name="HLT_VBF_DoubleLooseChargedIsoPFTauHPS20_Trk1_eta2p1", - # id=602, - # legs=[ - # TriggerLeg( - # pdg_id=15, - # # min_pt=25.0, - # # filter names: - # # hltMatchedVBFOnePFJet2CrossCleanedFromDoubleLooseChargedIsoPFTauHPS20 (LooseChargedIso + HPS + run 3 VBF+ditau) # noqa - # # hltMatchedVBFTwoPFJets2CrossCleanedFromDoubleLooseChargedIsoPFTauHPS20 - # # hltHpsDoublePFTau20TrackLooseChargedIso - # trigger_bits=1 + 32 + 4096, - # ), - # TriggerLeg( - # 
pdg_id=15, - # # min_pt=25.0, - # # filter names: - # # hltMatchedVBFOnePFJet2CrossCleanedFromDoubleLooseChargedIsoPFTauHPS20 (LooseChargedIso + HPS + run 3 VBF+ditau) # noqa - # # hltMatchedVBFTwoPFJets2CrossCleanedFromDoubleLooseChargedIsoPFTauHPS20 - # # hltHpsDoublePFTau20TrackLooseChargedIso - # trigger_bits=1 + 32 + 4096, - # ), - # # additional leg infos for vbf jets - # TriggerLeg( # TODO - # # min_pt=115.0, - # # filter names: - # trigger_bits=None, - # ), - # TriggerLeg( - # # min_pt=40.0, - # # filter names: - # trigger_bits=None, - # ), - # ], - # applies_to_dataset=(lambda dataset_inst: dataset_inst.is_mc and config.has_tag("pre")), - # tags={"cross_trigger", "cross_tau_tau_vbf", "channel_tau_tau"}, - # ), - # Currently disabled since it may not be needed # Trigger( # name="HLT_DoublePFJets40_Mass500_MediumDeepTauPFTauHPS45_L2NN_MediumDeepTauPFTauHPS20_eta2p1", @@ -1112,40 +1028,12 @@ def add_triggers_2022(config: od.Config) -> None: # trigger_bits=1, # ), ], + applies_to_dataset=( + lambda dataset_inst: dataset_inst.is_mc or + dataset_inst.has_tag("tautau") + ), tags={"cross_trigger", "cross_tau_tau_jet", "channel_tau_tau"}, ), - # Currently disabled - # Trigger( - # name="HLT_DoubleMediumDeepTauPFTauHPS30_L2NN_eta2p1_PFJet75", - # id=702, - # legs=[ - # TriggerLeg( - # pdg_id=15, - # # min_pt=35.0, - # # filter names: - # # hltHpsOverlapFilterDeepTauDoublePFTau30PFJet75 (Deeptau + HPS + di-tau + PFJet) - # # hltHpsDoublePFTau30MediumDitauWPDeepTauL1HLTMatchedDoubleTauJet - # # hlt1PFJet75L1HLTMatched - # trigger_bits=8 + 32 + 16384, - # ), - # TriggerLeg( - # pdg_id=15, - # # min_pt=35.0, - # # filter names: - # # hltHpsOverlapFilterDeepTauDoublePFTau30PFJet75 - # # hltHpsDoublePFTau30MediumDitauWPDeepTauL1HLTMatchedDoubleTauJet - # # hlt1PFJet75L1HLTMatched - # trigger_bits=8 + 32 + 16384, - # ), - # TriggerLeg( - # # min_pt=75.0, - # # filter names: - # # hltHpsOverlapFilterDeepTauDoublePFTau30PFJet75 - # trigger_bits=None, - # ), - # ], - # tags={"cross_trigger", "cross_tau_tau_jet", "channel_tau_tau"}, - # ), ]) @@ -1170,6 +1058,11 @@ def add_triggers_2023(config: od.Config) -> None: trigger_bits=2, ), ], + applies_to_dataset=( + lambda dataset_inst: dataset_inst.is_mc or + dataset_inst.has_tag("etau") or + dataset_inst.has_tag("emu") + ), tags={"single_trigger", "single_e", "channel_e_tau"}, ), @@ -1188,6 +1081,12 @@ def add_triggers_2023(config: od.Config) -> None: trigger_bits=2 + 8, ), ], + applies_to_dataset=( + lambda dataset_inst: dataset_inst.is_mc or + dataset_inst.has_tag("mutau") or + dataset_inst.has_tag("emu") or + dataset_inst.has_tag("mumu") + ), tags={"single_trigger", "single_mu", "channel_mu_tau"}, ), @@ -1215,6 +1114,11 @@ def add_triggers_2023(config: od.Config) -> None: trigger_bits=8 + 32, ), ], + applies_to_dataset=( + lambda dataset_inst: dataset_inst.is_mc or + dataset_inst.has_tag("etau") or + dataset_inst.has_tag("emu") + ), tags={"cross_trigger", "cross_e_tau", "channel_e_tau"}, ), @@ -1242,6 +1146,12 @@ def add_triggers_2023(config: od.Config) -> None: trigger_bits=8 + 32, ), ], + applies_to_dataset=( + lambda dataset_inst: dataset_inst.is_mc or + dataset_inst.has_tag("mutau") or + dataset_inst.has_tag("emu") or + dataset_inst.has_tag("mumu") + ), tags={"cross_trigger", "cross_mu_tau", "channel_mu_tau"}, ), @@ -1267,6 +1177,10 @@ def add_triggers_2023(config: od.Config) -> None: trigger_bits=8 + 32, ), ], + applies_to_dataset=( + lambda dataset_inst: dataset_inst.is_mc or + dataset_inst.has_tag("tautau") + ), tags={"cross_trigger", 
"cross_tau_tau", "channel_tau_tau"}, ), @@ -1297,7 +1211,10 @@ def add_triggers_2023(config: od.Config) -> None: trigger_bits=8 + 32 + 4096, ), ], - # applies_to_dataset=(lambda dataset_inst: dataset_inst.is_data and config.has_tag("pre")), + applies_to_dataset=( + lambda dataset_inst: dataset_inst.is_mc or + dataset_inst.has_tag("tautau") + ), tags={"cross_trigger", "cross_tau_tau_vbf", "channel_tau_tau"}, ), @@ -1324,6 +1241,10 @@ def add_triggers_2023(config: od.Config) -> None: trigger_bits=16 + 16384, ), ], + applies_to_dataset=( + lambda dataset_inst: dataset_inst.is_mc or + dataset_inst.has_tag("tautau") + ), tags={"cross_trigger", "cross_tau_tau_jet", "channel_tau_tau"}, ), ]) diff --git a/hbt/config/util.py b/hbt/config/util.py index d29b307..664e07c 100644 --- a/hbt/config/util.py +++ b/hbt/config/util.py @@ -215,6 +215,8 @@ def legs( @typed def applies_to_dataset(self, func: Callable | bool | Any) -> Callable: if not callable(func): + if func is not None: + raise TypeError(f"invalid applies_to_dataset: {func}") decision = True if func is None else bool(func) func = lambda dataset_inst: decision diff --git a/hbt/selection/lepton.py b/hbt/selection/lepton.py index ddb191e..594dad1 100644 --- a/hbt/selection/lepton.py +++ b/hbt/selection/lepton.py @@ -415,7 +415,7 @@ def lepton_selection( leptons_os = false_mask single_triggered = false_mask cross_triggered = false_mask - empty_indices = ak.zeros_like(1 * events.event, dtype=np.uint16)[..., None][..., :0] + empty_indices = events.Tau[:, :0].charge * 1 # ak.zeros_like(1 * events.event, dtype=np.uint16)[..., None][..., :0] sel_electron_indices = empty_indices sel_muon_indices = empty_indices sel_tau_indices = empty_indices @@ -454,83 +454,90 @@ def lepton_selection( # lepton pair selecton per trigger via lepton counting - if trigger.has_tag({"single_e", "cross_e_tau"}): - if (self.dataset_inst.is_mc) or ("data_e_" in self.dataset_inst.name): - # expect 1 electron, 1 veto electron (the same one), 0 veto muons, and at least one tau - is_etau = ( - trigger_fired & - (ak.num(electron_indices, axis=1) == 1) & - (ak.num(electron_veto_indices, axis=1) == 1) & - (ak.num(muon_veto_indices, axis=1) == 0) & - (ak.num(tau_indices, axis=1) >= 1) - ) - is_iso = ak.sum(tau_iso_mask, axis=1) >= 1 - # determine the os/ss charge sign relation - e_charge = ak.firsts(events.Electron[electron_indices].charge, axis=1) - tau_charge = ak.firsts(events.Tau[tau_indices].charge, axis=1) - is_os = e_charge == -tau_charge - # store global variables - channel_id = update_channel_ids(events, channel_id, ch_etau.id, is_etau) - tau2_isolated = ak.where(is_etau, is_iso, tau2_isolated) - leptons_os = ak.where(is_etau, is_os, leptons_os) - single_triggered = ak.where(is_etau & is_single, True, single_triggered) - cross_triggered = ak.where(is_etau & is_cross, True, cross_triggered) - sel_electron_indices = ak.where(is_etau, electron_indices, sel_electron_indices) - sel_tau_indices = ak.where(is_etau, tau_indices, sel_tau_indices) - - elif trigger.has_tag({"single_mu", "cross_mu_tau"}): - if (self.dataset_inst.is_mc) or ("data_mu_" in self.dataset_inst.name): - # expect 1 muon, 1 veto muon (the same one), 0 veto electrons, and at least one tau - is_mutau = ( - trigger_fired & - (ak.num(muon_indices, axis=1) == 1) & - (ak.num(muon_veto_indices, axis=1) == 1) & - (ak.num(electron_veto_indices, axis=1) == 0) & - (ak.num(tau_indices, axis=1) >= 1) - ) - is_iso = ak.sum(tau_iso_mask, axis=1) >= 1 - # determine the os/ss charge sign relation - mu_charge = 
ak.firsts(events.Muon[muon_indices].charge, axis=1) - tau_charge = ak.firsts(events.Tau[tau_indices].charge, axis=1) - is_os = mu_charge == -tau_charge - # store global variables - channel_id = update_channel_ids(events, channel_id, ch_mutau.id, is_mutau) - tau2_isolated = ak.where(is_mutau, is_iso, tau2_isolated) - leptons_os = ak.where(is_mutau, is_os, leptons_os) - single_triggered = ak.where(is_mutau & is_single, True, single_triggered) - cross_triggered = ak.where(is_mutau & is_cross, True, cross_triggered) - sel_muon_indices = ak.where(is_mutau, muon_indices, sel_muon_indices) - sel_tau_indices = ak.where(is_mutau, tau_indices, sel_tau_indices) - - elif trigger.has_tag({"cross_tau_tau", "cross_tau_tau_vbf", "cross_tau_tau_jet"}): - if (self.dataset_inst.is_mc) or ("data_tau_" in self.dataset_inst.name): - # expect 0 veto electrons, 0 veto muons and at least two taus of which one is isolated - is_tautau = ( - trigger_fired & - (ak.num(electron_veto_indices, axis=1) == 0) & - (ak.num(muon_veto_indices, axis=1) == 0) & - (ak.num(tau_indices, axis=1) >= 2) & - (ak.sum(tau_iso_mask, axis=1) >= 1) - ) - # special case for cross tau vbf trigger: - # to avoid overlap, with non-vbf triggers, only one tau is allowed to have pt > 40 - if trigger.has_tag("cross_tau_tau_vbf"): - is_tautau = is_tautau & (ak.sum(events.Tau[tau_indices].pt > 40, axis=1) <= 1) - is_iso = ak.sum(tau_iso_mask, axis=1) >= 2 - # tau_indices are sorted by highest isolation as cond. 1 and highest pt as cond. 2, so - # the first two indices are exactly those selected by the full-blown pairing algorithm - # and there is no need here to apply it again :) - # determine the os/ss charge sign relation - tau1_charge = ak.firsts(events.Tau[tau_indices].charge, axis=1) - tau2_charge = ak.firsts(events.Tau[tau_indices].charge[..., 1:], axis=1) - is_os = tau1_charge == -tau2_charge - # store global variables - channel_id = update_channel_ids(events, channel_id, ch_tautau.id, is_tautau) - tau2_isolated = ak.where(is_tautau, is_iso, tau2_isolated) - leptons_os = ak.where(is_tautau, is_os, leptons_os) - single_triggered = ak.where(is_tautau & is_single, True, single_triggered) - cross_triggered = ak.where(is_tautau & is_cross, True, cross_triggered) - sel_tau_indices = ak.where(is_tautau, tau_indices, sel_tau_indices) + if trigger.has_tag({"single_e", "cross_e_tau"}) and ( + self.dataset_inst.is_mc or + self.dataset_inst.has_tag("etau"), + ): + # expect 1 electron, 1 veto electron (the same one), 0 veto muons, and at least one tau + is_etau = ( + trigger_fired & + (ak.num(electron_indices, axis=1) == 1) & + (ak.num(electron_veto_indices, axis=1) == 1) & + (ak.num(muon_veto_indices, axis=1) == 0) & + (ak.num(tau_indices, axis=1) >= 1) + ) + is_iso = ak.sum(tau_iso_mask, axis=1) >= 1 + # determine the os/ss charge sign relation + e_charge = ak.firsts(events.Electron[electron_indices].charge, axis=1) + tau_charge = ak.firsts(events.Tau[tau_indices].charge, axis=1) + is_os = e_charge == -tau_charge + # store global variables + channel_id = update_channel_ids(events, channel_id, ch_etau.id, is_etau) + tau2_isolated = ak.where(is_etau, is_iso, tau2_isolated) + leptons_os = ak.where(is_etau, is_os, leptons_os) + single_triggered = ak.where(is_etau & is_single, True, single_triggered) + cross_triggered = ak.where(is_etau & is_cross, True, cross_triggered) + sel_electron_indices = ak.where(is_etau, electron_indices, sel_electron_indices) + sel_tau_indices = ak.where(is_etau, tau_indices, sel_tau_indices) + + elif trigger.has_tag({"single_mu", 
"cross_mu_tau"}) and ( + self.dataset_inst.is_mc or + self.dataset_inst.has_tag("mutau"), + ): + # expect 1 muon, 1 veto muon (the same one), 0 veto electrons, and at least one tau + is_mutau = ( + trigger_fired & + (ak.num(muon_indices, axis=1) == 1) & + (ak.num(muon_veto_indices, axis=1) == 1) & + (ak.num(electron_veto_indices, axis=1) == 0) & + (ak.num(tau_indices, axis=1) >= 1) + ) + is_iso = ak.sum(tau_iso_mask, axis=1) >= 1 + # determine the os/ss charge sign relation + mu_charge = ak.firsts(events.Muon[muon_indices].charge, axis=1) + tau_charge = ak.firsts(events.Tau[tau_indices].charge, axis=1) + is_os = mu_charge == -tau_charge + # store global variables + channel_id = update_channel_ids(events, channel_id, ch_mutau.id, is_mutau) + tau2_isolated = ak.where(is_mutau, is_iso, tau2_isolated) + leptons_os = ak.where(is_mutau, is_os, leptons_os) + single_triggered = ak.where(is_mutau & is_single, True, single_triggered) + cross_triggered = ak.where(is_mutau & is_cross, True, cross_triggered) + sel_muon_indices = ak.where(is_mutau, muon_indices, sel_muon_indices) + sel_tau_indices = ak.where(is_mutau, tau_indices, sel_tau_indices) + + elif trigger.has_tag({"cross_tau_tau", "cross_tau_tau_vbf", "cross_tau_tau_jet"}) and ( + self.dataset_inst.is_mc or + self.dataset_inst.has_tag("tautau"), + ): + # expect 0 veto electrons, 0 veto muons and at least two taus of which one is isolated + is_tautau = ( + trigger_fired & + (ak.num(electron_veto_indices, axis=1) == 0) & + (ak.num(muon_veto_indices, axis=1) == 0) & + (ak.num(tau_indices, axis=1) >= 2) & + (ak.sum(tau_iso_mask, axis=1) >= 1) + ) + # special case for cross tau vbf trigger: + # to avoid overlap, with non-vbf triggers, only one tau is allowed to have pt > 40 + if trigger.has_tag("cross_tau_tau_vbf"): + is_tautau = is_tautau & (ak.sum(events.Tau[tau_indices].pt > 40, axis=1) <= 1) + is_iso = ak.sum(tau_iso_mask, axis=1) >= 2 + # tau_indices are sorted by highest isolation as cond. 1 and highest pt as cond. 
2, so + # the first two indices are exactly those selected by the full-blown pairing algorithm + # and there is no need here to apply it again :) + # determine the os/ss charge sign relation + tau1_charge = ak.firsts(events.Tau[tau_indices].charge, axis=1) + tau2_charge = ak.firsts(events.Tau[tau_indices].charge[..., 1:], axis=1) + is_os = tau1_charge == -tau2_charge + # store global variables + channel_id = update_channel_ids(events, channel_id, ch_tautau.id, is_tautau) + tau2_isolated = ak.where(is_tautau, is_iso, tau2_isolated) + leptons_os = ak.where(is_tautau, is_os, leptons_os) + single_triggered = ak.where(is_tautau & is_single, True, single_triggered) + cross_triggered = ak.where(is_tautau & is_cross, True, cross_triggered) + sel_tau_indices = ak.where(is_tautau, tau_indices, sel_tau_indices) + # add here additional channels emu and mumu # some final type conversions channel_id = ak.values_astype(channel_id, np.uint8) @@ -546,6 +553,8 @@ def lepton_selection( events = set_ak_column(events, "single_triggered", single_triggered) events = set_ak_column(events, "cross_triggered", cross_triggered) + from IPython import embed; embed(header="lepton_selection") + return events, SelectionResult( steps={ "lepton": channel_id != 0, From 716e0bc5d54bc70b63b281e44b18bdaa29e1458b Mon Sep 17 00:00:00 2001 From: Nathan Prouvost Date: Wed, 27 Nov 2024 18:31:39 +0100 Subject: [PATCH 7/7] remove IPython shell --- hbt/selection/lepton.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/hbt/selection/lepton.py b/hbt/selection/lepton.py index 594dad1..06b9343 100644 --- a/hbt/selection/lepton.py +++ b/hbt/selection/lepton.py @@ -553,8 +553,6 @@ def lepton_selection( events = set_ak_column(events, "single_triggered", single_triggered) events = set_ak_column(events, "cross_triggered", cross_triggered) - from IPython import embed; embed(header="lepton_selection") - return events, SelectionResult( steps={ "lepton": channel_id != 0,