Skip to content

Commit

Permalink
Merge pull request #23 from uhh-cms/feature/get_das_info_top_variations
Browse files Browse the repository at this point in the history
add function to convert dataset information for top datasets
  • Loading branch information
dsavoiu authored Feb 13, 2024
2 parents 89cd288 + 4611f3d commit 04a894c
Showing 1 changed file with 113 additions and 6 deletions.
119 changes: 113 additions & 6 deletions scripts/get_das_info.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# coding: utf-8

# USAGE: python GetDASinfo.py -d das_string
# USAGE: python get_das_info.py -d das_string
# e.g. /JetHT/Run2018C-UL2018_MiniAODv2_JMENanoAODv9-v1/NANOAOD

from __future__ import annotations
Expand All @@ -12,7 +12,10 @@
import law


def convert_to_desired_structure(data: dict) -> str:
def convert_default(data: dict) -> str:
"""
Function that converts dataset info into one order Dataset per query
"""
return f"""cpn.add_dataset(
name="PLACEHOLDER",
id={data['dataset_id']},
Expand All @@ -22,10 +25,107 @@ def convert_to_desired_structure(data: dict) -> str:
],
n_files={data['nfiles']},
n_events={data['nevents']},
)
"""


# Maps a substring of the DAS dataset name to the dataset type used by convert_top.
# Lookup walks the entries in insertion order and stops at the first hit, so the more
# specific identifiers must be listed before the generic "_TuneCP5_" entry.
identifier_map = {
    # known variations of a dataset
    "_TuneCP5Down_": "tune_down",
    "_TuneCP5Up_": "tune_up",
    "_TuneCP5CR1_": "cr_1",
    "_TuneCP5CR2_": "cr_2",
    "_Hdamp-158_": "hdamp_down",
    "_Hdamp-418_": "hdamp_up",
    "_MT-171p5_": "mtop_down",
    "_MT-173p5_": "mtop_up",
    # dataset types that are currently of no use but should be kept anyway
    **dict.fromkeys(
        (
            "_MT-166p5_",
            "_MT-169p5_",
            "_MT-175p5_",
            "_MT-178p5_",
            "_DS_TuneCP5_",
            "_TuneCP5_ERDOn_",
            "_TuneCH3_",
        ),
        "comment",
    ),
    # dataset types that should be skipped completely map to "ignore", e.g.
    # "example_key": "ignore",
    # nominal entry comes last such that all other dataset types get priority
    "_TuneCP5_": "nominal",
}


def convert_top(data: dict) -> str:
    """
    Render the dataset info in *data* as a code snippet for top datasets.

    The first key of ``identifier_map`` that occurs in ``data["name"]`` decides the
    dataset type:

    - ``"nominal"``: a full ``cpn.add_dataset(...)`` call with a nominal DatasetInfo,
    - ``"comment"``: a commented-out DatasetInfo entry labelled with the matched identifier,
    - ``"ignore"``: an empty string,
    - anything else: a ``<dataset_type>=DatasetInfo(...)`` entry for that variation.

    If no identifier matches, an error banner naming the dataset is returned instead.

    Exemplary usage:
    python get_das_info.py -c top -d "/TTtoLNu2Q*/Run3Summer22EENanoAODv12-130X_*/NANOAODSIM"
    """
    name = data["name"]

    # first match wins, so the order of identifier_map decides between overlapping keys
    identifier, dataset_type = next(
        ((key, kind) for key, kind in identifier_map.items() if key in name),
        (None, None),
    )

    # NOTE(review): the template lines below are flush-left exactly as they appear in
    # this view; if the generated snippets are meant to be indented, confirm the intended
    # leading whitespace against the original file.
    if not dataset_type:
        return f"""
#####
#####ERROR! Did not manage to determine type of dataset {name}
#####
"""

    if dataset_type == "ignore":
        return ""

    if dataset_type == "nominal":
        return f"""cpn.add_dataset(
name="PLACEHOLDER",
id={data['dataset_id']},
processes=[procs.PLACEHOLDER],
info=dict(
nominal=DatasetInfo(
keys=[
"{name}", # noqa
],
n_files={data['nfiles']},
n_events={data['nevents']},
),
),
)"""

    if dataset_type == "comment":
        # keep the dataset visible in the output, but commented out
        return f""" # {identifier}=DatasetInfo(
# keys=[
# "{name}", # noqa
# ],
# n_files={data['nfiles']},
# n_events={data['nevents']},
# ),"""

    # some known variation of the dataset
    return f""" {dataset_type}=DatasetInfo(
keys=[
"{name}", # noqa
],
n_files={data['nfiles']},
n_events={data['nevents']},
),"""


# Registry of the available converters; its keys are also offered as the valid choices
# of the -c/--convert command-line option.
convert_functions = dict(
    default=convert_default,
    top=convert_top,
)


def print_das_info(
das_strings: list[str],
keys_of_interest: tuple | None = None,
convert_function_str: str | None = None,
):
# get the requested convert function
convert_function = convert_functions[convert_function_str]

def print_das_info(das_strings: list[str], keys_of_interest: tuple | None = None):
for das_string in das_strings:
# set default keys of interest
keys_of_interest = keys_of_interest or (
Expand Down Expand Up @@ -76,13 +176,20 @@ def print_das_info(das_strings: list[str], keys_of_interest: tuple | None = None
info_of_interest["nfiles"] = dataset_info.get("nfiles", "")
info_of_interest["nevents"] = dataset_info.get("nevents", "")

desired_output = convert_to_desired_structure(info_of_interest)
desired_output = convert_function(info_of_interest)
print(desired_output)
print()


if __name__ == "__main__":
    # Command-line entry point: query every DAS string given via -d/--dataset and print
    # the code snippet produced by the converter selected with -c/--convert.
    parser = ArgumentParser()
    parser.add_argument(
        "-d",
        "--dataset",
        dest="dataset",
        nargs="+",
        # print_das_info iterates over the given strings, so at least one is mandatory
        required=True,
        help="das name",
    )
    parser.add_argument(
        "-c",
        "--convert",
        dest="convert",
        help="function that converts info into code",
        default="default",
        # restrict the option to the names of the registered converters
        choices=list(convert_functions),
    )
    args = parser.parse_args()
    # single call that forwards the selected converter; the former call without the
    # converter argument is dropped so the datasets are not processed twice
    print_das_info(args.dataset, convert_function_str=args.convert)

0 comments on commit 04a894c

Please sign in to comment.