From c598081ddd0f5b47f326a3bad3f741262f59714d Mon Sep 17 00:00:00 2001
From: Shashank Reddy Boyapally <sboyapal@redhat.com>
Date: Fri, 9 Feb 2024 14:46:16 -0500
Subject: [PATCH] hunter integration and refactor (#11)

* hunter integration and refactor

Signed-off-by: Shashank Reddy Boyapally <sboyapal@redhat.com>

* updated version windows for requirements and updated pylint workflow

Signed-off-by: Shashank Reddy Boyapally <sboyapal@redhat.com>

* added compatibility notes to Readme

Signed-off-by: Shashank Reddy Boyapally <sboyapal@redhat.com>

* reduced requirements.txt

Signed-off-by: Shashank Reddy Boyapally <sboyapal@redhat.com>

* pylinting only 3.11

Signed-off-by: Shashank Reddy Boyapally <sboyapal@redhat.com>

---------

Signed-off-by: Shashank Reddy Boyapally <sboyapal@redhat.com>
---
 .github/workflows/pylint.yml |   4 +-
 README.md                    |   8 ++
 orion.py                     | 131 +++++-----------------------
 requirements.txt             |   5 +-
 setup.py                     |   4 +-
 utils/__init__.py            |   0
 utils/orion_funcs.py         | 164 +++++++++++++++++++++++++++++++++++
 7 files changed, 200 insertions(+), 116 deletions(-)
 create mode 100644 utils/__init__.py
 create mode 100644 utils/orion_funcs.py

diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index 9ded563..d8c23ef 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -7,7 +7,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.11"]
     steps:
     - uses: actions/checkout@v3
     - name: Set up Python ${{ matrix.python-version }}
@@ -18,6 +18,8 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install pylint
+        pip install -r requirements.txt
+        pip install .
     - name: Analysing the code with pylint
       run: |
         pylint -d C0103 $(git ls-files '*.py')
\ No newline at end of file
diff --git a/README.md b/README.md
index 24d81fa..986e8d4 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,10 @@ tests :
 ## Build Orion
 Building Orion is a straightforward process. Follow these commands:
 
+**Note: Orion Compatibility**
+
+Orion currently supports Python versions `3.8.x`, `3.9.x`, `3.10.x`, and `3.11.x`. Please be aware that using other Python versions might lead to dependency conflicts caused by hunter, creating a challenging situation known as "dependency hell." It's crucial to highlight that Python `3.12.x` may result in errors due to the removal of distutils, a dependency used by numpy. This information is essential to ensure a smooth experience with Orion and avoid potential compatibility issues.
+
 Clone the current repository using git clone.
 
 ```
@@ -84,8 +88,12 @@ Orion provides flexibility in configuring its behavior by allowing users to set
 
 For enhanced troubleshooting and debugging, Orion supports the ```--debug``` flag, enabling the generation of detailed debug logs. 
 
+Activate Orion's regression detection tool for performance-scale CPT runs effortlessly with the ```--hunter-analyze``` command. This seamlessly integrates with metadata and hunter, ensuring a robust and efficient regression detection process.
+
 Additionally, users can specify a custom path for the output CSV file using the ```--output``` flag, providing control over the location where the generated CSV will be stored.
 
+
+
 Orion's seamless integration with metadata and hunter ensures a robust regression detection tool for perf-scale CPT runs.
 
 
diff --git a/orion.py b/orion.py
index 891d0bd..b0e24c3 100644
--- a/orion.py
+++ b/orion.py
@@ -8,9 +8,11 @@
 import os
 
 import click
-import yaml
 import pandas as pd
+
 from fmatch.matcher import Matcher
+from utils.orion_funcs import run_hunter_analyze, get_metadata, \
+                                set_logging, load_config, get_metric_data
 
 
 @click.group()
@@ -24,7 +26,8 @@ def cli():
 @click.option("--config", default="config.yaml", help="Path to the configuration file")
 @click.option("--output", default="output.csv", help="Path to save the output csv file")
 @click.option("--debug", is_flag=True, help="log level ")
-def orion(config, debug, output):
+@click.option("--hunter-analyze",is_flag=True, help="run hunter analyze")
+def orion(config, debug, output,hunter_analyze):
     """Orion is the cli tool to detect regressions over the runs
 
     Args:
@@ -35,25 +38,22 @@ def orion(config, debug, output):
     level = logging.DEBUG if debug else logging.INFO
     logger = logging.getLogger("Orion")
     logger = set_logging(level, logger)
+    data = load_config(config,logger)
+    ES_URL=None
+
+    if "ES_SERVER" in data.keys():
+        ES_URL = data['ES_SERVER']
+    else:
+        if 'ES_SERVER' in os.environ:
+            ES_URL=os.environ.get("ES_SERVER")
+        else:
+            logger.error("ES_SERVER environment variable/config variable not set")
+            sys.exit(1)
 
-    if "ES_SERVER" not in os.environ:
-        logger.error("ES_SERVER environment variable not set")
-        sys.exit(1)
-
-    try:
-        with open(config, "r", encoding="utf-8") as file:
-            data = yaml.safe_load(file)
-            logger.debug("The %s file has successfully loaded", config)
-    except FileNotFoundError as e:
-        logger.error("Config file not found: %s", e)
-        sys.exit(1)
-    except Exception as e:  # pylint: disable=broad-exception-caught
-        logger.error("An error occurred: %s", e)
-        sys.exit(1)
     for test in data["tests"]:
         metadata = get_metadata(test, logger)
         logger.info("The test %s has started", test["name"])
-        match = Matcher(index="perf_scale_ci", level=level)
+        match = Matcher(index="perf_scale_ci", level=level, ES_URL=ES_URL)
         uuids = match.get_uuid_by_metadata(metadata)
         if len(uuids) == 0:
             print("No UUID present for given metadata")
@@ -77,103 +77,12 @@ def orion(config, debug, output):
             lambda left, right: pd.merge(left, right, on="uuid", how="inner"),
             dataframe_list,
         )
-        match.save_results(merged_df, csv_file_path=output)
-
-
-def get_metric_data(ids, index, metrics, match, logger):
-    """Gets details metrics basked on metric yaml list
+        match.save_results(merged_df, csv_file_path=output.split(".")[0]+"-"+test['name']+".csv")
 
-    Args:
-        ids (list): list of all uuids
-        index (dict): index in es of where to find data
-        metrics (dict): metrics to gather data on
-        match (Matcher): current matcher instance
-        logger (logger): log data to one output
-
-    Returns:
-        dataframe_list: dataframe of the all metrics
-    """
-    dataframe_list = []
-    for metric in metrics:
-        metric_name = metric['name']
-        logger.info("Collecting %s", metric_name)
-        metric_of_interest = metric['metric_of_interest']
-
-        if "agg" in metric.keys():
-            try:
-                cpu = match.get_agg_metric_query(
-                    ids, index, metric
-                )
-                agg_value = metric['agg']['value']
-                agg_type = metric['agg']['agg_type']
-                agg_name = agg_value + "_" + agg_type
-                cpu_df = match.convert_to_df(cpu, columns=["uuid", agg_name])
-                cpu_df = cpu_df.rename(
-                    columns={agg_name: metric_name+ "_" +  agg_name}
-                )
-                dataframe_list.append(cpu_df)
-                logger.debug(cpu_df)
-
-            except Exception as e:  # pylint: disable=broad-exception-caught
-                logger.error(
-                    "Couldn't get agg metrics %s, exception %s",
-                    metric_name,
-                    e,
-                )
-        else:
-            try:
-                podl = match.getResults("", ids, index, metric)
-                podl_df = match.convert_to_df(
-                    podl, columns=["uuid", "timestamp", metric_of_interest]
-                )
-                dataframe_list.append(podl_df)
-                logger.debug(podl_df)
-            except Exception as e:  # pylint: disable=broad-exception-caught
-                logger.error(
-                    "Couldn't get metrics %s, exception %s",
-                    metric_name,
-                    e,
-                )
-    return dataframe_list
-
-def get_metadata(test,logger):
-    """Gets metadata of the run from each test
+        if hunter_analyze:
+            run_hunter_analyze(merged_df,test)
 
-    Args:
-        test (dict): test dictionary
 
-    Returns:
-        dict: dictionary of the metadata
-    """
-    metadata = {}
-    for k,v in test.items():
-        if k in ["metrics","name"]:
-            continue
-        metadata[k] = v
-    metadata["ocpVersion"] = str(metadata["ocpVersion"])
-    logger.debug('metadata' + str(metadata))
-    return metadata
-
-
-def set_logging(level, logger):
-    """sets log level and format
-
-    Args:
-        level (_type_): level of the log
-        logger (_type_): logger object
-
-    Returns:
-        logging.Logger: a formatted and level set logger
-    """
-    logger.setLevel(level)
-    handler = logging.StreamHandler(sys.stdout)
-    handler.setLevel(level)
-    formatter = logging.Formatter(
-        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
-    )
-    handler.setFormatter(formatter)
-    logger.addHandler(handler)
-    return logger
 
 
 if __name__ == "__main__":
diff --git a/requirements.txt b/requirements.txt
index 218fe88..0c7d487 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,14 +1,13 @@
+hunter @ git+https://github.com/datastax-labs/hunter.git@8ff166979d000780ad548e49f006ef2a15d54123
 certifi==2023.11.17
 click==8.1.7
 elastic-transport==8.11.0
 elasticsearch==8.11.1
 elasticsearch7==7.13.0
 fmatch==0.0.4
-numpy==1.26.3
-pandas==2.1.4
 python-dateutil==2.8.2
 pytz==2023.3.post1
 PyYAML==6.0.1
 six==1.16.0
 tzdata==2023.4
-urllib3==1.26.18
+urllib3==1.26.18
\ No newline at end of file
diff --git a/setup.py b/setup.py
index be07410..52fdafc 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 """
 setup.py for orion cli tool
 """
-from setuptools import setup
+from setuptools import setup, find_packages
 
 setup(
     name='orion',
@@ -17,6 +17,8 @@
             'orion = orion:orion',
         ],
     },
+    packages=find_packages(),
+    package_data={'utils': ['utils.py'],'hunter': ['*.py']},
     classifiers=[
         'Programming Language :: Python :: 3',
         'License :: OSI Approved :: MIT License',
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/utils/orion_funcs.py b/utils/orion_funcs.py
new file mode 100644
index 0000000..f0fdd4b
--- /dev/null
+++ b/utils/orion_funcs.py
@@ -0,0 +1,164 @@
+# pylint: disable=cyclic-import
+"""
+module for all utility functions orion uses
+"""
+# pylint: disable = import-error
+
+import logging
+import sys
+
+import yaml
+import pandas as pd
+
+from hunter.report import Report, ReportType
+from hunter.series import Metric, Series
+
+
+def run_hunter_analyze(merged_df,test):
+    """Start hunter analyze function
+
+    Args:
+        merged_df (Dataframe): merged dataframe of all the metrics
+        test (dict): test dictionary with each test's information
+    """
+    merged_df["timestamp"] = pd.to_datetime(merged_df["timestamp"])
+    merged_df["timestamp"] = merged_df["timestamp"].astype(int) // 10**9
+    metrics = {column: Metric(1, 1.0)
+               for column in merged_df.columns
+               if column not in ["uuid","timestamp"]}
+    data = {column: merged_df[column]
+            for column in merged_df.columns
+            if column not in ["uuid","timestamp"]}
+    attributes={column: merged_df[column] for column in merged_df.columns if column in ["uuid"]}
+    series=Series(
+        test_name=test["name"],
+        branch=None,
+        time=list(merged_df["timestamp"]),
+        metrics=metrics,
+        data=data,
+        attributes=attributes
+    )
+    change_points=series.analyze().change_points_by_time
+    report=Report(series,change_points)
+    output = report.produce_report(test_name="test",report_type=ReportType.LOG)
+    print(output)
+
+# pylint: disable=too-many-locals
+def get_metric_data(ids, index, metrics, match, logger):
+    """Gets detailed metrics based on metric yaml list
+
+    Args:
+        ids (list): list of all uuids
+        index (dict): index in es of where to find data
+        metrics (dict): metrics to gather data on
+        match (Matcher): current matcher instance
+        logger (logger): log data to one output
+
+    Returns:
+        dataframe_list: dataframe of the all metrics
+    """
+    dataframe_list = []
+    for metric in metrics:
+        metric_name = metric['name']
+        logger.info("Collecting %s", metric_name)
+        metric_of_interest = metric['metric_of_interest']
+
+        if "agg" in metric.keys():
+            try:
+                cpu = match.get_agg_metric_query(
+                    ids, index, metric
+                )
+                agg_value = metric['agg']['value']
+                agg_type = metric['agg']['agg_type']
+                agg_name = agg_value + "_" + agg_type
+                cpu_df = match.convert_to_df(cpu, columns=["uuid", agg_name])
+                cpu_df = cpu_df.rename(
+                    columns={agg_name: metric_name+ "_" +  agg_name}
+                )
+                dataframe_list.append(cpu_df)
+                logger.debug(cpu_df)
+
+            except Exception as e:  # pylint: disable=broad-exception-caught
+                logger.error(
+                    "Couldn't get agg metrics %s, exception %s",
+                    metric_name,
+                    e,
+                )
+        else:
+            try:
+                podl = match.getResults("", ids, index, metric)
+                podl_df = match.convert_to_df(
+                    podl, columns=["uuid", "timestamp", metric_of_interest]
+                )
+                dataframe_list.append(podl_df)
+                logger.debug(podl_df)
+            except Exception as e:  # pylint: disable=broad-exception-caught
+                logger.error(
+                    "Couldn't get metrics %s, exception %s",
+                    metric_name,
+                    e,
+                )
+    return dataframe_list
+
+
+def get_metadata(test,logger):
+    """Gets metadata of the run from each test
+
+    Args:
+        test (dict): test dictionary
+
+    Returns:
+        dict: dictionary of the metadata
+    """
+    metadata = {}
+    for k,v in test.items():
+        if k in ["metrics","name"]:
+            continue
+        metadata[k] = v
+    metadata["ocpVersion"] = str(metadata["ocpVersion"])
+    logger.debug('metadata' + str(metadata))
+    return metadata
+
+
+
+def set_logging(level, logger):
+    """sets log level and format
+
+    Args:
+        level (_type_): level of the log
+        logger (_type_): logger object
+
+    Returns:
+        logging.Logger: a formatted and level set logger
+    """
+    logger.setLevel(level)
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setLevel(level)
+    formatter = logging.Formatter(
+        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    )
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+    return logger
+
+def load_config(config,logger):
+    """Loads config file
+
+    Args:
+        config (str): path to config file
+        logger (Logger): logger
+
+    Returns:
+        dict: dictionary of the config file
+    """
+    try:
+        with open(config, "r", encoding="utf-8") as file:
+            data = yaml.safe_load(file)
+            logger.debug("The %s file has successfully loaded", config)
+    except FileNotFoundError as e:
+        logger.error("Config file not found: %s", e)
+        sys.exit(1)
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        logger.error("An error occurred: %s", e)
+        sys.exit(1)
+    return data