Skip to content

Commit d690d5b

Browse files
refactored code
Signed-off-by: Shashank Reddy Boyapally <[email protected]>
1 parent 3bade98 commit d690d5b

File tree

14 files changed

+501
-488
lines changed

14 files changed

+501
-488
lines changed

orion.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66
import logging
77
import sys
88
import warnings
9+
from typing import Any
910
import click
1011
import uvicorn
1112
from fmatch.logrus import SingletonLogger
1213
from pkg.runTest import run
1314
from pkg.utils import load_config
1415
import pkg.constants as cnsts
15-
from typing import Any
1616

1717
warnings.filterwarnings("ignore", message="Unverified HTTPS request.*")
1818
warnings.filterwarnings(
@@ -104,12 +104,8 @@ def cli(max_content_width=120): # pylint: disable=unused-argument
104104
)
105105
@click.option("--lookback", help="Get data from last X days and Y hours. Format in XdYh")
106106
@click.option("--convert-tinyurl", is_flag=True, help="Convert buildUrls to tiny url format for better formatting")
107-
<<<<<<< HEAD
108107
@click.option("--collapse", is_flag=True, help="Only outputs changepoints, previous and later runs in the xml format")
109108
def cmd_analysis(**kwargs):
110-
=======
111-
def cmd_analysis(**kwargs: dict[str, dict]) -> None:
112-
>>>>>>> 3488c73 (added typing)
113109
"""
114110
Orion runs on command line mode, and helps in detecting regressions
115111
"""

pkg/algorithm.py

Lines changed: 0 additions & 49 deletions
This file was deleted.

pkg/algorithms/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
"""
2+
Init for pkg module
3+
"""
4+
5+
from .edivisive.edivisive import EDivisive
6+
from .isolationforest.isolationForest import IsolationForestWeightedMean
7+
from .algorithmFactory import AlgorithmFactory
8+
from .algorithm import Algorithm

pkg/algorithms/algorithm.py

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
"""Module for Generic Algorithm class"""
2+
3+
from abc import ABC, abstractmethod
4+
from itertools import groupby
5+
import json
6+
from typing import Any, Dict, List, Tuple, Union
7+
import pandas as pd
8+
from fmatch.matcher import Matcher
9+
from hunter.report import Report, ReportType
10+
from hunter.series import Series, Metric, ChangePoint, ChangePointGroup
11+
import pkg.constants as cnsts
12+
13+
14+
from pkg.utils import json_to_junit
15+
16+
17+
class Algorithm(ABC):
    """Generic Algorithm class for the algorithm factory.

    Subclasses implement ``_analyze`` (returning a hunter ``Series`` and a
    mapping of metric name -> list of ``ChangePoint``) and inherit the shared
    output formatting (json, text, junit) and series-construction helpers.
    """

    def __init__(  # pylint: disable = too-many-arguments
        self,
        matcher: Matcher,
        dataframe: pd.DataFrame,
        test: dict,
        options: dict,
        metrics_config: dict[str, dict],
    ) -> None:
        self.matcher = matcher
        self.dataframe = dataframe
        self.test = test
        self.options = options
        self.metrics_config = metrics_config

    def output_json(self) -> Tuple[str, str]:
        """Output changepoint data in JSON format.

        Returns:
            Tuple[str, str]: the test name and the JSON document as a string
        """
        _, change_points_by_metric = self._analyze()
        dataframe_json = self.dataframe.to_json(orient="records")
        dataframe_json = json.loads(dataframe_json)

        # Nest each configured metric under a "metrics" key so a
        # percentage_change can be attached per metric below.
        for index, entry in enumerate(dataframe_json):
            entry["metrics"] = {
                key: {"value": entry.pop(key), "percentage_change": 0}
                for key in self.metrics_config
            }
            entry["is_changepoint"] = False

        for key, value in change_points_by_metric.items():
            for change_point in value:
                index = change_point.index
                percentage_change = (
                    (change_point.stats.mean_2 - change_point.stats.mean_1)
                    / change_point.stats.mean_1
                ) * 100
                # Only flag changes moving in the configured direction;
                # direction == 0 means both directions are of interest.
                if (
                    percentage_change * self.metrics_config[key]["direction"] > 0
                    or self.metrics_config[key]["direction"] == 0
                ):
                    dataframe_json[index]["metrics"][key][
                        "percentage_change"
                    ] = percentage_change
                    dataframe_json[index]["is_changepoint"] = True

        return self.test["name"], json.dumps(dataframe_json, indent=2)

    def output_text(self) -> Tuple[str, str]:
        """Output the changepoint data in text/tabular format.

        Returns:
            Tuple[str, str]: the test name and hunter's log-style report
        """
        series, change_points_by_metric = self._analyze()
        change_points_by_time = self.group_change_points_by_time(
            series, change_points_by_metric
        )
        report = Report(series, change_points_by_time)
        output_table = report.produce_report(
            test_name=self.test["name"], report_type=ReportType.LOG
        )
        return self.test["name"], output_table

    def output_junit(self) -> Tuple[str, str]:
        """Output the changepoint data in JUnit XML format.

        Returns:
            Tuple[str, str]: the test name and the JUnit XML as a string
        """
        test_name, data_json = self.output_json()
        data_json = json.loads(data_json)
        data_junit = json_to_junit(
            test_name=test_name,
            data_json=data_json,
            metrics_config=self.metrics_config,
            options=self.options,
        )
        return test_name, data_junit

    @abstractmethod
    def _analyze(self):
        """Analyze the dataframe; return (Series, changepoints-by-metric)."""

    def group_change_points_by_time(
        self, series: Series, change_points: Dict[str, List[ChangePoint]]
    ) -> List[ChangePointGroup]:
        """Group changepoints from all metrics by their time index.

        Args:
            series (Series): series of data the changepoints refer to
            change_points (Dict[str, List[ChangePoint]]): changepoints keyed
                by metric name

        Returns:
            List[ChangePointGroup]: one group per distinct changepoint index
        """
        changes: List[ChangePoint] = []
        for metric in change_points.keys():
            changes += change_points[metric]

        # groupby requires the list sorted by the grouping key (index).
        changes.sort(key=lambda c: c.index)
        points = []
        for k, g in groupby(changes, key=lambda c: c.index):
            cp = ChangePointGroup(
                index=k,
                time=series.time[k],
                prev_time=series.time[k - 1],
                attributes=series.attributes_at(k),
                prev_attributes=series.attributes_at(k - 1),
                changes=list(g),
            )
            points.append(cp)

        return points

    def setup_series(self) -> Series:
        """Build a hunter Series from the dataframe and metrics config.

        Returns:
            Series: series holding configured metrics plus uuid/buildUrl
            attributes, keyed by the "timestamp" column
        """
        metrics = {
            # direction defaults to 1 (higher-is-worse) when unspecified
            column: Metric(value.get("direction", 1), 1.0)
            for column, value in self.metrics_config.items()
        }
        data = {column: self.dataframe[column] for column in self.metrics_config}
        attributes = {
            column: self.dataframe[column]
            for column in self.dataframe.columns
            if column in ["uuid", "buildUrl"]
        }
        series = Series(
            test_name=self.test["name"],
            branch=None,
            time=list(self.dataframe["timestamp"]),
            metrics=metrics,
            data=data,
            attributes=attributes,
        )

        return series

    def output(self, output_format) -> Union[Any, None]:
        """Dispatch to the output method matching the requested format.

        Args:
            output_format (str): one of the cnsts.JSON/TEXT/JUNIT constants

        Raises:
            ValueError: when the output format is not supported

        Returns:
            Tuple[str, str]: the selected output method's result
        """
        if output_format == cnsts.JSON:
            return self.output_json()
        if output_format == cnsts.TEXT:
            return self.output_text()
        if output_format == cnsts.JUNIT:
            return self.output_junit()
        # BUG FIX: original message lacked the f-prefix, so the format
        # placeholder was emitted literally instead of being interpolated.
        raise ValueError(f"Unsupported output format {output_format} selected")

pkg/algorithmFactory.py renamed to pkg/algorithms/algorithmFactory.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
"""
22
Algorithm Factory to choose available algorithms
33
"""
4-
from pkg.edivisive import EDivisive
5-
from pkg.isolationForest import IsolationForestWeightedMean
6-
import pkg.constants as cnsts
74
from fmatch.matcher import Matcher
85
import pandas as pd
9-
class AlgorithmFactory: # pylint: disable= too-few-public-methods, too-many-arguments
6+
import pkg.constants as cnsts
7+
from .edivisive import EDivisive
8+
from .isolationforest import IsolationForestWeightedMean
9+
10+
11+
class AlgorithmFactory: # pylint: disable= too-few-public-methods, too-many-arguments, line-too-long
1012
"""Algorithm Factory to choose algorithm
1113
"""
1214
def instantiate_algorithm(self, algorithm: str, matcher: Matcher, dataframe:pd.DataFrame, test: dict, options: dict, metrics_config: dict[str,dict]):
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"""
2+
Init for E-Divisive Algorithm
3+
"""
4+
from .edivisive import EDivisive
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
"""EDivisive Algorithm from hunter"""
2+
3+
# pylint: disable = line-too-long
4+
import pandas as pd
5+
from pkg.algorithms.algorithm import Algorithm
6+
7+
8+
class EDivisive(Algorithm):
    """Implementation of the EDivisive algorithm using hunter

    Args:
        Algorithm (Algorithm): Inherits
    """


    def _analyze(self):
        """Run hunter's E-Divisive analysis, then discard changepoints
        that moved opposite to each metric's configured direction."""
        self.dataframe["timestamp"] = pd.to_datetime(self.dataframe["timestamp"])
        self.dataframe["timestamp"] = self.dataframe["timestamp"].astype(int) // 10**9
        series = self.setup_series()
        change_points_by_metric = series.analyze().change_points

        # filter by direction; slice-assignment keeps the same list objects
        # inside the dict, matching the original in-place deletion.
        for metric, changepoint_list in change_points_by_metric.items():
            direction = self.metrics_config[metric]["direction"]
            changepoint_list[:] = [
                cp
                for cp in changepoint_list
                if not (
                    (direction == 1 and cp.stats.mean_1 > cp.stats.mean_2)
                    or (direction == -1 and cp.stats.mean_1 < cp.stats.mean_2)
                )
            ]

        return series, change_points_by_metric
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"""
2+
init for isolation forest
3+
"""
4+
from .isolationForest import IsolationForestWeightedMean
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# pylint: disable = too-many-locals, line-too-long
2+
"""The implementation module for Isolation forest and weighted mean"""
3+
from sklearn.ensemble import IsolationForest
4+
import pandas as pd
5+
from fmatch.logrus import SingletonLogger
6+
from hunter.series import ChangePoint, ComparativeStats
7+
from pkg.algorithms import Algorithm
8+
9+
10+
class IsolationForestWeightedMean(Algorithm):
    """Isolation forest anomaly detection with a moving-average baseline.

    Rows flagged anomalous by IsolationForest become changepoints when a
    metric deviates from its rolling mean by more than a configured
    percentage, in the configured direction.
    """

    def _analyze(self):
        """Detect anomalous runs in self.dataframe.

        Options honored (both optional): "anomaly_window" (rolling-mean
        window size, default 5) and "min_anomaly_percent" (minimum absolute
        percentage deviation, default 10).

        Returns:
            tuple: (Series, dict mapping metric name -> list[ChangePoint])
        """
        self.dataframe["timestamp"] = pd.to_datetime(self.dataframe["timestamp"])
        self.dataframe["timestamp"] = self.dataframe["timestamp"].astype(int) // 10**9
        dataframe = self.dataframe.copy(deep=True)
        series = self.setup_series()

        logger_instance = SingletonLogger.getLogger("Orion")
        logger_instance.info("Starting analysis using Isolation Forest")
        metric_columns = self.metrics_config.keys()
        dataframe_with_metrics = dataframe[metric_columns]
        # random_state pinned so repeated runs give identical predictions
        model = IsolationForest(contamination="auto", random_state=42)
        model.fit(dataframe_with_metrics)
        predictions = model.predict(dataframe_with_metrics)
        dataframe["is_anomaly"] = predictions
        anomaly_scores = model.decision_function(dataframe_with_metrics)
        # Add anomaly scores to the DataFrame
        dataframe["anomaly_score"] = anomaly_scores

        # Resolve options once instead of calling .get() twice per use;
        # min_anomaly_percent is hoisted out of the per-row loop below.
        raw_window = self.options.get("anomaly_window")
        window_size = 5 if raw_window is None else int(raw_window)
        raw_min_pct = self.options.get("min_anomaly_percent")
        min_anomaly_percent = 10 if raw_min_pct is None else int(raw_min_pct)

        # Calculate moving average for each metric
        moving_averages = dataframe_with_metrics.rolling(window=window_size).mean()

        # Initialize percentage change columns for all metrics
        for feature in dataframe_with_metrics.columns:
            dataframe[f"{feature}_pct_change"] = 0.0

        change_points_by_metric = {k: [] for k in metric_columns}

        for idx, row in dataframe.iterrows():
            if row["is_anomaly"] != -1:
                continue
            for feature in metric_columns:
                baseline = moving_averages.at[idx, feature]
                pct_change = ((row[feature] - baseline) / baseline) * 100
                direction = self.metrics_config[feature]["direction"]
                # Keep only deviations that are large enough and that move
                # in the configured direction (0 means either direction).
                if abs(pct_change) > min_anomaly_percent and (
                    pct_change * direction > 0 or direction == 0
                ):
                    change_point = ChangePoint(
                        metric=feature,
                        index=idx,
                        time=row["timestamp"],
                        stats=ComparativeStats(
                            mean_1=baseline,
                            mean_2=row[feature],
                            std_1=0,
                            std_2=0,
                            pvalue=1,
                        ),
                    )
                    change_points_by_metric[feature].append(change_point)

        return series, change_points_by_metric

0 commit comments

Comments
 (0)