Skip to content

Issue finder #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions issue_finder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from fnmatch import fnmatch
from http.client import IncompleteRead
import os
import pathlib
from urllib.error import HTTPError
import urllib.request
import yaml
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

need to add requirements.txt file (or Pipfile if you're using pipenv) for pyyaml?

Not part of this commit, but perhaps we should also document a minimum Python version of 3.6 (i.e. >= 3.6) alongside the requirements.txt file, since f-strings are not supported in Python 3 versions prior to 3.6.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree


from issues.issues import Issues


class IssuesFinder:
    """Known OCP CI issues.

    Issue definitions are loaded once, at construction time, from the
    ``issue_*.yaml`` files found under the ``issues`` directory that sits
    next to this module.
    """

    def __init__(self):
        # Descriptions matched by the most recent find_issues() call.
        self.issues_found = []
        self.issues = Issues()
        self.issue_yamls = self.read_issue_yaml()

    def read_issue_yaml(self):
        """Return a list of objects read in from issue yaml defs.

        Walks the ``issues`` directory beside this file recursively and
        parses every file whose name matches ``issue_*.yaml``.
        """
        issue_yamls = []
        script_dir_path = pathlib.Path(__file__).parent.resolve()
        for dirpath, _dirnames, filenames in os.walk(
            os.path.join(script_dir_path, "issues")
        ):
            for name in filenames:
                if fnmatch(name, "issue_*.yaml"):
                    yaml_path = os.path.join(dirpath, name)
                    # Close the handle deterministically instead of leaving
                    # it to the garbage collector.
                    with open(yaml_path) as yaml_file:
                        content = yaml.load(yaml_file, Loader=yaml.SafeLoader)
                    issue_yamls.append(content)
        return issue_yamls

    def find_issues(self, logs):
        """Return a list of known issues found in the test logs.

        logs: URL of the log file to download, or None.

        The returned list holds the ``description`` of every issue
        definition that matched. It is empty when ``logs`` is None or the
        download failed. The same list is stored in ``self.issues_found``.
        """
        # Start from a fresh list on every call so that matches from a
        # previously scanned log never leak into this result.
        found = []
        self.issues_found = found
        if logs is None:
            return found
        log_text = self.get_file_from_url(logs)
        if log_text is None:
            return found
        for issue_def in self.issue_yamls:
            # This could take a while.
            # Let the user know something is happening.
            print(" .")
            if self.issues.search(log_text, issue_def):
                found.append(issue_def["description"])
        return found

    def get_file_from_url(self, url: str):
        """Return file content downloaded from url, or None on failure.

        The download is attempted up to three times when the read is cut
        short (IncompleteRead). An HTTPError aborts immediately without a
        retry. The body is decoded as UTF-8.
        """
        # Try three times to help with intermittent connection issues.
        for i in range(3):
            try:
                # The context manager closes the response on every path.
                with urllib.request.urlopen(url) as response:
                    return response.read().decode("utf8")
            except IncompleteRead:
                print(f"Caught IncompleteRead in iteration {i}.")
            except HTTPError:
                print(f"Skipping download due to HTTPError: {url}")
                break
        return None
4 changes: 4 additions & 0 deletions issues/issue_OCPQE_5204.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
description: 'https://issues.redhat.com/browse/OCPQE-5204'
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the intention here to get known issues no matter which test case id the known issue is logged for as long as the string is matched?
I don't see an association between the known issue and testcase id the known issue is logged for which is fine as long as that is the intention.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct, there is no requirement that an issue be mapped 1:1 to a test case. Many bugs or infrastructure issues can cause failures across multiple test cases.

match_ordered_strings:
- 'Scenario: Etcd basic verification ==='
- 'RuntimeError: See log, waiting for labeled pods futile: k8s-app=etcd'
5 changes: 5 additions & 0 deletions issues/issue_OCPQE_5743.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
description: 'https://issues.redhat.com/browse/OCPQE-5743'
match_ordered_strings:
- '=== After Scenario:'
- 'the server is currently unable to handle the request \(get projects.project.openshift.io\)'
- 'RuntimeError: error getting projects by user'
51 changes: 51 additions & 0 deletions issues/issues.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import re


class Issues:
    """Issues definitions.

    Each public matcher method takes the log text and the structured data
    stored under the same-named key of an issue definition, and returns
    True when its criterion is met.
    """

    def match_ordered_strings(self, log: str, targets: list):
        """Return True if all the regex strings in targets are found in order.

        log: full log text; it is examined line by line.
        targets: regular expressions that must each match some line, with
            every match located on a later line than the previous target's
            match.

        An empty ``targets`` list yields True.
        """
        lines = log.splitlines()
        # Index of the first line the next target is allowed to match.
        next_line = 0
        for target in targets:
            pattern = re.compile(target)
            for index in range(next_line, len(lines)):
                if pattern.search(lines[index]):
                    # Resume after this line so ordering is enforced.
                    next_line = index + 1
                    break
            else:
                # Ran out of lines without finding this target.
                return False
        return True

    def get_method_refs(self, names):
        """Return a dict of callable method refs, keyed by name in names.

        The ``description`` key is skipped because it is metadata, not a
        matcher. Any other name that is not an attribute of this class
        raises AttributeError.
        """
        return {
            name: getattr(self, name) for name in names if name != "description"
        }

    def search(self, log_text, issue_def):
        """
        Return True if log_text matches all the criteria in issue_def.
        issue_def: A dict with keys that match method names defined in this
        class.
        description: issue_def.keys() is expected to be a list of method
        names, that match up with methods defined in this class. Each
        method is called with log_text and the corresponding issue_def
        value as arguments. The issue_def value must contain all the
        structured data that is required by the called method.
        """
        method_refs = self.get_method_refs(issue_def.keys())
        # NOTE: a definition with no matcher keys is vacuously True.
        return all(
            ref(log_text, issue_def[name]) for name, ref in method_refs.items()
        )
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when this class is extended to add other ways to find Issues, I wonder if it makes sense to use any instead of all?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea is that for any given known issue, there are a set of circumstances that must be true in order to positively identify the issue you're trying to match. Each circumstance may require performing a unique check. For example, profile X in use, operator Y installed and configured and Z error message present in the log. Currently this PR only acts on the cucushift console.html. However, the extended version might also gain access to additional test artifacts such as jenkins logs, must-gather archives, etc. All checks would need to resolve to true to produce a positive match.

37 changes: 33 additions & 4 deletions parse_ci_monitor_json.py
Original file line number Diff line number Diff line change
@@ -6,10 +6,11 @@
import sys
import os
import subprocess
import pprint
from datetime import datetime
from typing import List

from issue_finder import IssuesFinder


def argparser():
parser = argparse.ArgumentParser(
@@ -28,14 +29,31 @@ def argparser():
help="location to write output file",
default=f"./{datetime.today().strftime('%Y%m%d')}.json",
)
parser.add_argument("-v", "--version", help="Specify OCP version", required=True)
parser.add_argument(
"-i",
"--issues",
action="store_true",
help="Searching for known issues.",
)
parser.add_argument(
"-v",
"--version",
help="Specify OCP version",
required=True,
)
return parser.parse_args()


def get_link_to_log(content: str):
    """Return the log link embedded in content, or None when there is none.

    The link is taken from the first occurrence of ``http`` up to the end
    of that line.
    """
    match = re.search(r"(http.*)", content)
    if match is None:
        return None
    return match.group(1)


def get_test_failure_profile(content: str, profile: str):
    """Return a ``<link markup>|<profile>`` cell for the failure report.

    When content holds a link, the result is
    ``[Link to logs|<link>]|<profile>``; otherwise it is
    ``not found|<profile>``.
    """
    link = get_link_to_log(content)
    if link:
        return f"[Link to logs|{link}]|{profile}"
    return f"not found|{profile}"
@@ -132,6 +150,17 @@ def main():
)
else:
report_struct[owner] = {automation_script: {id: [linkto_logs]}}
# Find known issues
if args.issues:
issue_finder = IssuesFinder()
issues = issue_finder.find_issues(
get_link_to_log(record["comment"]["content"])
)
if issues:
print(f"Found issues for {id}: {issues}")
report_struct[owner][automation_script][id].append(
dict([("known_issues", issues)])
)

write_output(report_struct, args.output)