Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Skip invalid json files in ReportBuilder #384

Merged
merged 13 commits into from
Dec 20, 2023
22 changes: 20 additions & 2 deletions src/viztracer/report_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def __init__(
self.align = align
self.minimize_memory = minimize_memory
self.jsons: List[Dict] = []
self.invalid_json_paths: List[str] = []
self.json_loaded = False
self.final_messages: List[Tuple[str, Dict]] = []
if not isinstance(data, (dict, list, tuple)):
Expand All @@ -67,18 +68,28 @@ def load_jsons(self) -> None:
self.jsons = [get_json(self.data)]
elif isinstance(self.data, (list, tuple)):
self.jsons = []
self.invalid_json_paths = []
for idx, j in enumerate(self.data):
if self.verbose > 0:
same_line_print(f"Loading trace data from processes {idx}/{len(self.data)}")
self.jsons.append(get_json(j))
try:
self.jsons.append(get_json(j))
except json.JSONDecodeError:
self.invalid_json_paths.append(j)
if len(self.invalid_json_paths) > 0:
self.final_messages.append(("corrupted_json", {"paths": self.invalid_json_paths}))
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do a full search on this PR and change all "corrupted" to "invalid".


def combine_json(self) -> None:
if self.verbose > 0:
same_line_print("Combining trace data")
if self.combined_json:
return
if not self.jsons:
if not self.jsons and not self.invalid_json_paths:
raise ValueError("Can't get report of nothing")
elif not self.jsons:
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you hate nested if so much :) . You are evaluating self.jsons twice (performance wise it's not a big deal, but logic wise it's not clean).

Just check self.jsons first, and raise different errors based on self.invalid_json_paths. Nested if works in many cases.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you hate nested if so much :) . You are evaluating self.jsons twice (performance wise it's not a big deal, but logic wise it's not clean).

Just check self.jsons first, and raise different errors based on self.invalid_json_paths. Nested if works in many cases.

Sorry, I didn't notice that. And thank you for correcting my bad habit.

# print corrupted json files
self.print_messages()
raise ValueError("All json files are corrupted")
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if not self.jsons and not self.invalid_json_paths:
raise ValueError("Can't get report of nothing")
elif not self.jsons:
# print corrupted json files
self.print_messages()
raise ValueError("All json files are corrupted")
if not self.jsons:
if not self.invalid_json_paths:
raise ValueError("Can't get report of nothing")
else:
raise ValueError("No valid json files found")

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changing the exception message should be enough - this is a rather rare case, just separate that from "no files".

if self.align:
for one in self.jsons:
self.align_events(one["traceEvents"])
Expand Down Expand Up @@ -213,3 +224,10 @@ def print_messages(self):
color_print("OKGREEN", f"vizviewer \"{report_abspath}\"")
else:
color_print("OKGREEN", f"vizviewer {report_abspath}")
elif msg_type == "corrupted_json":
print("")
color_print("WARNING", "Found and ignored corrupted json file, you may lost some process data.")
color_print("WARNING", "Corrupted json file:")
for msg in msg_args["paths"]:
color_print("WARNING", f" {msg}")
print("")
26 changes: 26 additions & 0 deletions tests/test_report_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import io
import json
import os
import shutil
import tempfile
from unittest.mock import patch

import viztracer
from viztracer.report_builder import ReportBuilder
Expand Down Expand Up @@ -70,6 +72,30 @@ def test_invalid_json(self):
with self.assertRaises(Exception):
ReportBuilder([invalid_json_path], verbose=1)

@patch('sys.stdout', new_callable=io.StringIO)
def test_corrupted_json(self, mock_stdout):
with tempfile.TemporaryDirectory() as tmpdir:
corrupted_json_path = os.path.join(os.path.dirname(__file__), "data", "fib.py")
valid_json_path = os.path.join(os.path.dirname(__file__), "data", "multithread.json")
corrupted_json_file = shutil.copy(corrupted_json_path, os.path.join(tmpdir, "corrupted.json"))
valid_json_file = shutil.copy(valid_json_path, os.path.join(tmpdir, "valid.json"))
rb = ReportBuilder([corrupted_json_file, valid_json_file], verbose=1)
with io.StringIO() as s:
rb.save(s)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should check for some printed messages here. This test only assures no exceptions raised.

self.assertIn("Corrupted json file", mock_stdout.getvalue())

@patch('sys.stdout', new_callable=io.StringIO)
def test_all_corrupted_json(self, mock_stdout):
with tempfile.TemporaryDirectory() as tmpdir:
corrupted_json_path = os.path.join(os.path.dirname(__file__), "data", "fib.py")
corrupted_json_file = shutil.copy(corrupted_json_path, os.path.join(tmpdir, "corrupted.json"))
rb = ReportBuilder([corrupted_json_file], verbose=1)
with self.assertRaises(Exception) as context:
with io.StringIO() as s:
rb.save(s)
self.assertEqual(str(context.exception), "All json files are corrupted")
self.assertIn("Corrupted json file", mock_stdout.getvalue())

def test_combine(self):
with tempfile.TemporaryDirectory() as tmpdir:
file_path1 = os.path.join(tmpdir, "result1.json")
Expand Down
Loading