From 25e6ee4a98a06f11b83965c6787c0637efb83bef Mon Sep 17 00:00:00 2001 From: shashi gharti Date: Tue, 7 Jun 2022 17:57:00 +0545 Subject: [PATCH] Revised code * Rearranged code and placed it in the relevant places "separation of concerns" * Made changes to tests and added new tests * Revised summary command code --- frictionless/program/summary.py | 84 +------------- frictionless/report/report.py | 146 +++++++++++------------- frictionless/schema/schema.py | 10 ++ tests/program/test_summary.py | 30 ++--- tests/report/test_report.py | 36 ++++++ tests/report/test_reporttask.py | 81 ------------- tests/resource/describe/test_general.py | 13 +++ tests/resource/validate/test_general.py | 39 +++++++ 8 files changed, 184 insertions(+), 255 deletions(-) create mode 100644 tests/report/test_report.py delete mode 100644 tests/report/test_reporttask.py diff --git a/frictionless/program/summary.py b/frictionless/program/summary.py index 60c8a7399d..5f636edd13 100644 --- a/frictionless/program/summary.py +++ b/frictionless/program/summary.py @@ -1,9 +1,6 @@ import typer -from tabulate import tabulate from .main import program from . import common -from .. import helpers -from ..layout import Layout from ..resource import Resource @@ -20,100 +17,29 @@ def program_summary(source: str = common.source): raise typer.Exit(1) # Infer Resource try: - resource = Resource(source, layout=Layout(limit_rows=5)) + resource = Resource(source) resource.infer() except Exception as exception: typer.secho(str(exception), err=True, fg=typer.colors.RED, bold=True) raise typer.Exit(1) - # Describe data - content = [ - [field.name, field.type, True if field.required else ""] - for field in resource.schema.fields - ] typer.secho("") typer.secho("# Describe ", bold=True) typer.secho("") - typer.secho(tabulate(content, headers=["name", "type", "required"], tablefmt="grid")) - # Extract data - try: - resource.extract() - except Exception as exception: - typer.secho(str(exception), err=True, fg=typer.colors.RED, bold=True) - raise typer.Exit(1) + typer.secho(str(resource.schema.to_summary())) typer.secho("") typer.secho("# Extract ", bold=True) typer.secho("") - typer.secho(resource.to_view()) - # Validate data + typer.secho(str(resource.to_view())) + # Validate try: report = resource.validate() except Exception as exception: typer.secho(str(exception), err=True, fg=typer.colors.RED, bold=True) raise typer.Exit(1) - error_content = [] - error_list = {} typer.secho("") typer.secho("# Validate ", bold=True) typer.secho("") - for task in report.tasks: - tabular = task.resource.profile == "tabular-data-resource" - prefix = "valid" if task.valid else "invalid" - suffix = "" if tabular else "(non-tabular)" - source = task.resource.path or task.resource.name - # for zipped resources append file name - if task.resource.innerpath: - source = f"{source} => {task.resource.innerpath}" - typer.secho(f"# {'-'*len(prefix)}", bold=True) - typer.secho(f"# {prefix}: {source} {suffix}", bold=True) - typer.secho(f"# {'-'*len(prefix)}", bold=True) - for error in report.tasks[0].errors: - error_content.append( - [ - error.get("rowPosition", ""), - error.get("fieldPosition", ""), - error.code, - error.message, - ] - ) - # error list for summary - error_title = f"{error.name} ({error.code})" - if error_title not in error_list: - error_list[error_title] = 0 - error_list[error_title] += 1 - if task.partial: - last_row_checked = error.get("rowPosition", "") - error_content = helpers.wrap_text_to_colwidths(error_content) - rows_checked = last_row_checked if task.partial else None - summary_content = helpers.validation_summary( - source, - basepath=task.resource.basepath, - time_taken=task.time, - rows_checked=rows_checked, - error_list=error_list, - ) - typer.secho("") - typer.secho("## Summary ", bold=True) - typer.secho("") - typer.secho( - str( - tabulate( - summary_content, - headers=["Description", "Size/Name/Count"], - tablefmt="grid", - ) - ) - ) - if len(error_content) > 0: - typer.secho("") - typer.secho("## Errors ", bold=True) - typer.secho("") - typer.secho( - tabulate( - error_content, - headers=["row", "field", "code", "message"], - tablefmt="grid", - ) - ) + typer.secho(str(report.to_summary())) # Return retcode raise typer.Exit(code=int(not report.valid)) diff --git a/frictionless/report/report.py b/frictionless/report/report.py index 54c73967d9..2131011dc3 100644 --- a/frictionless/report/report.py +++ b/frictionless/report/report.py @@ -2,7 +2,6 @@ from copy import deepcopy from importlib import import_module from tabulate import tabulate -from ..layout import Layout from ..metadata import Metadata from ..errors import Error, TaskError, ReportError from ..exception import FrictionlessException @@ -168,6 +167,75 @@ def wrapper(*args, **kwargs): return wrapper + # Summary + + def to_summary(self): + validation_content = None + for task in self.tasks: + tabular = task.resource.profile == "tabular-data-resource" + prefix = "valid" if task.valid else "invalid" + suffix = "" if tabular else "(non-tabular)" + source = task.resource.path or task.resource.name + # for zipped resources append file name + if task.resource.innerpath: + source = f"{source} => {task.resource.innerpath}" + validation_content = f"\n# {'-'*len(prefix)}\n" + validation_content += f"\n# {prefix}: {source} {suffix}\n" + validation_content += f"\n# {'-'*len(prefix)}\n" + error_list = {} + error_content = [] + for error in task.errors: + if error.code == "scheme-error": + return error + error_content.append( + [ + error.get("rowPosition", ""), + error.get("fieldPosition", ""), + error.code, + error.message, + ] + ) + # error list for summary + error_title = f"{error.name} ({error.code})" + if error_title not in error_list: + error_list[error_title] = 0 + error_list[error_title] += 1 + if task.partial: + last_row_checked = error.get("rowPosition", "") + # Validate + error_content = helpers.wrap_text_to_colwidths(error_content) + rows_checked = last_row_checked if task.partial else None + summary_content = helpers.validation_summary( + task.resource.path, + basepath=task.resource.basepath, + time_taken=self.time, + rows_checked=rows_checked, + error_list=error_list, + ) + validation_content += "\n\n" + validation_content += "## Summary " + validation_content += "\n\n" + validation_content += str( + tabulate( + summary_content, + headers=["Description", "Size/Name/Count"], + tablefmt="grid", + ) + ) + if len(error_content) > 0: + validation_content += "\n\n" + validation_content += "## Errors " + validation_content += "\n\n" + validation_content += str( + tabulate( + error_content, + headers=["row", "field", "code", "message"], + tablefmt="grid", + ) + ) + + return validation_content + # Metadata metadata_Error = ReportError @@ -333,82 +401,6 @@ def flatten(self, spec=["rowPosition", "fieldPosition", "code"]): result.append([context.get(prop) for prop in spec]) return result - # Summary - - def to_summary(self) -> dict: - """Summary of the resource - - Raises: - FrictionlessException: on any error - """ - # Process errors - summary = {} - error_list = {} - error_content = [] - for error in self.errors: - if error.code == "scheme-error": - return error - error_content.append( - [ - error.get("rowPosition", ""), - error.get("fieldPosition", ""), - error.code, - error.message, - ] - ) - # error list for summary - error_title = f"{error.name} ({error.code})" - if error_title not in error_list: - error_list[error_title] = 0 - error_list[error_title] += 1 - if self.partial: - last_row_checked = error.get("rowPosition", "") - # Describe - try: - self.resource.infer() - except Exception as exception: - raise FrictionlessException(self.__Error(note=str(exception))) from exception - summary["describe"] = tabulate( - [ - [field.name, field.type, True if field.required else ""] - for field in self.resource.schema.fields - ], - headers=["name", "type", "required"], - tablefmt="grid", - ) - # Extract - # Copy of existing resource to reset the properties to only extract 5 rows - resource = self.resource.to_copy(layout=Layout(limit_rows=5)) - try: - resource.extract() - except Exception as exception: - raise FrictionlessException(self.__Error(note=str(exception))) from exception - summary["extract"] = resource.to_view() - # Validate - summary["validate"] = {} - error_content = helpers.wrap_text_to_colwidths(error_content) - rows_checked = last_row_checked if self.partial else None - summary_content = helpers.validation_summary( - self.resource.path, - basepath=self.resource.basepath, - time_taken=self.time, - rows_checked=rows_checked, - error_list=error_list, - ) - summary["validate"]["summary"] = tabulate( - summary_content, - headers=["Description", "Size/Name/Count"], - tablefmt="grid", - ) - if len(error_content) > 0: - summary["validate"]["errors"] = tabulate( - error_content, - headers=["row", "field", "code", "message"], - tablefmt="grid", - ) - - return summary - # Metadata metadata_Error = ReportError diff --git a/frictionless/schema/schema.py b/frictionless/schema/schema.py index bf8f80ddf1..f21be9b40a 100644 --- a/frictionless/schema/schema.py +++ b/frictionless/schema/schema.py @@ -1,4 +1,5 @@ from copy import copy, deepcopy +from tabulate import tabulate from ..exception import FrictionlessException from ..metadata import Metadata from ..field import Field @@ -289,6 +290,15 @@ def to_excel_template(self, path: str) -> any: ) return tableschema_to_template.create_xlsx(self, path) + # Summary + + def to_summary(self): + content = [ + [field.name, field.type, True if field.required else ""] + for field in self.fields + ] + return tabulate(content, headers=["name", "type", "required"], tablefmt="grid") + # Metadata metadata_duplicate = True diff --git a/tests/program/test_summary.py b/tests/program/test_summary.py index 719e137eb7..e3f2c296c3 100644 --- a/tests/program/test_summary.py +++ b/tests/program/test_summary.py @@ -1,8 +1,9 @@ import os from typer.testing import CliRunner -from frictionless import program +from frictionless import program, helpers runner = CliRunner() +IS_UNIX = not helpers.is_platform("windows") def test_program_error_not_found(): @@ -41,34 +42,24 @@ def test_program_summary_valid(): ) -def test_program_summary_describe_header_row(): - result = runner.invoke(program, "summary data/countries.csv") - assert result.exit_code == 1 - assert result.stdout.count("| name | type | required |") - - def test_program_summary_describe(): result = runner.invoke(program, "summary data/countries.csv") assert result.exit_code == 1 assert ( - result.stdout.count("| id | integer | |") + result.stdout.count("| name | type | required |") + and result.stdout.count("| id | integer | |") and result.stdout.count("| neighbor_id | string | |") and result.stdout.count("| name | string | |") and result.stdout.count("| population | string | |") ) -def test_program_summary_extract_header_row(): - result = runner.invoke(program, "summary data/countries.csv") - assert result.exit_code == 1 - assert result.stdout.count("| id | neighbor_id | name | population |") - - def test_program_summary_extract(): result = runner.invoke(program, "summary data/countries.csv") assert result.exit_code == 1 assert ( - result.stdout.count("| 1 | 'Ireland' | 'Britain' | '67' |") + result.stdout.count("| id | neighbor_id | name | population |") + and result.stdout.count("| 1 | 'Ireland' | 'Britain' | '67' |") and result.stdout.count("| 2 | '3' | 'France' | 'n/a' |") and result.stdout.count("| 3 | '22' | 'Germany' | '83' |") and result.stdout.count("| 4 | None | 'Italy' | '60' |") @@ -127,9 +118,12 @@ def test_program_summary_validate_errors(): def test_program_summary_without_command(tmpdir): output_file_path = f"{tmpdir}/output.txt" exit_code = os.system(f"frictionless data/countries.csv > {output_file_path}") - # A value of 256 means the spawned program terminated with exit code 1 - # https://stackoverflow.com/questions/47832180/os-system-returns-the-value-256-when-run-from-crontab - assert exit_code == 256 + if IS_UNIX: + # A value of 256 means the spawned program terminated with exit code 1 + # https://stackoverflow.com/questions/47832180/os-system-returns-the-value-256-when-run-from-crontab + assert exit_code == 256 + else: + assert exit_code == 1 with open(output_file_path, encoding="utf-8") as file: expected = file.read() assert ( diff --git a/tests/report/test_report.py b/tests/report/test_report.py new file mode 100644 index 0000000000..c85a7d7a47 --- /dev/null +++ b/tests/report/test_report.py @@ -0,0 +1,36 @@ +from frictionless import validate + + +def test_report_summary(): + report = validate("data/countries.csv") + output = report.to_summary() + assert output.count("invalid") and output.count("Summary") and output.count("Errors") + + +def test_report_summary_validate_summary(): + report = validate("data/countries.csv") + output = report.to_summary() + assert ( + output.count("File name |") + and output.count("File size (bytes) | 143") + and output.count("Total Time Taken (sec) |") + and output.count("Total Errors | 4") + and output.count("Extra Cell (extra-cell) | 1") + and output.count("Missing Cell (missing-cell) | 3") + ) + + +def test_report_summary_validate_errors(): + report = validate("data/countries.csv") + output = report.to_summary() + with open("data/fixtures/program/summary/errors.txt", encoding="utf-8") as file: + expected = file.read() + assert output.count(expected.strip()) + + +def test_report_summary_valid(): + report = validate("data/capital-valid.csv") + output = report.to_summary() + assert ( + output.count("valid") and output.count("Summary") and not output.count("Errors") + ) diff --git a/tests/report/test_reporttask.py b/tests/report/test_reporttask.py deleted file mode 100644 index 344377236f..0000000000 --- a/tests/report/test_reporttask.py +++ /dev/null @@ -1,81 +0,0 @@ -from frictionless import validate - - -def test_report_task_error_not_found(): - report = validate("data/countriess.csv") - assert ( - report.tasks[0].errors[0].code == "scheme-error" - and report.tasks[0].errors[0].description - == "Data reading error because of incorrect scheme." - ) - - -def test_report_task_summary(): - report = validate("data/countries.csv") - assert report.valid is False and ["describe", "extract", "validate"] == list( - report.tasks[0].to_summary().keys() - ) - - -def test_report_task_summary_valid(): - report = validate("data/capital-valid.csv") - assert report.valid is True and ["summary"] == list( - report.tasks[0].to_summary()["validate"].keys() - ) - - -def test_report_task_summary_describe(): - report = validate("data/countries.csv") - output = report.tasks[0].to_summary()["describe"] - assert ( - output.count("| id | integer | |") - and output.count("| neighbor_id | string | |") - and output.count("| name | string | |") - and output.count("| population | string | |") - ) - - -def test_report_task_summary_extract(): - report = validate("data/countries.csv") - output = report.tasks[0].to_summary()["extract"] - assert ( - output.count("| 1 | 'Ireland' | 'Britain' | '67' |") - and output.count("| 2 | '3' | 'France' | 'n/a' |") - and output.count("| 3 | '22' | 'Germany' | '83' |") - and output.count("| 4 | None | 'Italy' | '60' |") - and output.count("| 5 | None | None | None |") - ) - - -def test_report_task_summary_extract_only_5_rows(): - report = validate("data/long.csv") - output = report.tasks[0].to_summary()["extract"] - assert ( - output.count("| 1 | 'a' |") - and output.count("| 2 | 'b' |") - and output.count("| 3 | 'c' |") - and output.count("| 4 | 'd' |") - and output.count("| 5 | 'e' |") - and not output.count("| 6 | 'f' |") - ) - - -def test_report_task_summary_validate_summary(): - report = validate("data/countries.csv") - validate_summary = report.tasks[0].to_summary()["validate"]["summary"] - assert ( - validate_summary.count("File name |") - and validate_summary.count("File size (bytes) | 143") - and validate_summary.count("Total Time Taken (sec) |") - and validate_summary.count("Total Errors | 4") - and validate_summary.count("Extra Cell (extra-cell) | 1") - and validate_summary.count("Missing Cell (missing-cell) | 3") - ) - - -def test_report_task_summary_validate_errors(): - report = validate("data/countries.csv") - output_file_path = "data/fixtures/program/summary/errors.txt" - with open(output_file_path, encoding="utf-8") as file: - expected = file.read() - assert report.tasks[0].to_summary()["validate"]["errors"] == expected.strip() diff --git a/tests/resource/describe/test_general.py b/tests/resource/describe/test_general.py index e95e9e17c1..c87bc6b2cd 100644 --- a/tests/resource/describe/test_general.py +++ b/tests/resource/describe/test_general.py @@ -186,3 +186,16 @@ def test_describe_resource_with_json_format_issue_827(): def test_describe_resource_with_years_in_the_header_issue_825(): resource = Resource.describe("data/issue-825.csv") assert resource.schema.field_names == ["Musei", "2011", "2010"] + + +def test_describe_resource_schema_summary(): + resource = Resource.describe("data/countries.csv") + resource.infer() + output = resource.schema.to_summary() + assert ( + output.count("| name | type | required |") + and output.count("| id | integer | |") + and output.count("| neighbor_id | string | |") + and output.count("| name | string | |") + and output.count("| population | string | |") + ) diff --git a/tests/resource/validate/test_general.py b/tests/resource/validate/test_general.py index c683eae3db..155a608929 100644 --- a/tests/resource/validate/test_general.py +++ b/tests/resource/validate/test_general.py @@ -559,3 +559,42 @@ def test_validate_resource_errors_with_fields_993(): 'The data resource has an error: "fields" should be set as "resource.schema.fields" (not "resource.fields").', ] ] + + +def test_validate_resource_summary_invalid(): + resource = Resource("data/countries.csv") + report = resource.validate() + output = report.to_summary() + assert output.count("valid") and output.count("Summary") and output.count("Errors") + + +def test_validate_resource_validate_summary(): + resource = Resource("data/countries.csv") + report = resource.validate() + output = report.to_summary() + assert ( + output.count("File name |") + and output.count("File size (bytes) | 143") + and output.count("Total Time Taken (sec) |") + and output.count("Total Errors | 4") + and output.count("Extra Cell (extra-cell) | 1") + and output.count("Missing Cell (missing-cell) | 3") + ) + + +def test_validate_resource_validate_errors(): + resource = Resource("data/countries.csv") + report = resource.validate() + output = report.to_summary() + with open("data/fixtures/program/summary/errors.txt", encoding="utf-8") as file: + expected = file.read() + assert output.count(expected.strip()) + + +def test_validate_resource_summary_valid(): + resource = Resource("data/capital-valid.csv") + report = resource.validate() + output = report.to_summary() + assert ( + output.count("valid") and output.count("Summary") and not output.count("Errors") + )