diff --git a/data/fixtures/cli/long-error-messages-976.txt b/data/fixtures/cli/long-error-messages-976.txt index 2ba67bba4b..7cddfd1171 100644 --- a/data/fixtures/cli/long-error-messages-976.txt +++ b/data/fixtures/cli/long-error-messages-976.txt @@ -1,33 +1,20 @@ -# ------- -# invalid: test-tabulator -# ------- +## Errors - -# Summary - -Description Size/Name/Count -------------------------------- ----------------- -File name (Not Found) test-tabulator -File size N/A -Total Time Taken (sec) -Total Errors 1 -Resource Error (resource-error) 1 - -# Errors - -row field code message ------ ------- --------- ------------------------------------------------- - resource- The data resource has an error: "{'format': - error 'inline', 'hashing': 'md5', 'name': 'test- - tabulator', 'profile': 'tabular-data-resource', - 'resources': [{'name': 'first-resource', 'path': - 'table.xls', 'schema': {'fields': [{'name': 'id', - 'type': 'number'}, {'name': 'name', 'type': - 'string'}]}}, {'name': 'number-two', 'path': - 'table-reverse.csv', 'schema': {'fields': - [{'name': 'id', 'type': 'integer'}, {'name': - 'name', 'type': 'string'}]}}], 'scheme': '', - 'stats': {'bytes': 0, 'fields': 0, 'hash': '', - 'rows': 0}} is not valid under any of the given - schemas" at "" in metadata and at "oneOf" in - profile \ No newline at end of file ++-------+---------+-----------+---------------------------------------------------+ +| row | field | code | message | ++=======+=========+===========+===================================================+ +| | | resource- | The data resource has an error: "{'format': | +| | | error | 'inline', 'hashing': 'md5', 'name': 'test- | +| | | | tabulator', 'profile': 'tabular-data-resource', | +| | | | 'resources': [{'name': 'first-resource', 'path': | +| | | | 'table.xls', 'schema': {'fields': [{'name': 'id', | +| | | | 'type': 'number'}, {'name': 'name', 'type': | +| | | | 'string'}]}}, {'name': 'number-two', 'path': | +| | | | 'table-reverse.csv', 'schema': {'fields': | +| | | | [{'name': 'id', 'type': 'integer'}, {'name': | +| | | | 'name', 'type': 'string'}]}}], 'scheme': '', | +| | | | 'stats': {'bytes': 0, 'fields': 0, 'hash': '', | +| | | | 'rows': 0}} is not valid under any of the given | +| | | | schemas" at "" in metadata and at "oneOf" in | +| | | | profile | ++-------+---------+-----------+---------------------------------------------------+ \ No newline at end of file diff --git a/data/fixtures/cli/zipped-resources-979.txt b/data/fixtures/cli/zipped-resources-979.txt index de9b7ef3b2..dff7178988 100644 --- a/data/fixtures/cli/zipped-resources-979.txt +++ b/data/fixtures/cli/zipped-resources-979.txt @@ -1,44 +1,8 @@ -# ----- -# valid: ogd10_energieforschungstatistik_ch.csv -# ----- - -# Summary - -Description Size/Name/Count ----------------------- -------------------------------------- -File name ogd10_energieforschungstatistik_ch.csv -File size (KB) 88541 -Total Time Taken (sec) -# ------- -# invalid: ogd10_catalogs.zip => capital-invalid.csv -# ------- - - -# Summary - -Description Size/Name/Count ---------------------------- ----------------------------------------- -File name (Not Found) ogd10_catalogs.zip => capital-invalid.csv -File size N/A -Total Time Taken (sec) -Total Errors 1 -Schema Error (schema-error) 1 - -# Errors - -row field code message ------ ------- ------- ------------------------------------------------- - schema- Schema is not valid: Schemas with duplicate field - error names are not supported - -# ----- -# valid: ogd10_catalogs.zip => finanzquellen.csv -# ----- - -# Summary - -Description Size/Name/Count ----------------------- --------------------------------------- -File name (Not Found) ogd10_catalogs.zip => finanzquellen.csv -File size N/A -Total Time Taken (sec) +## Errors + ++-------+---------+---------+---------------------------------------------------+ +| row | field | code | message | ++=======+=========+=========+===================================================+ +| | | schema- | Schema is not valid: Schemas with duplicate field | +| | | error | names are not supported | ++-------+---------+---------+---------------------------------------------------+ \ No newline at end of file diff --git a/data/fixtures/summary/multiline-errors.txt b/data/fixtures/summary/multiline-errors.txt new file mode 100644 index 0000000000..c6f3486dbe --- /dev/null +++ b/data/fixtures/summary/multiline-errors.txt @@ -0,0 +1,15 @@ ++-------+---------+------------+----------------------------------------------------+ +| row | field | code | message | ++=======+=========+============+====================================================+ +| 4 | 5 | extra-cell | Row at position "4" has an extra value in field at | +| | | | position "5" | ++-------+---------+------------+----------------------------------------------------+ +| 7 | 2 | missing- | Row at position "7" has a missing cell in field | +| | | cell | "neighbor_id" at position "2" | ++-------+---------+------------+----------------------------------------------------+ +| 7 | 3 | missing- | Row at position "7" has a missing cell in field | +| | | cell | "name" at position "3" | ++-------+---------+------------+----------------------------------------------------+ +| 7 | 4 | missing- | Row at position "7" has a missing cell in field | +| | | cell | "population" at position "4" | ++-------+---------+------------+----------------------------------------------------+ \ No newline at end of file diff --git a/data/fixtures/summary/multiline-scheme-error.txt b/data/fixtures/summary/multiline-scheme-error.txt new file mode 100644 index 0000000000..1fe9bf7613 --- /dev/null +++ b/data/fixtures/summary/multiline-scheme-error.txt @@ -0,0 +1,9 @@ +## Errors + ++-------+---------+---------+---------------------------------------------------+ +| row | field | code | message | ++=======+=========+=========+===================================================+ +| | | scheme- | The data source could not be successfully loaded: | +| | | error | [Errno 2] No such file or directory: | +| | | | 'data/countriess.csv' | ++-------+---------+---------+---------------------------------------------------+ \ No newline at end of file diff --git a/frictionless/program/__init__.py b/frictionless/program/__init__.py index 82594aded8..eef0a9d3f8 100644 --- a/frictionless/program/__init__.py +++ b/frictionless/program/__init__.py @@ -1,6 +1,7 @@ from .api import program_api from .describe import program_describe from .extract import program_extract +from .summary import program_summary from .main import program, program_main from .transform import program_transform from .validate import program_validate diff --git a/frictionless/program/main.py b/frictionless/program/main.py index f309434e73..7fcf1b39f7 100644 --- a/frictionless/program/main.py +++ b/frictionless/program/main.py @@ -1,3 +1,4 @@ +import sys import typer from typing import Optional from .. import settings @@ -5,7 +6,17 @@ # Program -program = typer.Typer() + +# TODO: remove this hack when Typer supports not-found commands catching +# https://github.com/tiangolo/typer/issues/18 +class Program(typer.Typer): + def __call__(self, *args, **kwargs): + if sys.argv[1].count("."): + sys.argv = [sys.argv[0], "summary", sys.argv[1]] + return super().__call__(*args, **kwargs) + + +program = Program() # Helpers diff --git a/frictionless/program/summary.py b/frictionless/program/summary.py new file mode 100644 index 0000000000..47f3cd41b3 --- /dev/null +++ b/frictionless/program/summary.py @@ -0,0 +1,44 @@ +import typer +from .main import program +from . import common +from ..resource import Resource + + +@program.command(name="summary") +def program_summary(source: str = common.source): + """Summary of data source. + + It will return schema, sample of the data and validation report for the resource. + """ + # Validate input + if not source: + message = 'Providing "source" is required' + typer.secho(message, err=True, fg=typer.colors.RED, bold=True) + raise typer.Exit(1) + # Infer Resource + try: + resource = Resource(source) + resource.infer() + except Exception as exception: + typer.secho(str(exception), err=True, fg=typer.colors.RED, bold=True) + raise typer.Exit(1) + typer.secho("") + typer.secho("# Describe ", bold=True) + typer.secho("") + typer.secho(str(resource.schema.to_summary())) + typer.secho("") + typer.secho("# Extract ", bold=True) + typer.secho("") + typer.secho(str(resource.to_view())) + # Validate + try: + report = resource.validate() + except Exception as exception: + typer.secho(str(exception), err=True, fg=typer.colors.RED, bold=True) + raise typer.Exit(1) + typer.secho("") + typer.secho("# Validate ", bold=True) + typer.secho(str(report.to_summary())) + + # Return retcode + raise typer.Exit(code=int(not report.valid)) diff --git a/frictionless/program/validate.py b/frictionless/program/validate.py index a9d8db9c84..1265e2fd6f 100644 --- a/frictionless/program/validate.py +++ b/frictionless/program/validate.py @@ -1,7 +1,5 @@ import sys -import os import typer -import textwrap from typing import List from tabulate import tabulate from ..actions import validate @@ -217,7 +215,7 @@ def program_validate( typer.secho(content) raise typer.Exit() - # Return report + # Return validation report errors if report.errors: content = [] if is_stdin: @@ -232,136 +230,8 @@ def program_validate( str(tabulate(content, headers=["code", "message"], tablefmt="simple")) ) - # Return tables - prev_invalid = False - for number, task in enumerate(report.tasks, start=1): - tabular = task.resource.profile == "tabular-data-resource" - if number != 1 and prev_invalid: - typer.secho("") - prefix = "valid" if task.valid else "invalid" - suffix = "" if tabular else "(non-tabular)" - source = task.resource.path or task.resource.name - # for zipped resources append file name - if task.resource.innerpath: - source = f"{source} => {task.resource.innerpath}" - if is_stdin: - source = "stdin" - typer.secho(f"# {'-'*len(prefix)}", bold=True) - typer.secho(f"# {prefix}: {source} {suffix}", bold=True) - typer.secho(f"# {'-'*len(prefix)}", bold=True) - error_list = {} - if task.errors: - prev_invalid = True - typer.secho("") - content = [] - for error in task.errors: - content.append( - [ - error.get("rowPosition", ""), - error.get("fieldPosition", ""), - error.code, - error.message, - ] - ) - # error list for summary - error_title = f"{error.name} ({error.code})" - if error_title not in error_list: - error_list[error_title] = 0 - error_list[error_title] += 1 - if task.partial: - last_row_checked = error.get("rowPosition", "") - content = _wrap_text_to_colwidths(content) - # summary - rows_checked = last_row_checked if task.partial else None - summary_content = _validation_summary( - source, - basepath=task.resource.basepath, - time_taken=task.time, - rows_checked=rows_checked, - error_list=error_list, - ) - typer.echo("\n# Summary \n") - if task.partial: - typer.echo( - "The document was partially validated because of one of the limits" - ) - typer.echo("* limit errors") - typer.echo("* memory Limit \n") - typer.secho( - str( - tabulate( - summary_content, - headers=["Description", "Size/Name/Count"], - tablefmt="simple", - ) - ) - ) - # errors - if task.errors: - typer.echo("\n# Errors \n") - typer.secho( - str( - tabulate( - content, - headers=["row", "field", "code", "message"], - tablefmt="simple", - ) - ) - ) + # Return validation report summary and tables + typer.secho(str(report.to_summary())) # Return retcode raise typer.Exit(code=int(not report.valid)) - - -# TODO:This is a temporary function to use with tabulate as -# tabulate 0.8.9 does not support text wrap -def _wrap_text_to_colwidths( - list_of_lists: List, colwidths: List = [5, 5, 10, 50] -) -> List: - """Create new list with wrapped text with different column width. - Args: - list_of_lists (List): List of lines - colwidths (List): width for each column - - Returns: - List: list of lines with wrapped text - - """ - result = [] - for row in list_of_lists: - new_row = [] - for cell, width in zip(row, colwidths): - cell = str(cell) - wrapped = textwrap.wrap(cell, width=width) - new_row.append("\n".join(wrapped)) - result.append(new_row) - return result - - -def _validation_summary( - source: str, - time_taken: str, - basepath: str = None, - rows_checked: int = None, - error_list: List = None, -) -> List: - """Generate summary for validation task""" - file_path = os.path.join(basepath, source) if basepath else source - file_size = "N/A" - unit = None - if os.path.exists(file_path): - file_size = os.path.getsize(file_path) - unit = helpers.format_bytes(file_size) - content = [ - [f"File name { '' if unit else '(Not Found)' }", source], - [f"File size { f'({unit})' if unit else '' }", file_size], - ["Total Time Taken (sec)", time_taken], - ] - if rows_checked: - content.append(["Rows Checked(Partial)**", rows_checked]) - if error_list: - content.append(["Total Errors", sum(error_list.values())]) - for code, count in error_list.items(): - content.append([code, count]) - - return content diff --git a/frictionless/report/report.py b/frictionless/report/report.py index a38680ff0d..e1d8017067 100644 --- a/frictionless/report/report.py +++ b/frictionless/report/report.py @@ -1,6 +1,10 @@ +import os import functools +import textwrap from copy import deepcopy from importlib import import_module +from tabulate import tabulate +from typing import List from ..metadata import Metadata from ..errors import Error, TaskError, ReportError from ..exception import FrictionlessException @@ -166,6 +170,67 @@ def wrapper(*args, **kwargs): return wrapper + # Summary + + def to_summary(self): + """Summary of the report + + Returns: + str: validation report + """ + + validation_content = "" + for task in self.tasks: + tabular = task.resource.profile == "tabular-data-resource" + prefix = "valid" if task.valid else "invalid" + suffix = "" if tabular else "(non-tabular)" + source = task.resource.path or task.resource.name + # for zipped resources append file name + if task.resource.innerpath: + source = f"{source} => {task.resource.innerpath}" + validation_content += f"\n# {'-'*len(prefix)}" + validation_content += f"\n# {prefix}: {source} {suffix}" + validation_content += f"\n# {'-'*len(prefix)}" + error_content = [] + if task.errors: + for error in task.errors: + error_content.append( + [ + error.get("rowPosition", ""), + error.get("fieldPosition", ""), + error.code, + error.message, + ] + ) + # Validate + error_content = _wrap_text_to_colwidths(error_content) + validation_content += "\n\n" + validation_content += "## Summary " + validation_content += "\n\n" + if task.partial: + validation_content += ( + "The document was partially validated because of one of the limits\n" + ) + validation_content += "* limit errors \n" + validation_content += "* memory Limit" + validation_content += "\n\n" + validation_content += task.to_summary() + validation_content += "\n\n" + # errors + if task.errors: + validation_content += "## Errors " + validation_content += "\n\n" + validation_content += str( + tabulate( + error_content, + headers=["row", "field", "code", "message"], + tablefmt="grid", + ) + ) + validation_content += "\n\n" + + return validation_content + # Metadata metadata_Error = ReportError @@ -222,7 +287,7 @@ def __init__( time=None, scope=None, partial=None, - errors=None + errors=None, ): # Store provided @@ -331,6 +396,63 @@ def flatten(self, spec=["rowPosition", "fieldPosition", "code"]): result.append([context.get(prop) for prop in spec]) return result + # Summary + + def to_summary( + self, + ) -> str: + """Generate summary for validation task" + + Returns: + str: validation summary + """ + source = self.resource.path or self.resource.name + # For zipped resources append file name + if self.resource.innerpath: + source = f"{source} => {self.resource.innerpath}" + file_path = ( + os.path.join(self.resource.basepath, source) + if self.resource.basepath + else source + ) + # Prepare error lists and last row checked(in case of partial validation) + error_list = {} + for error in self.errors: + error_title = f"{error.name} ({error.code})" + if error_title not in error_list: + error_list[error_title] = 0 + error_list[error_title] += 1 + if self.partial: + last_row_checked = error.get("rowPosition", "") + rows_checked = last_row_checked if self.partial else None + file_size = "N/A" + unit = None + if os.path.exists(file_path): + file_size = os.path.getsize(file_path) + unit = helpers.format_bytes(file_size) + not_found_text = "" + if not unit: + if not self.resource.innerpath: + not_found_text = "(Not Found)" + content = [ + [f"File name {not_found_text}", source], + [f"File size { f'({unit})' if unit else '' }", file_size], + ["Total Time Taken (sec)", self.time], + ] + if rows_checked: + content.append(["Rows Checked(Partial)**", rows_checked]) + if error_list: + content.append(["Total Errors", sum(error_list.values())]) + for code, count in error_list.items(): + content.append([code, count]) + return str( + tabulate( + content, + headers=["Description", "Size/Name/Count"], + tablefmt="grid", + ) + ) + # Metadata metadata_Error = ReportError @@ -344,3 +466,28 @@ def metadata_process(self): if not isinstance(resource, Resource): resource = Resource(resource) dict.__setitem__(self, "resource", resource) + + +# TODO:This is a temporary function to use with tabulate as +# tabulate 0.8.9 does not support text wrap +def _wrap_text_to_colwidths( + list_of_lists: List, colwidths: List = [5, 5, 10, 50] +) -> List: + """Create new list with wrapped text with different column width. + Args: + list_of_lists (List): List of lines + colwidths (List): width for each column + + Returns: + List: list of lines with wrapped text + + """ + result = [] + for row in list_of_lists: + new_row = [] + for cell, width in zip(row, colwidths): + cell = str(cell) + wrapped = textwrap.wrap(cell, width=width) + new_row.append("\n".join(wrapped)) + result.append(new_row) + return result diff --git a/frictionless/schema/schema.py b/frictionless/schema/schema.py index bf8f80ddf1..8113ee1854 100644 --- a/frictionless/schema/schema.py +++ b/frictionless/schema/schema.py @@ -1,4 +1,5 @@ from copy import copy, deepcopy +from tabulate import tabulate from ..exception import FrictionlessException from ..metadata import Metadata from ..field import Field @@ -289,6 +290,21 @@ def to_excel_template(self, path: str) -> any: ) return tableschema_to_template.create_xlsx(self, path) + # Summary + + def to_summary(self): + """Summary of the schema in table format + + Returns: + str: schema summary + """ + + content = [ + [field.name, field.type, True if field.required else ""] + for field in self.fields + ] + return tabulate(content, headers=["name", "type", "required"], tablefmt="grid") + # Metadata metadata_duplicate = True diff --git a/tests/program/test_summary.py b/tests/program/test_summary.py new file mode 100644 index 0000000000..8ed24a0cb9 --- /dev/null +++ b/tests/program/test_summary.py @@ -0,0 +1,142 @@ +import os +from typer.testing import CliRunner +from frictionless import program, helpers + +runner = CliRunner() +IS_UNIX = not helpers.is_platform("windows") + + +def test_program_summary_error_not_found(): + result = runner.invoke(program, "summary data/countriess.csv") + assert result.exit_code == 1 + assert ( + result.stdout.count("[scheme-error]") + and result.stdout.count("[Errno 2]") + and result.stdout.count("data/countriess.csv") + ) + + +def test_program_summary(): + result = runner.invoke(program, "summary data/countries.csv") + assert result.exit_code == 1 + assert ( + result.stdout.count("invalid") + and result.stdout.count("Describe") + and result.stdout.count("Extract") + and result.stdout.count("Validate") + and result.stdout.count("Summary") + and result.stdout.count("Errors") + ) + + +def test_program_summary_valid(): + result = runner.invoke(program, "summary data/capital-valid.csv") + assert result.exit_code == 0 + assert ( + result.stdout.count("valid") + and result.stdout.count("Describe") + and result.stdout.count("Extract") + and result.stdout.count("Validate") + and result.stdout.count("Summary") + and not result.stdout.count("Errors") + ) + + +def test_program_summary_describe(): + result = runner.invoke(program, "summary data/countries.csv") + assert result.exit_code == 1 + assert ( + result.stdout.count("| name | type | required |") + and result.stdout.count("| id | integer | |") + and result.stdout.count("| neighbor_id | string | |") + and result.stdout.count("| name | string | |") + and result.stdout.count("| population | string | |") + ) + + +def test_program_summary_extract(): + result = runner.invoke(program, "summary data/countries.csv") + assert result.exit_code == 1 + assert ( + result.stdout.count("| id | neighbor_id | name | population |") + and result.stdout.count("| 1 | 'Ireland' | 'Britain' | '67' |") + and result.stdout.count("| 2 | '3' | 'France' | 'n/a' |") + and result.stdout.count("| 3 | '22' | 'Germany' | '83' |") + and result.stdout.count("| 4 | None | 'Italy' | '60' |") + and result.stdout.count("| 5 | None | None | None |") + ) + + +def test_program_summary_extract_only_5_rows(): + result = runner.invoke(program, "summary data/long.csv") + assert result.exit_code == 0 + assert ( + result.stdout.count("valid") + and result.stdout.count("| 1 | 'a' |") + and result.stdout.count("| 2 | 'b' |") + and result.stdout.count("| 3 | 'c' |") + and result.stdout.count("| 4 | 'd' |") + and result.stdout.count("| 5 | 'e' |") + and not result.stdout.count("| 6 | 'f' |") + ) + + +def test_program_summary_validate(): + result = runner.invoke(program, "summary data/countries.csv") + assert result.exit_code == 1 + assert result.stdout.count("# invalid:") + + +def test_program_summary_validate_summary(): + result = runner.invoke(program, "summary data/countries.csv") + assert result.exit_code == 1 + assert ( + result.stdout.count("Description | Size/Name/Count") + and result.stdout.count("File name | data/countries.csv") + and result.stdout.count("File size (bytes) | 143") + and result.stdout.count("Total Time Taken (sec) |") + and result.stdout.count("Total Errors | 4") + and result.stdout.count("Extra Cell (extra-cell) | 1") + and result.stdout.count("Missing Cell (missing-cell) | 3") + ) + + +def test_program_summary_validate_errors(): + result = runner.invoke(program, "summary data/countries.csv") + output_file_path = "data/fixtures/summary/multiline-errors.txt" + with open(output_file_path, encoding="utf-8") as file: + expected = file.read() + assert result.exit_code == 1 + assert result.stdout.count(expected.strip()) + + +def test_program_summary_without_command(tmpdir): + output_file_path = f"{tmpdir}/output.txt" + exit_code = os.system(f"frictionless data/countries.csv > {output_file_path}") + if IS_UNIX: + # A value of 256 means the spawned program terminated with exit code 1 + # https://stackoverflow.com/questions/47832180/os-system-returns-the-value-256-when-run-from-crontab + assert exit_code == 256 + else: + assert exit_code == 1 + with open(output_file_path, encoding="utf-8") as file: + expected = file.read() + assert ( + expected.count("Describe") + and expected.count("Extract") + and expected.count("Validate") + and expected.count("Summary") + and expected.count("Errors") + ) + + +def test_program_summary_without_filepath(): + result = runner.invoke(program, "summary") + assert result.exit_code == 1 + assert result.stdout.strip() == 'Providing "source" is required' + + +def test_program_summary_zipped_innerpath(): + result = runner.invoke(program, "summary data/table.csv.zip") + assert result.exit_code == 0 + assert result.stdout.count("table.csv.zip => table.csv") diff --git a/tests/program/test_validate.py b/tests/program/test_validate.py index 27eda6afee..00480368f9 100644 --- a/tests/program/test_validate.py +++ b/tests/program/test_validate.py @@ -1,6 +1,5 @@ import json import yaml -import re from typer.testing import CliRunner from frictionless import Metadata, Detector, program, validate @@ -220,13 +219,12 @@ def test_program_validate_zipped_resources_979(): with open(output_file_path, encoding="utf-8") as file: expected = file.read() assert result.exit_code == 1 - assert result.stdout.count("valid: ogd10_energieforschungstatistik_ch.csv") - assert result.stdout.count("valid: ogd10_catalogs.zip => finanzquellen.csv") - assert result.stdout.count("invalid: ogd10_catalogs.zip => capital-invalid.csv") - assert result.stdout.count("Schema is not valid") - # remove timetaken floating point number which varies - output = re.sub(r"(\d+)\.(.*)\d", "", result.stdout) - assert output.strip() == expected.strip() + assert ( + result.stdout.count("valid: ogd10_energieforschungstatistik_ch.csv") + and result.stdout.count("valid: ogd10_catalogs.zip => finanzquellen.csv") + and result.stdout.count("invalid: ogd10_catalogs.zip => capital-invalid.csv") + and result.stdout.count(expected.strip()) + ) def test_program_validate_long_error_messages_976(): @@ -234,9 +232,8 @@ def test_program_validate_long_error_messages_976(): output_file_path = "data/fixtures/cli/long-error-messages-976.txt" with open(output_file_path, encoding="utf-8") as file: expected = file.read() - output = re.sub(r"(\d+)\.(.*)\d", "", result.stdout) assert result.exit_code == 1 - assert output.strip() == expected.strip() + assert result.stdout.count(expected.strip()) def test_program_validate_partial_validation_info_933(): diff --git a/tests/report/test_report.py b/tests/report/test_report.py new file mode 100644 index 0000000000..9dc89bc4eb --- /dev/null +++ b/tests/report/test_report.py @@ -0,0 +1,80 @@ +from frictionless import validate, helpers + + +IS_UNIX = not helpers.is_platform("windows") + + +def test_program_error_not_found(): + report = validate("data/countriess.csv") + output = report.to_summary() + with open( + "data/fixtures/summary/multiline-scheme-error.txt", encoding="utf-8" + ) as file: + expected = file.read() + assert output.count(expected.strip()) + assert output.count("File name (Not Found)") + + +def test_report_summary_valid(): + report = validate("data/capital-valid.csv") + output = report.to_summary() + assert ( + output.count("valid") and output.count("Summary") and not output.count("Errors") + ) + + +def test_report_summary_invalid(): + report = validate("data/countries.csv") + output = report.to_summary() + assert output.count("invalid") and output.count("Summary") and output.count("Errors") + + +def test_report_summary_validate_summary_valid(): + report = validate("data/capital-valid.csv") + output = report.to_summary() + file_size = 50 if IS_UNIX else 56 + assert ( + output.count("valid") + and output.count("Summary") + and output.count("File name | data/capital-valid.csv") + and output.count(f"File size (bytes) | {file_size} ") + and output.count("Total Time Taken (sec) | ") + ) + + +def test_report_summary_validate_summary_invalid(): + report = validate("data/capital-invalid.csv") + output = report.to_summary() + file_size = 171 if IS_UNIX else 183 + assert ( + output.count("invalid") + and output.count("Summary") + and output.count("File name | data/capital-invalid.csv") + and output.count(f"File size (bytes) | {file_size} ") + and output.count("Total Time Taken (sec) |") + and output.count("Total Errors | 5 ") + and output.count("Duplicate Label (duplicate-label) | 1 ") + and output.count("Missing Cell (missing-cell) | 1 ") + and output.count("Blank Row (blank-row) | 1 ") + and output.count("Type Error (type-error) | 1 ") + and output.count("Extra Cell (extra-cell) | 1 ") + ) + + +def test_report_summary_validate_multiline_errors(): + report = validate("data/countries.csv") + output = report.to_summary() + with open("data/fixtures/summary/multiline-errors.txt", encoding="utf-8") as file: + expected = file.read() + assert output.count(expected.strip()) + + +def test_report_summary_partial_validation(): + report = validate("data/capital-invalid.csv", limit_errors=2) + output = report.to_summary() + assert ( + output.count("The document was partially validated because of one of the limits") + and output.count("limit errors") + and output.count("memory Limit") + and output.count("Rows Checked(Partial)** | 10") + ) diff --git a/tests/report/test_reporttask.py b/tests/report/test_reporttask.py new file mode 100644 index 0000000000..bd3e2f2726 --- /dev/null +++ b/tests/report/test_reporttask.py @@ -0,0 +1,78 @@ +from frictionless import validate, helpers + + +IS_UNIX = not helpers.is_platform("windows") + + +def test_report_reporttask_summary_valid(): + report = validate("data/capital-valid.csv") + output = report.tasks[0].to_summary() + file_size = 50 if IS_UNIX else 56 + assert ( + output.count("File name | data/capital-valid.csv") + and output.count(f"File size (bytes) | {file_size} ") + and output.count("Total Time Taken (sec) | ") + ) + + +def test_report_reporttask_summary_invalid(): + report = validate("data/capital-invalid.csv") + output = report.tasks[0].to_summary() + file_size = 171 if IS_UNIX else 183 + assert ( + output.count("File name | data/capital-invalid.csv") + and output.count(f"File size (bytes) | {file_size} ") + and output.count("Total Time Taken (sec) |") + and output.count("Total Errors | 5 ") + and output.count("Duplicate Label (duplicate-label) | 1 ") + and output.count("Missing Cell (missing-cell) | 1 ") + and output.count("Blank Row (blank-row) | 1 ") + and output.count("Type Error (type-error) | 1 ") + and output.count("Extra Cell (extra-cell) | 1 ") + ) + + +def test_report_reporttask_summary_filenotfound(): + report = validate("data/capital-invalids.csv") + output = report.tasks[0].to_summary() + assert ( + output.count("File name (Not Found) | data/capital-invalids.csv") + and output.count("File size | N/A") + and output.count("Total Time Taken (sec) ") + and output.count("Total Errors | 1") + and output.count("Scheme Error (scheme-error) | 1") + ) + + +def test_report_reporttask_summary_zippedfile(): + report = validate("data/table.csv.zip") + output = report.tasks[0].to_summary() + assert ( + output.count("File name | data/table.csv.zip => table.csv") + and output.count("File size | N/A") + and output.count("Total Time Taken (sec) |") + ) + + +def test_report_reporttask_summary_lastrowchecked(): + report = validate("data/capital-invalid.csv", limit_errors=2) + output = report.tasks[0].to_summary() + assert ( + output.count("Rows Checked(Partial)** | 10") + and output.count("Total Errors | 2") + and output.count("Duplicate Label (duplicate-label) | 1") + and output.count("Missing Cell (missing-cell) | 1") + ) + + +def test_report_reporttask_summary_errors_with_count(): + report = validate("data/capital-invalid.csv") + output = report.tasks[0].to_summary() + assert ( + output.count("Total Errors | 5 ") + and output.count("Duplicate Label (duplicate-label) | 1 ") + and output.count("Missing Cell (missing-cell) | 1 ") + and output.count("Blank Row (blank-row) | 1 ") + and output.count("Type Error (type-error) | 1 ") + and output.count("Extra Cell (extra-cell) | 1 ") + ) diff --git a/tests/resource/describe/test_general.py b/tests/resource/describe/test_general.py index e95e9e17c1..c87bc6b2cd 100644 --- a/tests/resource/describe/test_general.py +++ b/tests/resource/describe/test_general.py @@ -186,3 +186,16 @@ def test_describe_resource_with_json_format_issue_827(): def test_describe_resource_with_years_in_the_header_issue_825(): resource = Resource.describe("data/issue-825.csv") assert resource.schema.field_names == ["Musei", "2011", "2010"] + + +def test_describe_resource_schema_summary(): + resource = Resource.describe("data/countries.csv") + resource.infer() + output = resource.schema.to_summary() + assert ( + output.count("| name | type | required |") + and output.count("| id | integer | |") + and output.count("| neighbor_id | string | |") + and output.count("| name | string | |") + and output.count("| population | string | |") + ) diff --git a/tests/resource/test_general.py b/tests/resource/test_general.py index 365b22fa84..4229cd226c 100644 --- a/tests/resource/test_general.py +++ b/tests/resource/test_general.py @@ -549,3 +549,29 @@ def test_resource_pprint_1029(): 'path': 'data/table.csv', 'title': 'My Resource'}""" assert repr(resource) == expected + + +def test_resource_summary_valid_resource(): + resource = Resource("data/capital-valid.csv") + output = resource.to_view() + assert ( + output.count("| id | name |") + and output.count("| 1 | 'London' |") + and output.count("| 2 | 'Berlin' |") + and output.count("| 3 | 'Paris' |") + and output.count("| 4 | 'Madrid' |") + and output.count("| 5 | 'Rome' |") + ) + + +def test_resource_summary_invalid_resource(): + resource = Resource("data/countries.csv") + output = resource.to_view() + assert ( + output.count("| id | neighbor_id | name | population |") + and output.count("| 1 | 'Ireland' | 'Britain' | '67' |") + and output.count("| 2 | '3' | 'France' | 'n/a' |") + and output.count("| 3 | '22' | 'Germany' | '83' |") + and output.count("| 4 | None | 'Italy' | '60' |") + and output.count("| 5 | None | None | None |") + ) diff --git a/tests/schema/test_general.py b/tests/schema/test_general.py index d68d4a87f7..242289a0d9 100644 --- a/tests/schema/test_general.py +++ b/tests/schema/test_general.py @@ -337,3 +337,69 @@ def test_schema_not_supported_type_issue_goodatbles_304(): schema = Schema({"fields": [{"name": "name"}, {"name": "age", "type": "bad"}]}) assert schema.metadata_valid is False assert schema.fields[1] == {"name": "age", "type": "bad"} + + +def test_schema_summary(): + schema = Schema(DESCRIPTOR_MAX) + output = schema.to_summary() + assert ( + output.count("| name | type | required |") + and output.count("| id | string | True |") + and output.count("| height | number | |") + and output.count("| age | integer | |") + and output.count("| name | string | |") + ) + + +def test_schema_summary_without_required(): + descriptor = { + "fields": [ + {"name": "test_1", "type": "string", "format": "default"}, + {"name": "test_2", "type": "string", "format": "default"}, + {"name": "test_3", "type": "string", "format": "default"}, + ] + } + schema = Schema(descriptor) + output = schema.to_summary() + assert ( + output.count("| name | type | required |") + and output.count("| test_1 | string | |") + and output.count("| test_2 | string | |") + and output.count("| test_3 | string | |") + ) + + +def test_schema_summary_without_type_missing_for_some_fields(): + descriptor = { + "fields": [ + {"name": "id", "format": "default"}, + {"name": "name", "type": "string", "format": "default"}, + {"name": "age", "format": "default"}, + ] + } + schema = Schema(descriptor) + output = schema.to_summary() + assert ( + output.count("| name | type | required |") + and output.count("| id | any | |") + and output.count("| name | string | |") + and output.count("| age | any | |") + ) + + +def test_schema_summary_with_name_missing_for_some_fields(): + descriptor = { + "fields": [ + {"type": "int", "format": "default"}, + {"type": "int", "format": "default"}, + {"name": "name", "type": "string", "format": "default"}, + ] + } + schema = Schema(descriptor) + output = schema.to_summary() + assert ( + output.count("| name | type | required |") + and output.count("| int | int | |") + and output.count("| int | int | |") + and output.count("| name | string | |") + )