Skip to content

Commit

Permalink
Revised code
Browse files Browse the repository at this point in the history
* Rearranged code and moved it to the relevant modules ("separation of concerns")
* Made changes to tests and added new tests
* Revised summary command code
  • Loading branch information
shashigharti committed Jun 7, 2022
1 parent cde50b5 commit 25e6ee4
Show file tree
Hide file tree
Showing 8 changed files with 184 additions and 255 deletions.
84 changes: 5 additions & 79 deletions frictionless/program/summary.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import typer
from tabulate import tabulate
from .main import program
from . import common
from .. import helpers
from ..layout import Layout
from ..resource import Resource


Expand All @@ -20,100 +17,29 @@ def program_summary(source: str = common.source):
raise typer.Exit(1)
# Infer Resource
try:
resource = Resource(source, layout=Layout(limit_rows=5))
resource = Resource(source)
resource.infer()
except Exception as exception:
typer.secho(str(exception), err=True, fg=typer.colors.RED, bold=True)
raise typer.Exit(1)
# Describe data
content = [
[field.name, field.type, True if field.required else ""]
for field in resource.schema.fields
]
typer.secho("")
typer.secho("# Describe ", bold=True)
typer.secho("")
typer.secho(tabulate(content, headers=["name", "type", "required"], tablefmt="grid"))
# Extract data
try:
resource.extract()
except Exception as exception:
typer.secho(str(exception), err=True, fg=typer.colors.RED, bold=True)
raise typer.Exit(1)
typer.secho(str(resource.schema.to_summary()))
typer.secho("")
typer.secho("# Extract ", bold=True)
typer.secho("")
typer.secho(resource.to_view())
# Validate data
typer.secho(str(resource.to_view()))
# Validate
try:
report = resource.validate()
except Exception as exception:
typer.secho(str(exception), err=True, fg=typer.colors.RED, bold=True)
raise typer.Exit(1)
error_content = []
error_list = {}
typer.secho("")
typer.secho("# Validate ", bold=True)
typer.secho("")
for task in report.tasks:
tabular = task.resource.profile == "tabular-data-resource"
prefix = "valid" if task.valid else "invalid"
suffix = "" if tabular else "(non-tabular)"
source = task.resource.path or task.resource.name
# for zipped resources append file name
if task.resource.innerpath:
source = f"{source} => {task.resource.innerpath}"
typer.secho(f"# {'-'*len(prefix)}", bold=True)
typer.secho(f"# {prefix}: {source} {suffix}", bold=True)
typer.secho(f"# {'-'*len(prefix)}", bold=True)
for error in report.tasks[0].errors:
error_content.append(
[
error.get("rowPosition", ""),
error.get("fieldPosition", ""),
error.code,
error.message,
]
)
# error list for summary
error_title = f"{error.name} ({error.code})"
if error_title not in error_list:
error_list[error_title] = 0
error_list[error_title] += 1
if task.partial:
last_row_checked = error.get("rowPosition", "")
error_content = helpers.wrap_text_to_colwidths(error_content)
rows_checked = last_row_checked if task.partial else None
summary_content = helpers.validation_summary(
source,
basepath=task.resource.basepath,
time_taken=task.time,
rows_checked=rows_checked,
error_list=error_list,
)
typer.secho("")
typer.secho("## Summary ", bold=True)
typer.secho("")
typer.secho(
str(
tabulate(
summary_content,
headers=["Description", "Size/Name/Count"],
tablefmt="grid",
)
)
)
if len(error_content) > 0:
typer.secho("")
typer.secho("## Errors ", bold=True)
typer.secho("")
typer.secho(
tabulate(
error_content,
headers=["row", "field", "code", "message"],
tablefmt="grid",
)
)
typer.secho(str(report.to_summary()))

# Return retcode
raise typer.Exit(code=int(not report.valid))
146 changes: 69 additions & 77 deletions frictionless/report/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from copy import deepcopy
from importlib import import_module
from tabulate import tabulate
from ..layout import Layout
from ..metadata import Metadata
from ..errors import Error, TaskError, ReportError
from ..exception import FrictionlessException
Expand Down Expand Up @@ -168,6 +167,75 @@ def wrapper(*args, **kwargs):

return wrapper

# Summary

def to_summary(self):
    """Summary of the report

    For every task a banner (valid/invalid plus the source), a "## Summary"
    grid and, when the task has errors, a "## Errors" grid are rendered.

    Returns:
        str: rendered sections of all tasks joined by a newline, or `None`
            when the report has no tasks. If a task contains an error with
            the code "scheme-error", that error object is returned as-is.
    """
    sections = []
    for task in self.tasks:
        tabular = task.resource.profile == "tabular-data-resource"
        prefix = "valid" if task.valid else "invalid"
        suffix = "" if tabular else "(non-tabular)"
        source = task.resource.path or task.resource.name
        # for zipped resources append file name
        if task.resource.innerpath:
            source = f"{source} => {task.resource.innerpath}"
        validation_content = f"\n# {'-'*len(prefix)}\n"
        validation_content += f"\n# {prefix}: {source} {suffix}\n"
        validation_content += f"\n# {'-'*len(prefix)}\n"
        error_list = {}
        error_content = []
        # Reset for every task: a partial task without errors must not fail
        # on an unbound name nor reuse the value of a previous task
        last_row_checked = None
        for error in task.errors:
            if error.code == "scheme-error":
                return error
            error_content.append(
                [
                    error.get("rowPosition", ""),
                    error.get("fieldPosition", ""),
                    error.code,
                    error.message,
                ]
            )
            # error list for summary
            error_title = f"{error.name} ({error.code})"
            error_list[error_title] = error_list.get(error_title, 0) + 1
            if task.partial:
                last_row_checked = error.get("rowPosition", "")
        # Validate
        error_content = helpers.wrap_text_to_colwidths(error_content)
        rows_checked = last_row_checked if task.partial else None
        summary_content = helpers.validation_summary(
            task.resource.path,
            basepath=task.resource.basepath,
            time_taken=self.time,
            rows_checked=rows_checked,
            error_list=error_list,
        )
        validation_content += "\n\n"
        validation_content += "## Summary "
        validation_content += "\n\n"
        validation_content += str(
            tabulate(
                summary_content,
                headers=["Description", "Size/Name/Count"],
                tablefmt="grid",
            )
        )
        if len(error_content) > 0:
            validation_content += "\n\n"
            validation_content += "## Errors "
            validation_content += "\n\n"
            validation_content += str(
                tabulate(
                    error_content,
                    headers=["row", "field", "code", "message"],
                    tablefmt="grid",
                )
            )
        # Collect every task instead of overwriting the previous one
        sections.append(validation_content)

    # A single task yields exactly its own section; no tasks yields None
    return "\n".join(sections) if sections else None

# Metadata

metadata_Error = ReportError
Expand Down Expand Up @@ -333,82 +401,6 @@ def flatten(self, spec=["rowPosition", "fieldPosition", "code"]):
result.append([context.get(prop) for prop in spec])
return result

# Summary

def to_summary(self) -> dict:
    """Summary of the resource

    Returns:
        dict: the keys "describe" (grid of the schema fields), "extract"
            (view of a copy of the resource limited to 5 rows) and
            "validate" (dict with a "summary" grid and, when there are
            errors, an "errors" grid). If an error with the code
            "scheme-error" is found, that error object is returned instead.

    Raises:
        FrictionlessException: on any error
    """
    # Process errors
    summary = {}
    error_list = {}
    error_content = []
    # Bound up-front so a partial task without errors does not fail below
    last_row_checked = None
    for error in self.errors:
        if error.code == "scheme-error":
            return error
        error_content.append(
            [
                error.get("rowPosition", ""),
                error.get("fieldPosition", ""),
                error.code,
                error.message,
            ]
        )
        # error list for summary
        error_title = f"{error.name} ({error.code})"
        error_list[error_title] = error_list.get(error_title, 0) + 1
        if self.partial:
            last_row_checked = error.get("rowPosition", "")
    # Describe
    try:
        self.resource.infer()
    except Exception as exception:
        raise FrictionlessException(self.__Error(note=str(exception))) from exception
    summary["describe"] = tabulate(
        [
            [field.name, field.type, True if field.required else ""]
            for field in self.resource.schema.fields
        ],
        headers=["name", "type", "required"],
        tablefmt="grid",
    )
    # Extract
    # Copy of existing resource to reset the properties to only extract 5 rows
    resource = self.resource.to_copy(layout=Layout(limit_rows=5))
    try:
        resource.extract()
    except Exception as exception:
        raise FrictionlessException(self.__Error(note=str(exception))) from exception
    summary["extract"] = resource.to_view()
    # Validate
    summary["validate"] = {}
    error_content = helpers.wrap_text_to_colwidths(error_content)
    rows_checked = last_row_checked if self.partial else None
    summary_content = helpers.validation_summary(
        self.resource.path,
        basepath=self.resource.basepath,
        time_taken=self.time,
        rows_checked=rows_checked,
        error_list=error_list,
    )
    summary["validate"]["summary"] = tabulate(
        summary_content,
        headers=["Description", "Size/Name/Count"],
        tablefmt="grid",
    )
    if len(error_content) > 0:
        summary["validate"]["errors"] = tabulate(
            error_content,
            headers=["row", "field", "code", "message"],
            tablefmt="grid",
        )

    return summary

# Metadata

metadata_Error = ReportError
Expand Down
10 changes: 10 additions & 0 deletions frictionless/schema/schema.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from copy import copy, deepcopy
from tabulate import tabulate
from ..exception import FrictionlessException
from ..metadata import Metadata
from ..field import Field
Expand Down Expand Up @@ -289,6 +290,15 @@ def to_excel_template(self, path: str) -> any:
)
return tableschema_to_template.create_xlsx(self, path)

# Summary

def to_summary(self):
    """Render the schema fields as a grid table

    Returns:
        str: table with the columns "name", "type" and "required"; the
            "required" cell is `True` for required fields and empty otherwise
    """
    rows = []
    for field in self.fields:
        name, kind = field.name, field.type
        required = True if field.required else ""
        rows.append([name, kind, required])
    columns = ["name", "type", "required"]
    return tabulate(rows, headers=columns, tablefmt="grid")

# Metadata

metadata_duplicate = True
Expand Down
30 changes: 12 additions & 18 deletions tests/program/test_summary.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import os
from typer.testing import CliRunner
from frictionless import program
from frictionless import program, helpers

# Shared Typer test runner; the tests below call `runner.invoke(program, ...)`
runner = CliRunner()
# False only when `helpers.is_platform("windows")` is truthy; used to pick the
# expected `os.system` exit status in the tests below
IS_UNIX = not helpers.is_platform("windows")


def test_program_error_not_found():
Expand Down Expand Up @@ -41,34 +42,24 @@ def test_program_summary_valid():
)


def test_program_summary_describe_header_row():
    # The command exits with code 1 for this file and the output must
    # contain the header row of the describe table
    outcome = runner.invoke(program, "summary data/countries.csv")
    assert outcome.exit_code == 1
    header_row = "| name | type | required |"
    assert outcome.stdout.count(header_row)


def test_program_summary_describe():
    # The describe table must contain its header row and one row per field
    result = runner.invoke(program, "summary data/countries.csv")
    assert result.exit_code == 1
    expected_rows = [
        "| name | type | required |",
        "| id | integer | |",
        "| neighbor_id | string | |",
        "| name | string | |",
        "| population | string | |",
    ]
    # Every expected row has to occur at least once in the output
    assert all(result.stdout.count(row) for row in expected_rows)


def test_program_summary_extract_header_row():
    # The command exits with code 1 for this file and the output must
    # contain the header row of the extract table
    outcome = runner.invoke(program, "summary data/countries.csv")
    assert outcome.exit_code == 1
    header_row = "| id | neighbor_id | name | population |"
    assert outcome.stdout.count(header_row)


def test_program_summary_extract():
result = runner.invoke(program, "summary data/countries.csv")
assert result.exit_code == 1
assert (
result.stdout.count("| 1 | 'Ireland' | 'Britain' | '67' |")
result.stdout.count("| id | neighbor_id | name | population |")
and result.stdout.count("| 1 | 'Ireland' | 'Britain' | '67' |")
and result.stdout.count("| 2 | '3' | 'France' | 'n/a' |")
and result.stdout.count("| 3 | '22' | 'Germany' | '83' |")
and result.stdout.count("| 4 | None | 'Italy' | '60' |")
Expand Down Expand Up @@ -127,9 +118,12 @@ def test_program_summary_validate_errors():
def test_program_summary_without_command(tmpdir):
output_file_path = f"{tmpdir}/output.txt"
exit_code = os.system(f"frictionless data/countries.csv > {output_file_path}")
# A value of 256 means the spawned program terminated with exit code 1
# https://stackoverflow.com/questions/47832180/os-system-returns-the-value-256-when-run-from-crontab
assert exit_code == 256
if IS_UNIX:
# A value of 256 means the spawned program terminated with exit code 1
# https://stackoverflow.com/questions/47832180/os-system-returns-the-value-256-when-run-from-crontab
assert exit_code == 256
else:
assert exit_code == 1
with open(output_file_path, encoding="utf-8") as file:
expected = file.read()
assert (
Expand Down
Loading

0 comments on commit 25e6ee4

Please sign in to comment.