Revised code

* Rearranged code and moved it to the relevant places ("separation of concerns")
* Updated existing tests and added new tests
* Revised the summary command code
frictionlessdata · Jun 7, 2022 · 25e6ee4 · 25e6ee4
1 parent cde50b5
commit 25e6ee4
Show file tree

Hide file tree

Showing 8 changed files with 184 additions and 255 deletions.
diff --git a/frictionless/program/summary.py b/frictionless/program/summary.py
@@ -1,9 +1,6 @@
 import typer
-from tabulate import tabulate
 from .main import program
 from . import common
-from .. import helpers
-from ..layout import Layout
 from ..resource import Resource
 
 
@@ -20,100 +17,29 @@ def program_summary(source: str = common.source):
         raise typer.Exit(1)
     # Infer Resource
     try:
-        resource = Resource(source, layout=Layout(limit_rows=5))
+        resource = Resource(source)
         resource.infer()
     except Exception as exception:
         typer.secho(str(exception), err=True, fg=typer.colors.RED, bold=True)
         raise typer.Exit(1)
-    # Describe data
-    content = [
-        [field.name, field.type, True if field.required else ""]
-        for field in resource.schema.fields
-    ]
     typer.secho("")
     typer.secho("# Describe ", bold=True)
     typer.secho("")
-    typer.secho(tabulate(content, headers=["name", "type", "required"], tablefmt="grid"))
-    # Extract data
-    try:
-        resource.extract()
-    except Exception as exception:
-        typer.secho(str(exception), err=True, fg=typer.colors.RED, bold=True)
-        raise typer.Exit(1)
+    typer.secho(str(resource.schema.to_summary()))
     typer.secho("")
     typer.secho("# Extract ", bold=True)
     typer.secho("")
-    typer.secho(resource.to_view())
-    # Validate data
+    typer.secho(str(resource.to_view()))
+    # Validate
     try:
         report = resource.validate()
     except Exception as exception:
         typer.secho(str(exception), err=True, fg=typer.colors.RED, bold=True)
         raise typer.Exit(1)
-    error_content = []
-    error_list = {}
     typer.secho("")
     typer.secho("# Validate ", bold=True)
     typer.secho("")
-    for task in report.tasks:
-        tabular = task.resource.profile == "tabular-data-resource"
-        prefix = "valid" if task.valid else "invalid"
-        suffix = "" if tabular else "(non-tabular)"
-        source = task.resource.path or task.resource.name
-        # for zipped resources append file name
-        if task.resource.innerpath:
-            source = f"{source} => {task.resource.innerpath}"
-        typer.secho(f"# {'-'*len(prefix)}", bold=True)
-        typer.secho(f"# {prefix}: {source} {suffix}", bold=True)
-        typer.secho(f"# {'-'*len(prefix)}", bold=True)
-        for error in report.tasks[0].errors:
-            error_content.append(
-                [
-                    error.get("rowPosition", ""),
-                    error.get("fieldPosition", ""),
-                    error.code,
-                    error.message,
-                ]
-            )
-            # error list for summary
-            error_title = f"{error.name} ({error.code})"
-            if error_title not in error_list:
-                error_list[error_title] = 0
-            error_list[error_title] += 1
-            if task.partial:
-                last_row_checked = error.get("rowPosition", "")
-    error_content = helpers.wrap_text_to_colwidths(error_content)
-    rows_checked = last_row_checked if task.partial else None
-    summary_content = helpers.validation_summary(
-        source,
-        basepath=task.resource.basepath,
-        time_taken=task.time,
-        rows_checked=rows_checked,
-        error_list=error_list,
-    )
-    typer.secho("")
-    typer.secho("## Summary ", bold=True)
-    typer.secho("")
-    typer.secho(
-        str(
-            tabulate(
-                summary_content,
-                headers=["Description", "Size/Name/Count"],
-                tablefmt="grid",
-            )
-        )
-    )
-    if len(error_content) > 0:
-        typer.secho("")
-        typer.secho("## Errors ", bold=True)
-        typer.secho("")
-        typer.secho(
-            tabulate(
-                error_content,
-                headers=["row", "field", "code", "message"],
-                tablefmt="grid",
-            )
-        )
+    typer.secho(str(report.to_summary()))
 
     # Return retcode
     raise typer.Exit(code=int(not report.valid))
diff --git a/frictionless/report/report.py b/frictionless/report/report.py
@@ -2,7 +2,6 @@
 from copy import deepcopy
 from importlib import import_module
 from tabulate import tabulate
-from ..layout import Layout
 from ..metadata import Metadata
 from ..errors import Error, TaskError, ReportError
 from ..exception import FrictionlessException
@@ -168,6 +167,75 @@ def wrapper(*args, **kwargs):
 
         return wrapper
 
+    # Summary
+
+    def to_summary(self):
+        validation_content = None
+        for task in self.tasks:
+            tabular = task.resource.profile == "tabular-data-resource"
+            prefix = "valid" if task.valid else "invalid"
+            suffix = "" if tabular else "(non-tabular)"
+            source = task.resource.path or task.resource.name
+            # for zipped resources append file name
+            if task.resource.innerpath:
+                source = f"{source} => {task.resource.innerpath}"
+            validation_content = f"\n# {'-'*len(prefix)}\n"
+            validation_content += f"\n# {prefix}: {source} {suffix}\n"
+            validation_content += f"\n# {'-'*len(prefix)}\n"
+            error_list = {}
+            error_content = []
+            for error in task.errors:
+                if error.code == "scheme-error":
+                    return error
+                error_content.append(
+                    [
+                        error.get("rowPosition", ""),
+                        error.get("fieldPosition", ""),
+                        error.code,
+                        error.message,
+                    ]
+                )
+                # error list for summary
+                error_title = f"{error.name} ({error.code})"
+                if error_title not in error_list:
+                    error_list[error_title] = 0
+                error_list[error_title] += 1
+                if task.partial:
+                    last_row_checked = error.get("rowPosition", "")
+            # Validate
+            error_content = helpers.wrap_text_to_colwidths(error_content)
+            rows_checked = last_row_checked if task.partial else None
+            summary_content = helpers.validation_summary(
+                task.resource.path,
+                basepath=task.resource.basepath,
+                time_taken=self.time,
+                rows_checked=rows_checked,
+                error_list=error_list,
+            )
+            validation_content += "\n\n"
+            validation_content += "## Summary "
+            validation_content += "\n\n"
+            validation_content += str(
+                tabulate(
+                    summary_content,
+                    headers=["Description", "Size/Name/Count"],
+                    tablefmt="grid",
+                )
+            )
+            if len(error_content) > 0:
+                validation_content += "\n\n"
+                validation_content += "## Errors "
+                validation_content += "\n\n"
+                validation_content += str(
+                    tabulate(
+                        error_content,
+                        headers=["row", "field", "code", "message"],
+                        tablefmt="grid",
+                    )
+                )
+
+        return validation_content
+
     # Metadata
 
     metadata_Error = ReportError
@@ -333,82 +401,6 @@ def flatten(self, spec=["rowPosition", "fieldPosition", "code"]):
             result.append([context.get(prop) for prop in spec])
         return result
 
-    # Summary
-
-    def to_summary(self) -> dict:
-        """Summary of the resource
-
-        Raises:
-            FrictionlessException: on any error
-        """
-        # Process errors
-        summary = {}
-        error_list = {}
-        error_content = []
-        for error in self.errors:
-            if error.code == "scheme-error":
-                return error
-            error_content.append(
-                [
-                    error.get("rowPosition", ""),
-                    error.get("fieldPosition", ""),
-                    error.code,
-                    error.message,
-                ]
-            )
-            # error list for summary
-            error_title = f"{error.name} ({error.code})"
-            if error_title not in error_list:
-                error_list[error_title] = 0
-            error_list[error_title] += 1
-            if self.partial:
-                last_row_checked = error.get("rowPosition", "")
-        # Describe
-        try:
-            self.resource.infer()
-        except Exception as exception:
-            raise FrictionlessException(self.__Error(note=str(exception))) from exception
-        summary["describe"] = tabulate(
-            [
-                [field.name, field.type, True if field.required else ""]
-                for field in self.resource.schema.fields
-            ],
-            headers=["name", "type", "required"],
-            tablefmt="grid",
-        )
-        # Extract
-        # Copy of existing resource to reset the properties to only extract 5 rows
-        resource = self.resource.to_copy(layout=Layout(limit_rows=5))
-        try:
-            resource.extract()
-        except Exception as exception:
-            raise FrictionlessException(self.__Error(note=str(exception))) from exception
-        summary["extract"] = resource.to_view()
-        # Validate
-        summary["validate"] = {}
-        error_content = helpers.wrap_text_to_colwidths(error_content)
-        rows_checked = last_row_checked if self.partial else None
-        summary_content = helpers.validation_summary(
-            self.resource.path,
-            basepath=self.resource.basepath,
-            time_taken=self.time,
-            rows_checked=rows_checked,
-            error_list=error_list,
-        )
-        summary["validate"]["summary"] = tabulate(
-            summary_content,
-            headers=["Description", "Size/Name/Count"],
-            tablefmt="grid",
-        )
-        if len(error_content) > 0:
-            summary["validate"]["errors"] = tabulate(
-                error_content,
-                headers=["row", "field", "code", "message"],
-                tablefmt="grid",
-            )
-
-        return summary
-
     # Metadata
 
     metadata_Error = ReportError

diff --git a/frictionless/schema/schema.py b/frictionless/schema/schema.py
@@ -1,4 +1,5 @@
 from copy import copy, deepcopy
+from tabulate import tabulate
 from ..exception import FrictionlessException
 from ..metadata import Metadata
 from ..field import Field
@@ -289,6 +290,15 @@ def to_excel_template(self, path: str) -> any:
         )
         return tableschema_to_template.create_xlsx(self, path)
 
+    # Summary
+
+    def to_summary(self):
+        content = [
+            [field.name, field.type, True if field.required else ""]
+            for field in self.fields
+        ]
+        return tabulate(content, headers=["name", "type", "required"], tablefmt="grid")
+
     # Metadata
 
     metadata_duplicate = True

diff --git a/tests/program/test_summary.py b/tests/program/test_summary.py
@@ -1,8 +1,9 @@
 import os
 from typer.testing import CliRunner
-from frictionless import program
+from frictionless import program, helpers
 
 runner = CliRunner()
+IS_UNIX = not helpers.is_platform("windows")
 
 
 def test_program_error_not_found():
@@ -41,34 +42,24 @@ def test_program_summary_valid():
     )
 
 
-def test_program_summary_describe_header_row():
-    result = runner.invoke(program, "summary data/countries.csv")
-    assert result.exit_code == 1
-    assert result.stdout.count("| name        | type    | required   |")
-
-
 def test_program_summary_describe():
     result = runner.invoke(program, "summary data/countries.csv")
     assert result.exit_code == 1
     assert (
-        result.stdout.count("| id          | integer |            |")
+        result.stdout.count("| name        | type    | required   |")
+        and result.stdout.count("| id          | integer |            |")
         and result.stdout.count("| neighbor_id | string  |            |")
         and result.stdout.count("| name        | string  |            |")
         and result.stdout.count("| population  | string  |            |")
     )
 
 
-def test_program_summary_extract_header_row():
-    result = runner.invoke(program, "summary data/countries.csv")
-    assert result.exit_code == 1
-    assert result.stdout.count("| id | neighbor_id | name      | population |")
-
-
 def test_program_summary_extract():
     result = runner.invoke(program, "summary data/countries.csv")
     assert result.exit_code == 1
     assert (
-        result.stdout.count("|  1 | 'Ireland'   | 'Britain' | '67'       |")
+        result.stdout.count("| id | neighbor_id | name      | population |")
+        and result.stdout.count("|  1 | 'Ireland'   | 'Britain' | '67'       |")
         and result.stdout.count("|  2 | '3'         | 'France'  | 'n/a'      |")
         and result.stdout.count("|  3 | '22'        | 'Germany' | '83'       |")
         and result.stdout.count("|  4 | None        | 'Italy'   | '60'       |")
@@ -127,9 +118,12 @@ def test_program_summary_validate_errors():
 def test_program_summary_without_command(tmpdir):
     output_file_path = f"{tmpdir}/output.txt"
     exit_code = os.system(f"frictionless data/countries.csv > {output_file_path}")
-    # A value of 256 means the spawned program terminated with exit code 1
-    # https://stackoverflow.com/questions/47832180/os-system-returns-the-value-256-when-run-from-crontab
-    assert exit_code == 256
+    if IS_UNIX:
+        # A value of 256 means the spawned program terminated with exit code 1
+        # https://stackoverflow.com/questions/47832180/os-system-returns-the-value-256-when-run-from-crontab
+        assert exit_code == 256
+    else:
+        assert exit_code == 1
     with open(output_file_path, encoding="utf-8") as file:
         expected = file.read()
     assert (