From 25e6ee4a98a06f11b83965c6787c0637efb83bef Mon Sep 17 00:00:00 2001
From: shashi gharti <shashi.gharti@gmail.com>
Date: Tue, 7 Jun 2022 17:57:00 +0545
Subject: [PATCH] Revised code

* Rearranged code and placed it in the relevant places "separation of concerns"
* Made changes to tests and added new tests
* Revised summary command code
---
 frictionless/program/summary.py         |  84 +-------------
 frictionless/report/report.py           | 146 +++++++++++-------------
 frictionless/schema/schema.py           |  10 ++
 tests/program/test_summary.py           |  30 ++---
 tests/report/test_report.py             |  36 ++++++
 tests/report/test_reporttask.py         |  81 -------------
 tests/resource/describe/test_general.py |  13 +++
 tests/resource/validate/test_general.py |  39 +++++++
 8 files changed, 184 insertions(+), 255 deletions(-)
 create mode 100644 tests/report/test_report.py
 delete mode 100644 tests/report/test_reporttask.py

diff --git a/frictionless/program/summary.py b/frictionless/program/summary.py
index 60c8a7399d..5f636edd13 100644
--- a/frictionless/program/summary.py
+++ b/frictionless/program/summary.py
@@ -1,9 +1,6 @@
 import typer
-from tabulate import tabulate
 from .main import program
 from . import common
-from .. import helpers
-from ..layout import Layout
 from ..resource import Resource
 
 
@@ -20,100 +17,29 @@ def program_summary(source: str = common.source):
         raise typer.Exit(1)
     # Infer Resource
     try:
-        resource = Resource(source, layout=Layout(limit_rows=5))
+        resource = Resource(source)
         resource.infer()
     except Exception as exception:
         typer.secho(str(exception), err=True, fg=typer.colors.RED, bold=True)
         raise typer.Exit(1)
-    # Describe data
-    content = [
-        [field.name, field.type, True if field.required else ""]
-        for field in resource.schema.fields
-    ]
     typer.secho("")
     typer.secho("# Describe ", bold=True)
     typer.secho("")
-    typer.secho(tabulate(content, headers=["name", "type", "required"], tablefmt="grid"))
-    # Extract data
-    try:
-        resource.extract()
-    except Exception as exception:
-        typer.secho(str(exception), err=True, fg=typer.colors.RED, bold=True)
-        raise typer.Exit(1)
+    typer.secho(str(resource.schema.to_summary()))
     typer.secho("")
     typer.secho("# Extract ", bold=True)
     typer.secho("")
-    typer.secho(resource.to_view())
-    # Validate data
+    typer.secho(str(resource.to_view()))
+    # Validate
     try:
         report = resource.validate()
     except Exception as exception:
         typer.secho(str(exception), err=True, fg=typer.colors.RED, bold=True)
         raise typer.Exit(1)
-    error_content = []
-    error_list = {}
     typer.secho("")
     typer.secho("# Validate ", bold=True)
     typer.secho("")
-    for task in report.tasks:
-        tabular = task.resource.profile == "tabular-data-resource"
-        prefix = "valid" if task.valid else "invalid"
-        suffix = "" if tabular else "(non-tabular)"
-        source = task.resource.path or task.resource.name
-        # for zipped resources append file name
-        if task.resource.innerpath:
-            source = f"{source} => {task.resource.innerpath}"
-        typer.secho(f"# {'-'*len(prefix)}", bold=True)
-        typer.secho(f"# {prefix}: {source} {suffix}", bold=True)
-        typer.secho(f"# {'-'*len(prefix)}", bold=True)
-        for error in report.tasks[0].errors:
-            error_content.append(
-                [
-                    error.get("rowPosition", ""),
-                    error.get("fieldPosition", ""),
-                    error.code,
-                    error.message,
-                ]
-            )
-            # error list for summary
-            error_title = f"{error.name} ({error.code})"
-            if error_title not in error_list:
-                error_list[error_title] = 0
-            error_list[error_title] += 1
-            if task.partial:
-                last_row_checked = error.get("rowPosition", "")
-    error_content = helpers.wrap_text_to_colwidths(error_content)
-    rows_checked = last_row_checked if task.partial else None
-    summary_content = helpers.validation_summary(
-        source,
-        basepath=task.resource.basepath,
-        time_taken=task.time,
-        rows_checked=rows_checked,
-        error_list=error_list,
-    )
-    typer.secho("")
-    typer.secho("## Summary ", bold=True)
-    typer.secho("")
-    typer.secho(
-        str(
-            tabulate(
-                summary_content,
-                headers=["Description", "Size/Name/Count"],
-                tablefmt="grid",
-            )
-        )
-    )
-    if len(error_content) > 0:
-        typer.secho("")
-        typer.secho("## Errors ", bold=True)
-        typer.secho("")
-        typer.secho(
-            tabulate(
-                error_content,
-                headers=["row", "field", "code", "message"],
-                tablefmt="grid",
-            )
-        )
+    typer.secho(str(report.to_summary()))
 
     # Return retcode
     raise typer.Exit(code=int(not report.valid))
diff --git a/frictionless/report/report.py b/frictionless/report/report.py
index 54c73967d9..2131011dc3 100644
--- a/frictionless/report/report.py
+++ b/frictionless/report/report.py
@@ -2,7 +2,6 @@
 from copy import deepcopy
 from importlib import import_module
 from tabulate import tabulate
-from ..layout import Layout
 from ..metadata import Metadata
 from ..errors import Error, TaskError, ReportError
 from ..exception import FrictionlessException
@@ -168,6 +167,75 @@ def wrapper(*args, **kwargs):
 
         return wrapper
 
+    # Summary
+
+    def to_summary(self):
+        validation_content = None
+        for task in self.tasks:
+            tabular = task.resource.profile == "tabular-data-resource"
+            prefix = "valid" if task.valid else "invalid"
+            suffix = "" if tabular else "(non-tabular)"
+            source = task.resource.path or task.resource.name
+            # for zipped resources append file name
+            if task.resource.innerpath:
+                source = f"{source} => {task.resource.innerpath}"
+            validation_content = f"\n# {'-'*len(prefix)}\n"
+            validation_content += f"\n# {prefix}: {source} {suffix}\n"
+            validation_content += f"\n# {'-'*len(prefix)}\n"
+            error_list = {}
+            error_content = []
+            for error in task.errors:
+                if error.code == "scheme-error":
+                    return error
+                error_content.append(
+                    [
+                        error.get("rowPosition", ""),
+                        error.get("fieldPosition", ""),
+                        error.code,
+                        error.message,
+                    ]
+                )
+                # error list for summary
+                error_title = f"{error.name} ({error.code})"
+                if error_title not in error_list:
+                    error_list[error_title] = 0
+                error_list[error_title] += 1
+                if task.partial:
+                    last_row_checked = error.get("rowPosition", "")
+            # Validate
+            error_content = helpers.wrap_text_to_colwidths(error_content)
+            rows_checked = last_row_checked if task.partial else None
+            summary_content = helpers.validation_summary(
+                task.resource.path,
+                basepath=task.resource.basepath,
+                time_taken=self.time,
+                rows_checked=rows_checked,
+                error_list=error_list,
+            )
+            validation_content += "\n\n"
+            validation_content += "## Summary "
+            validation_content += "\n\n"
+            validation_content += str(
+                tabulate(
+                    summary_content,
+                    headers=["Description", "Size/Name/Count"],
+                    tablefmt="grid",
+                )
+            )
+            if len(error_content) > 0:
+                validation_content += "\n\n"
+                validation_content += "## Errors "
+                validation_content += "\n\n"
+                validation_content += str(
+                    tabulate(
+                        error_content,
+                        headers=["row", "field", "code", "message"],
+                        tablefmt="grid",
+                    )
+                )
+
+        return validation_content
+
     # Metadata
 
     metadata_Error = ReportError
@@ -333,82 +401,6 @@ def flatten(self, spec=["rowPosition", "fieldPosition", "code"]):
             result.append([context.get(prop) for prop in spec])
         return result
 
-    # Summary
-
-    def to_summary(self) -> dict:
-        """Summary of the resource
-
-        Raises:
-            FrictionlessException: on any error
-        """
-        # Process errors
-        summary = {}
-        error_list = {}
-        error_content = []
-        for error in self.errors:
-            if error.code == "scheme-error":
-                return error
-            error_content.append(
-                [
-                    error.get("rowPosition", ""),
-                    error.get("fieldPosition", ""),
-                    error.code,
-                    error.message,
-                ]
-            )
-            # error list for summary
-            error_title = f"{error.name} ({error.code})"
-            if error_title not in error_list:
-                error_list[error_title] = 0
-            error_list[error_title] += 1
-            if self.partial:
-                last_row_checked = error.get("rowPosition", "")
-        # Describe
-        try:
-            self.resource.infer()
-        except Exception as exception:
-            raise FrictionlessException(self.__Error(note=str(exception))) from exception
-        summary["describe"] = tabulate(
-            [
-                [field.name, field.type, True if field.required else ""]
-                for field in self.resource.schema.fields
-            ],
-            headers=["name", "type", "required"],
-            tablefmt="grid",
-        )
-        # Extract
-        # Copy of existing resource to reset the properties to only extract 5 rows
-        resource = self.resource.to_copy(layout=Layout(limit_rows=5))
-        try:
-            resource.extract()
-        except Exception as exception:
-            raise FrictionlessException(self.__Error(note=str(exception))) from exception
-        summary["extract"] = resource.to_view()
-        # Validate
-        summary["validate"] = {}
-        error_content = helpers.wrap_text_to_colwidths(error_content)
-        rows_checked = last_row_checked if self.partial else None
-        summary_content = helpers.validation_summary(
-            self.resource.path,
-            basepath=self.resource.basepath,
-            time_taken=self.time,
-            rows_checked=rows_checked,
-            error_list=error_list,
-        )
-        summary["validate"]["summary"] = tabulate(
-            summary_content,
-            headers=["Description", "Size/Name/Count"],
-            tablefmt="grid",
-        )
-        if len(error_content) > 0:
-            summary["validate"]["errors"] = tabulate(
-                error_content,
-                headers=["row", "field", "code", "message"],
-                tablefmt="grid",
-            )
-
-        return summary
-
     # Metadata
 
     metadata_Error = ReportError
diff --git a/frictionless/schema/schema.py b/frictionless/schema/schema.py
index bf8f80ddf1..f21be9b40a 100644
--- a/frictionless/schema/schema.py
+++ b/frictionless/schema/schema.py
@@ -1,4 +1,5 @@
 from copy import copy, deepcopy
+from tabulate import tabulate
 from ..exception import FrictionlessException
 from ..metadata import Metadata
 from ..field import Field
@@ -289,6 +290,15 @@ def to_excel_template(self, path: str) -> any:
         )
         return tableschema_to_template.create_xlsx(self, path)
 
+    # Summary
+
+    def to_summary(self):
+        content = [
+            [field.name, field.type, True if field.required else ""]
+            for field in self.fields
+        ]
+        return tabulate(content, headers=["name", "type", "required"], tablefmt="grid")
+
     # Metadata
 
     metadata_duplicate = True
diff --git a/tests/program/test_summary.py b/tests/program/test_summary.py
index 719e137eb7..e3f2c296c3 100644
--- a/tests/program/test_summary.py
+++ b/tests/program/test_summary.py
@@ -1,8 +1,9 @@
 import os
 from typer.testing import CliRunner
-from frictionless import program
+from frictionless import program, helpers
 
 runner = CliRunner()
+IS_UNIX = not helpers.is_platform("windows")
 
 
 def test_program_error_not_found():
@@ -41,34 +42,24 @@ def test_program_summary_valid():
     )
 
 
-def test_program_summary_describe_header_row():
-    result = runner.invoke(program, "summary data/countries.csv")
-    assert result.exit_code == 1
-    assert result.stdout.count("| name        | type    | required   |")
-
-
 def test_program_summary_describe():
     result = runner.invoke(program, "summary data/countries.csv")
     assert result.exit_code == 1
     assert (
-        result.stdout.count("| id          | integer |            |")
+        result.stdout.count("| name        | type    | required   |")
+        and result.stdout.count("| id          | integer |            |")
         and result.stdout.count("| neighbor_id | string  |            |")
         and result.stdout.count("| name        | string  |            |")
         and result.stdout.count("| population  | string  |            |")
     )
 
 
-def test_program_summary_extract_header_row():
-    result = runner.invoke(program, "summary data/countries.csv")
-    assert result.exit_code == 1
-    assert result.stdout.count("| id | neighbor_id | name      | population |")
-
-
 def test_program_summary_extract():
     result = runner.invoke(program, "summary data/countries.csv")
     assert result.exit_code == 1
     assert (
-        result.stdout.count("|  1 | 'Ireland'   | 'Britain' | '67'       |")
+        result.stdout.count("| id | neighbor_id | name      | population |")
+        and result.stdout.count("|  1 | 'Ireland'   | 'Britain' | '67'       |")
         and result.stdout.count("|  2 | '3'         | 'France'  | 'n/a'      |")
         and result.stdout.count("|  3 | '22'        | 'Germany' | '83'       |")
         and result.stdout.count("|  4 | None        | 'Italy'   | '60'       |")
@@ -127,9 +118,12 @@ def test_program_summary_validate_errors():
 def test_program_summary_without_command(tmpdir):
     output_file_path = f"{tmpdir}/output.txt"
     exit_code = os.system(f"frictionless data/countries.csv > {output_file_path}")
-    # A value of 256 means the spawned program terminated with exit code 1
-    # https://stackoverflow.com/questions/47832180/os-system-returns-the-value-256-when-run-from-crontab
-    assert exit_code == 256
+    if IS_UNIX:
+        # A value of 256 means the spawned program terminated with exit code 1
+        # https://stackoverflow.com/questions/47832180/os-system-returns-the-value-256-when-run-from-crontab
+        assert exit_code == 256
+    else:
+        assert exit_code == 1
     with open(output_file_path, encoding="utf-8") as file:
         expected = file.read()
     assert (
diff --git a/tests/report/test_report.py b/tests/report/test_report.py
new file mode 100644
index 0000000000..c85a7d7a47
--- /dev/null
+++ b/tests/report/test_report.py
@@ -0,0 +1,36 @@
+from frictionless import validate
+
+
+def test_report_summary():
+    report = validate("data/countries.csv")
+    output = report.to_summary()
+    assert output.count("invalid") and output.count("Summary") and output.count("Errors")
+
+
+def test_report_summary_validate_summary():
+    report = validate("data/countries.csv")
+    output = report.to_summary()
+    assert (
+        output.count("File name                   |")
+        and output.count("File size (bytes)           | 143")
+        and output.count("Total Time Taken (sec)      |")
+        and output.count("Total Errors                | 4")
+        and output.count("Extra Cell (extra-cell)     | 1")
+        and output.count("Missing Cell (missing-cell) | 3")
+    )
+
+
+def test_report_summary_validate_errors():
+    report = validate("data/countries.csv")
+    output = report.to_summary()
+    with open("data/fixtures/program/summary/errors.txt", encoding="utf-8") as file:
+        expected = file.read()
+    assert output.count(expected.strip())
+
+
+def test_report_summary_valid():
+    report = validate("data/capital-valid.csv")
+    output = report.to_summary()
+    assert (
+        output.count("valid") and output.count("Summary") and not output.count("Errors")
+    )
diff --git a/tests/report/test_reporttask.py b/tests/report/test_reporttask.py
deleted file mode 100644
index 344377236f..0000000000
--- a/tests/report/test_reporttask.py
+++ /dev/null
@@ -1,81 +0,0 @@
-from frictionless import validate
-
-
-def test_report_task_error_not_found():
-    report = validate("data/countriess.csv")
-    assert (
-        report.tasks[0].errors[0].code == "scheme-error"
-        and report.tasks[0].errors[0].description
-        == "Data reading error because of incorrect scheme."
-    )
-
-
-def test_report_task_summary():
-    report = validate("data/countries.csv")
-    assert report.valid is False and ["describe", "extract", "validate"] == list(
-        report.tasks[0].to_summary().keys()
-    )
-
-
-def test_report_task_summary_valid():
-    report = validate("data/capital-valid.csv")
-    assert report.valid is True and ["summary"] == list(
-        report.tasks[0].to_summary()["validate"].keys()
-    )
-
-
-def test_report_task_summary_describe():
-    report = validate("data/countries.csv")
-    output = report.tasks[0].to_summary()["describe"]
-    assert (
-        output.count("| id          | integer |            |")
-        and output.count("| neighbor_id | string  |            |")
-        and output.count("| name        | string  |            |")
-        and output.count("| population  | string  |            |")
-    )
-
-
-def test_report_task_summary_extract():
-    report = validate("data/countries.csv")
-    output = report.tasks[0].to_summary()["extract"]
-    assert (
-        output.count("|  1 | 'Ireland'   | 'Britain' | '67'       |")
-        and output.count("|  2 | '3'         | 'France'  | 'n/a'      |")
-        and output.count("|  3 | '22'        | 'Germany' | '83'       |")
-        and output.count("|  4 | None        | 'Italy'   | '60'       |")
-        and output.count("|  5 | None        | None      | None       |")
-    )
-
-
-def test_report_task_summary_extract_only_5_rows():
-    report = validate("data/long.csv")
-    output = report.tasks[0].to_summary()["extract"]
-    assert (
-        output.count("|  1 | 'a'  |")
-        and output.count("|  2 | 'b'  |")
-        and output.count("|  3 | 'c'  |")
-        and output.count("|  4 | 'd'  |")
-        and output.count("|  5 | 'e'  |")
-        and not output.count("|  6 | 'f'  |")
-    )
-
-
-def test_report_task_summary_validate_summary():
-    report = validate("data/countries.csv")
-    validate_summary = report.tasks[0].to_summary()["validate"]["summary"]
-    assert (
-        validate_summary.count("File name                   |")
-        and validate_summary.count("File size (bytes)           | 143")
-        and validate_summary.count("Total Time Taken (sec)      |")
-        and validate_summary.count("Total Errors                | 4")
-        and validate_summary.count("Extra Cell (extra-cell)     | 1")
-        and validate_summary.count("Missing Cell (missing-cell) | 3")
-    )
-
-
-def test_report_task_summary_validate_errors():
-    report = validate("data/countries.csv")
-    output_file_path = "data/fixtures/program/summary/errors.txt"
-    with open(output_file_path, encoding="utf-8") as file:
-        expected = file.read()
-    assert report.tasks[0].to_summary()["validate"]["errors"] == expected.strip()
diff --git a/tests/resource/describe/test_general.py b/tests/resource/describe/test_general.py
index e95e9e17c1..c87bc6b2cd 100644
--- a/tests/resource/describe/test_general.py
+++ b/tests/resource/describe/test_general.py
@@ -186,3 +186,16 @@ def test_describe_resource_with_json_format_issue_827():
 def test_describe_resource_with_years_in_the_header_issue_825():
     resource = Resource.describe("data/issue-825.csv")
     assert resource.schema.field_names == ["Musei", "2011", "2010"]
+
+
+def test_describe_resource_schema_summary():
+    resource = Resource.describe("data/countries.csv")
+    resource.infer()
+    output = resource.schema.to_summary()
+    assert (
+        output.count("| name        | type    | required   |")
+        and output.count("| id          | integer |            |")
+        and output.count("| neighbor_id | string  |            |")
+        and output.count("| name        | string  |            |")
+        and output.count("| population  | string  |            |")
+    )
diff --git a/tests/resource/validate/test_general.py b/tests/resource/validate/test_general.py
index c683eae3db..155a608929 100644
--- a/tests/resource/validate/test_general.py
+++ b/tests/resource/validate/test_general.py
@@ -559,3 +559,42 @@ def test_validate_resource_errors_with_fields_993():
             'The data resource has an error: "fields" should be set as "resource.schema.fields" (not "resource.fields").',
         ]
     ]
+
+
+def test_validate_resource_summary_invalid():
+    resource = Resource("data/countries.csv")
+    report = resource.validate()
+    output = report.to_summary()
+    assert output.count("valid") and output.count("Summary") and output.count("Errors")
+
+
+def test_validate_resource_validate_summary():
+    resource = Resource("data/countries.csv")
+    report = resource.validate()
+    output = report.to_summary()
+    assert (
+        output.count("File name                   |")
+        and output.count("File size (bytes)           | 143")
+        and output.count("Total Time Taken (sec)      |")
+        and output.count("Total Errors                | 4")
+        and output.count("Extra Cell (extra-cell)     | 1")
+        and output.count("Missing Cell (missing-cell) | 3")
+    )
+
+
+def test_validate_resource_validate_errors():
+    resource = Resource("data/countries.csv")
+    report = resource.validate()
+    output = report.to_summary()
+    with open("data/fixtures/program/summary/errors.txt", encoding="utf-8") as file:
+        expected = file.read()
+    assert output.count(expected.strip())
+
+
+def test_validate_resource_summary_valid():
+    resource = Resource("data/capital-valid.csv")
+    report = resource.validate()
+    output = report.to_summary()
+    assert (
+        output.count("valid") and output.count("Summary") and not output.count("Errors")
+    )