frictionlessdata · roll · Jun 10, 2022 · Jun 1, 2022 · Jun 7, 2022 · Jun 7, 2022
diff --git a/data/fixtures/cli/long-error-messages-976.txt b/data/fixtures/cli/long-error-messages-976.txt
@@ -1,33 +1,20 @@
-# -------
-# invalid: test-tabulator 
-# -------
+## Errors 
 
-
-# Summary 
-
-Description                      Size/Name/Count
--------------------------------  -----------------
-File name (Not Found)            test-tabulator
-File size                        N/A
-Total Time Taken (sec)           
-Total Errors                     1
-Resource Error (resource-error)  1
-
-# Errors 
-
-row    field    code       message
------  -------  ---------  -------------------------------------------------
-                resource-  The data resource has an error: "{'format':
-                error      'inline', 'hashing': 'md5', 'name': 'test-
-                           tabulator', 'profile': 'tabular-data-resource',
-                           'resources': [{'name': 'first-resource', 'path':
-                           'table.xls', 'schema': {'fields': [{'name': 'id',
-                           'type': 'number'}, {'name': 'name', 'type':
-                           'string'}]}}, {'name': 'number-two', 'path':
-                           'table-reverse.csv', 'schema': {'fields':
-                           [{'name': 'id', 'type': 'integer'}, {'name':
-                           'name', 'type': 'string'}]}}], 'scheme': '',
-                           'stats': {'bytes': 0, 'fields': 0, 'hash': '',
-                           'rows': 0}} is not valid under any of the given
-                           schemas" at "" in metadata and at "oneOf" in
-                           profile
++-------+---------+-----------+---------------------------------------------------+
+| row   | field   | code      | message                                           |
++=======+=========+===========+===================================================+
+|       |         | resource- | The data resource has an error: "{'format':       |
+|       |         | error     | 'inline', 'hashing': 'md5', 'name': 'test-        |
+|       |         |           | tabulator', 'profile': 'tabular-data-resource',   |
+|       |         |           | 'resources': [{'name': 'first-resource', 'path':  |
+|       |         |           | 'table.xls', 'schema': {'fields': [{'name': 'id', |
+|       |         |           | 'type': 'number'}, {'name': 'name', 'type':       |
+|       |         |           | 'string'}]}}, {'name': 'number-two', 'path':      |
+|       |         |           | 'table-reverse.csv', 'schema': {'fields':         |
+|       |         |           | [{'name': 'id', 'type': 'integer'}, {'name':      |
+|       |         |           | 'name', 'type': 'string'}]}}], 'scheme': '',      |
+|       |         |           | 'stats': {'bytes': 0, 'fields': 0, 'hash': '',    |
+|       |         |           | 'rows': 0}} is not valid under any of the given   |
+|       |         |           | schemas" at "" in metadata and at "oneOf" in      |
+|       |         |           | profile                                           |
++-------+---------+-----------+---------------------------------------------------+
diff --git a/data/fixtures/cli/zipped-resources-979.txt b/data/fixtures/cli/zipped-resources-979.txt
@@ -1,44 +1,8 @@
-# -----
-# valid: ogd10_energieforschungstatistik_ch.csv 
-# -----
-
-# Summary 
-
-Description             Size/Name/Count
-----------------------  --------------------------------------
-File name               ogd10_energieforschungstatistik_ch.csv
-File size (KB)          88541
-Total Time Taken (sec)  
-# -------
-# invalid: ogd10_catalogs.zip => capital-invalid.csv 
-# -------
-
-
-# Summary 
-
-Description                  Size/Name/Count
----------------------------  -----------------------------------------
-File name (Not Found)        ogd10_catalogs.zip => capital-invalid.csv
-File size                    N/A
-Total Time Taken (sec)       
-Total Errors                 1
-Schema Error (schema-error)  1
-
-# Errors 
-
-row    field    code     message
------  -------  -------  -------------------------------------------------
-                schema-  Schema is not valid: Schemas with duplicate field
-                error    names are not supported
-
-# -----
-# valid: ogd10_catalogs.zip => finanzquellen.csv 
-# -----
-
-# Summary 
-
-Description             Size/Name/Count
-----------------------  ---------------------------------------
-File name (Not Found)   ogd10_catalogs.zip => finanzquellen.csv
-File size               N/A
-Total Time Taken (sec)  
+## Errors 
+
++-------+---------+---------+---------------------------------------------------+
+| row   | field   | code    | message                                           |
++=======+=========+=========+===================================================+
+|       |         | schema- | Schema is not valid: Schemas with duplicate field |
+|       |         | error   | names are not supported                           |
++-------+---------+---------+---------------------------------------------------+
diff --git a/data/fixtures/summary/multiline-errors.txt b/data/fixtures/summary/multiline-errors.txt
@@ -0,0 +1,15 @@
++-------+---------+------------+----------------------------------------------------+
+|   row |   field | code       | message                                            |
++=======+=========+============+====================================================+
+|     4 |       5 | extra-cell | Row at position "4" has an extra value in field at |
+|       |         |            | position "5"                                       |
++-------+---------+------------+----------------------------------------------------+
+|     7 |       2 | missing-   | Row at position "7" has a missing cell in field    |
+|       |         | cell       | "neighbor_id" at position "2"                      |
++-------+---------+------------+----------------------------------------------------+
+|     7 |       3 | missing-   | Row at position "7" has a missing cell in field    |
+|       |         | cell       | "name" at position "3"                             |
++-------+---------+------------+----------------------------------------------------+
+|     7 |       4 | missing-   | Row at position "7" has a missing cell in field    |
+|       |         | cell       | "population" at position "4"                       |
++-------+---------+------------+----------------------------------------------------+
diff --git a/data/fixtures/summary/multiline-scheme-error.txt b/data/fixtures/summary/multiline-scheme-error.txt
@@ -0,0 +1,9 @@
+## Errors 
+
++-------+---------+---------+---------------------------------------------------+
+| row   | field   | code    | message                                           |
++=======+=========+=========+===================================================+
+|       |         | scheme- | The data source could not be successfully loaded: |
+|       |         | error   | [Errno 2] No such file or directory:              |
+|       |         |         | 'data/countriess.csv'                             |
++-------+---------+---------+---------------------------------------------------+
diff --git a/frictionless/program/__init__.py b/frictionless/program/__init__.py
@@ -1,6 +1,7 @@
 from .api import program_api
 from .describe import program_describe
 from .extract import program_extract
+from .summary import program_summary
 from .main import program, program_main
 from .transform import program_transform
 from .validate import program_validate
diff --git a/frictionless/program/main.py b/frictionless/program/main.py
@@ -1,11 +1,31 @@
+import sys
 import typer
 from typing import Optional
 from .. import settings
 
 
 # Program
 
-program = typer.Typer()
+
+# TODO: remove this hack when Typer supports not-found commands catching
+# https://github.com/tiangolo/typer/issues/18
+class Program(typer.Typer):
+    """Typer app that runs `summary` when the first argument contains a dot."""
+
+    def __call__(self, *args, **kwargs):
+        """Rewrite `sys.argv` for the shortcut form, then run the Typer app.
+
+        When the first command-line argument contains a ".", `sys.argv` is
+        replaced by `[program, "summary", argument]`; later arguments are dropped.
+        """
+        # Check the length first: with no arguments `sys.argv[1]` does not
+        # exist and indexing it raises IndexError before Typer can run
+        if len(sys.argv) > 1 and "." in sys.argv[1]:
+            sys.argv = [sys.argv[0], "summary", sys.argv[1]]
+        return super().__call__(*args, **kwargs)
+
+
+program = Program()
 
 
 # Helpers

diff --git a/frictionless/program/summary.py b/frictionless/program/summary.py
@@ -0,0 +1,55 @@
+import typer
+from .main import program
+from . import common
+from ..resource import Resource
+
+
+@program.command(name="summary")
+def program_summary(source: str = common.source):
+    """Summary of data source.
+
+    It will return schema, sample of the data and validation report for the resource.
+    """
+
+    def fail(text):
+        # Report the problem on stderr in bold red and stop with exit code 1
+        typer.secho(text, err=True, fg=typer.colors.RED, bold=True)
+        raise typer.Exit(1)
+
+    def heading(title):
+        # Each section starts with an empty line followed by a bold title
+        typer.secho("")
+        typer.secho(title, bold=True)
+
+    # A source is mandatory
+    if not source:
+        fail('Providing "source" is required')
+
+    # Create the resource and call infer(); any exception ends the command
+    try:
+        resource = Resource(source)
+        resource.infer()
+    except Exception as exception:
+        fail(str(exception))
+
+    # Describe: output of resource.schema.to_summary()
+    heading("# Describe ")
+    typer.secho("")
+    typer.secho(str(resource.schema.to_summary()))
+
+    # Extract: output of resource.to_view()
+    heading("# Extract ")
+    typer.secho("")
+    typer.secho(str(resource.to_view()))
+
+    # Validate: the report summary follows the title without an empty line
+    try:
+        report = resource.validate()
+    except Exception as exception:
+        fail(str(exception))
+    heading("# Validate ")
+    typer.secho(str(report.to_summary()))
+
+    # Exit code is 0 for a valid report and 1 otherwise
+    exit_code = 0 if report.valid else 1
+    raise typer.Exit(code=exit_code)
diff --git a/frictionless/program/validate.py b/frictionless/program/validate.py
@@ -1,7 +1,5 @@
 import sys
-import os
 import typer
-import textwrap
 from typing import List
 from tabulate import tabulate
 from ..actions import validate
@@ -217,7 +215,7 @@ def program_validate(
         typer.secho(content)
         raise typer.Exit()
 
-    # Return report
+    # Return validation report errors
     if report.errors:
         content = []
         if is_stdin:
@@ -232,136 +230,8 @@ def program_validate(
             str(tabulate(content, headers=["code", "message"], tablefmt="simple"))
         )
 
-    # Return tables
-    prev_invalid = False
-    for number, task in enumerate(report.tasks, start=1):
-        tabular = task.resource.profile == "tabular-data-resource"
-        if number != 1 and prev_invalid:
-            typer.secho("")
-        prefix = "valid" if task.valid else "invalid"
-        suffix = "" if tabular else "(non-tabular)"
-        source = task.resource.path or task.resource.name
-        # for zipped resources append file name
-        if task.resource.innerpath:
-            source = f"{source} => {task.resource.innerpath}"
-        if is_stdin:
-            source = "stdin"
-        typer.secho(f"# {'-'*len(prefix)}", bold=True)
-        typer.secho(f"# {prefix}: {source} {suffix}", bold=True)
-        typer.secho(f"# {'-'*len(prefix)}", bold=True)
-        error_list = {}
-        if task.errors:
-            prev_invalid = True
-            typer.secho("")
-            content = []
-            for error in task.errors:
-                content.append(
-                    [
-                        error.get("rowPosition", ""),
-                        error.get("fieldPosition", ""),
-                        error.code,
-                        error.message,
-                    ]
-                )
-                # error list for summary
-                error_title = f"{error.name} ({error.code})"
-                if error_title not in error_list:
-                    error_list[error_title] = 0
-                error_list[error_title] += 1
-                if task.partial:
-                    last_row_checked = error.get("rowPosition", "")
-            content = _wrap_text_to_colwidths(content)
-        # summary
-        rows_checked = last_row_checked if task.partial else None
-        summary_content = _validation_summary(
-            source,
-            basepath=task.resource.basepath,
-            time_taken=task.time,
-            rows_checked=rows_checked,
-            error_list=error_list,
-        )
-        typer.echo("\n# Summary \n")
-        if task.partial:
-            typer.echo(
-                "The document was partially validated because of one of the limits"
-            )
-            typer.echo("* limit errors")
-            typer.echo("* memory Limit \n")
-        typer.secho(
-            str(
-                tabulate(
-                    summary_content,
-                    headers=["Description", "Size/Name/Count"],
-                    tablefmt="simple",
-                )
-            )
-        )
-        # errors
-        if task.errors:
-            typer.echo("\n# Errors \n")
-            typer.secho(
-                str(
-                    tabulate(
-                        content,
-                        headers=["row", "field", "code", "message"],
-                        tablefmt="simple",
-                    )
-                )
-            )
+    # Return validation report summary and tables
+    typer.secho(str(report.to_summary()))
 
     # Return retcode
     raise typer.Exit(code=int(not report.valid))
-
-
-# TODO:This is a temporary function to use with tabulate as
-# tabulate 0.8.9 does not support text wrap
-def _wrap_text_to_colwidths(
-    list_of_lists: List, colwidths: List = [5, 5, 10, 50]
-) -> List:
-    """Create new list with wrapped text with different column width.
-    Args:
-        list_of_lists (List): List of lines
-        colwidths (List): width for each column
-
-    Returns:
-        List: list of lines with wrapped text
-
-    """
-    result = []
-    for row in list_of_lists:
-        new_row = []
-        for cell, width in zip(row, colwidths):
-            cell = str(cell)
-            wrapped = textwrap.wrap(cell, width=width)
-            new_row.append("\n".join(wrapped))
-        result.append(new_row)
-    return result
-
-
-def _validation_summary(
-    source: str,
-    time_taken: str,
-    basepath: str = None,
-    rows_checked: int = None,
-    error_list: List = None,
-) -> List:
-    """Generate summary for validation task"""
-    file_path = os.path.join(basepath, source) if basepath else source
-    file_size = "N/A"
-    unit = None
-    if os.path.exists(file_path):
-        file_size = os.path.getsize(file_path)
-        unit = helpers.format_bytes(file_size)
-    content = [
-        [f"File name { '' if unit else '(Not Found)' }", source],
-        [f"File size { f'({unit})' if unit else '' }", file_size],
-        ["Total Time Taken (sec)", time_taken],
-    ]
-    if rows_checked:
-        content.append(["Rows Checked(Partial)**", rows_checked])
-    if error_list:
-        content.append(["Total Errors", sum(error_list.values())])
-    for code, count in error_list.items():
-        content.append([code, count])
-
-    return content