fix: row limits & row count labels are confusing

Currently, in explore & dashboard, we usually apply a row limit on the query we issue against the database. Following this, some visualization backend code does some post-processing on data frames using pandas, typically pivoting some things, which affects the result set's row count in intricate capacities. Now from a UX standpoint the user is exposed with: - row limits in the control panels - row count in various areas of the UI where visualiztion, preview, samples and raw results are shown Currently we show the rowcount that's from row-processing. So maybe a hard limit was applied at 1000 rows on the database, but we pivot and it goes does to say 532, and we show the user that we haven't hit the limit when we actually did. Also note that the component that shows rowcount is supposed to turn red if limit is hit, letting the user know they are looking at truncated data.
apache · Mar 28, 2024 · acc8596 · acc8596
1 parent 546d48a
commit acc8596
Show file tree

Hide file tree

Showing 7 changed files with 14 additions and 1 deletion.
diff --git a/superset-frontend/src/explore/components/ChartPills.tsx b/superset-frontend/src/explore/components/ChartPills.tsx
@@ -66,7 +66,7 @@ export const ChartPills = forwardRef(
         >
           {!isLoading && firstQueryResponse && (
             <RowCountLabel
-              rowcount={Number(firstQueryResponse.rowcount) || 0}
+              rowcount={Number(firstQueryResponse.sql_rowcount) || 0}
               limit={Number(rowLimit) || 0}
             />
           )}

diff --git a/superset/common/query_actions.py b/superset/common/query_actions.py
@@ -135,6 +135,8 @@ def _get_full(
             "data": payload.get("data"),
             "colnames": payload.get("colnames"),
             "coltypes": payload.get("coltypes"),
+            "rowcount": payload.get("rowcount"),
+            "sql_rowcount": payload.get("sql_rowcount"),
         }
     return payload
 

diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py
@@ -194,6 +194,7 @@ def get_df_payload(
             "status": cache.status,
             "stacktrace": cache.stacktrace,
             "rowcount": len(cache.df.index),
+            "sql_rowcount": cache.sql_rowcount,
             "from_dttm": query_obj.from_dttm,
             "to_dttm": query_obj.to_dttm,
             "label_map": label_map,

diff --git a/superset/common/utils/query_cache_manager.py b/superset/common/utils/query_cache_manager.py
@@ -64,6 +64,7 @@ def __init__(
         is_cached: bool | None = None,
         cache_dttm: str | None = None,
         cache_value: dict[str, Any] | None = None,
+        sql_rowcount: int | None = None,
     ) -> None:
         self.df = df
         self.query = query
@@ -79,6 +80,7 @@ def __init__(
         self.is_cached = is_cached
         self.cache_dttm = cache_dttm
         self.cache_value = cache_value
+        self.sql_rowcount = sql_rowcount
 
     # pylint: disable=too-many-arguments
     def set_query_result(
@@ -102,6 +104,7 @@ def set_query_result(
             self.rejected_filter_columns = query_result.rejected_filter_columns
             self.error_message = query_result.error_message
             self.df = query_result.df
+            self.sql_rowcount = query_result.sql_rowcount
             self.annotation_data = {} if annotation_data is None else annotation_data
 
             if self.status != QueryStatus.FAILED:
@@ -117,6 +120,7 @@ def set_query_result(
                 "applied_filter_columns": self.applied_filter_columns,
                 "rejected_filter_columns": self.rejected_filter_columns,
                 "annotation_data": self.annotation_data,
+                "sql_rowcount": self.sql_rowcount,
             }
             if self.is_loaded and key and self.status != QueryStatus.FAILED:
                 self.set(
@@ -167,6 +171,7 @@ def get(
                 query_cache.status = QueryStatus.SUCCESS
                 query_cache.is_loaded = True
                 query_cache.is_cached = cache_value is not None
+                query_cache.sql_rowcount = cache_value.get("sql_rowcount", None)
                 query_cache.cache_dttm = (
                     cache_value["dttm"] if cache_value is not None else None
                 )

diff --git a/superset/models/helpers.py b/superset/models/helpers.py
@@ -584,6 +584,7 @@ def __init__(  # pylint: disable=too-many-arguments
         self.errors = errors or []
         self.from_dttm = from_dttm
         self.to_dttm = to_dttm
+        self.sql_rowcount = len(self.df.index) if not self.df.empty else 0
 
 
 class ExtraJSONMixin:

diff --git a/superset/views/core.py b/superset/views/core.py
@@ -167,6 +167,8 @@ def get_raw_results(self, viz_obj: BaseViz) -> FlaskResponse:
                 "data": payload["df"].to_dict("records"),
                 "colnames": payload.get("colnames"),
                 "coltypes": payload.get("coltypes"),
+                "rowcount": payload.get("rowcount"),
+                "sql_rowcount": payload.get("sql_rowcount"),
             },
         )
 

diff --git a/superset/viz.py b/superset/viz.py
@@ -262,6 +262,8 @@ def get_samples(self) -> dict[str, Any]:
             "data": payload["df"].to_dict(orient="records"),
             "colnames": payload.get("colnames"),
             "coltypes": payload.get("coltypes"),
+            "rowcount": payload.get("rowcount"),
+            "sql_rowcount": payload.get("sql_rowcount"),
         }
 
     @deprecated(deprecated_in="3.0")