Skip to content

Commit

Permalink
fix: row limits & row count labels are confusing
Browse files Browse the repository at this point in the history
Currently, in explore & dashboard, we usually apply a row limit
on the query we issue against the database.

Following this, some visualization backend code does some
post-processing on data frames using pandas, typically pivoting
some things, which affects the result set's row count in
intricate capacities.

Now from a UX standpoint the user is exposed with:
- row limits in the control panels
- row count in various areas of the UI where visualiztion, preview,
  samples and raw results are shown

Currently we show the rowcount that's from row-processing. So maybe
a hard limit was applied at 1000 rows on the database, but we
pivot and it goes does to say 532, and we show the user that
we haven't hit the limit when we actually did.

Also note that the component that shows rowcount is supposed
to turn red if limit is hit, letting the user know they are looking at
truncated data.
  • Loading branch information
mistercrunch committed Mar 28, 2024
1 parent 546d48a commit acc8596
Show file tree
Hide file tree
Showing 7 changed files with 14 additions and 1 deletion.
2 changes: 1 addition & 1 deletion superset-frontend/src/explore/components/ChartPills.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ export const ChartPills = forwardRef(
>
{!isLoading && firstQueryResponse && (
<RowCountLabel
rowcount={Number(firstQueryResponse.rowcount) || 0}
rowcount={Number(firstQueryResponse.sql_rowcount) || 0}
limit={Number(rowLimit) || 0}
/>
)}
Expand Down
2 changes: 2 additions & 0 deletions superset/common/query_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ def _get_full(
"data": payload.get("data"),
"colnames": payload.get("colnames"),
"coltypes": payload.get("coltypes"),
"rowcount": payload.get("rowcount"),
"sql_rowcount": payload.get("sql_rowcount"),
}
return payload

Expand Down
1 change: 1 addition & 0 deletions superset/common/query_context_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ def get_df_payload(
"status": cache.status,
"stacktrace": cache.stacktrace,
"rowcount": len(cache.df.index),
"sql_rowcount": cache.sql_rowcount,
"from_dttm": query_obj.from_dttm,
"to_dttm": query_obj.to_dttm,
"label_map": label_map,
Expand Down
5 changes: 5 additions & 0 deletions superset/common/utils/query_cache_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def __init__(
is_cached: bool | None = None,
cache_dttm: str | None = None,
cache_value: dict[str, Any] | None = None,
sql_rowcount: int | None = None,
) -> None:
self.df = df
self.query = query
Expand All @@ -79,6 +80,7 @@ def __init__(
self.is_cached = is_cached
self.cache_dttm = cache_dttm
self.cache_value = cache_value
self.sql_rowcount = sql_rowcount

# pylint: disable=too-many-arguments
def set_query_result(
Expand All @@ -102,6 +104,7 @@ def set_query_result(
self.rejected_filter_columns = query_result.rejected_filter_columns
self.error_message = query_result.error_message
self.df = query_result.df
self.sql_rowcount = query_result.sql_rowcount
self.annotation_data = {} if annotation_data is None else annotation_data

if self.status != QueryStatus.FAILED:
Expand All @@ -117,6 +120,7 @@ def set_query_result(
"applied_filter_columns": self.applied_filter_columns,
"rejected_filter_columns": self.rejected_filter_columns,
"annotation_data": self.annotation_data,
"sql_rowcount": self.sql_rowcount,
}
if self.is_loaded and key and self.status != QueryStatus.FAILED:
self.set(
Expand Down Expand Up @@ -167,6 +171,7 @@ def get(
query_cache.status = QueryStatus.SUCCESS
query_cache.is_loaded = True
query_cache.is_cached = cache_value is not None
query_cache.sql_rowcount = cache_value.get("sql_rowcount", None)
query_cache.cache_dttm = (
cache_value["dttm"] if cache_value is not None else None
)
Expand Down
1 change: 1 addition & 0 deletions superset/models/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,7 @@ def __init__( # pylint: disable=too-many-arguments
self.errors = errors or []
self.from_dttm = from_dttm
self.to_dttm = to_dttm
self.sql_rowcount = len(self.df.index) if not self.df.empty else 0


class ExtraJSONMixin:
Expand Down
2 changes: 2 additions & 0 deletions superset/views/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ def get_raw_results(self, viz_obj: BaseViz) -> FlaskResponse:
"data": payload["df"].to_dict("records"),
"colnames": payload.get("colnames"),
"coltypes": payload.get("coltypes"),
"rowcount": payload.get("rowcount"),
"sql_rowcount": payload.get("sql_rowcount"),
},
)

Expand Down
2 changes: 2 additions & 0 deletions superset/viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,8 @@ def get_samples(self) -> dict[str, Any]:
"data": payload["df"].to_dict(orient="records"),
"colnames": payload.get("colnames"),
"coltypes": payload.get("coltypes"),
"rowcount": payload.get("rowcount"),
"sql_rowcount": payload.get("sql_rowcount"),
}

@deprecated(deprecated_in="3.0")
Expand Down

0 comments on commit acc8596

Please sign in to comment.