Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(reports): xlsx attachments #19810

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
3 changes: 3 additions & 0 deletions superset-frontend/src/components/ReportModal/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,9 @@ function ReportModal({
<StyledRadio value={NOTIFICATION_FORMATS.CSV}>
{t('Formatted CSV attached in email')}
</StyledRadio>
<StyledRadio value={NOTIFICATION_FORMATS.XLSX}>
{t('Formatted Excel attached in email')}
</StyledRadio>
</StyledRadioGroup>
</div>
</>
Expand Down
4 changes: 4 additions & 0 deletions superset-frontend/src/features/alerts/AlertReportModal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,7 @@ const TRANSLATIONS = {
CHART_TEXT: t('Chart'),
SEND_AS_PNG_TEXT: t('Send as PNG'),
SEND_AS_CSV_TEXT: t('Send as CSV'),
SEND_AS_XLSX: t('Send as XLSX'),
SEND_AS_TEXT: t('Send as text'),
IGNORE_CACHE_TEXT: t('Ignore cache when generating screenshot'),
NOTIFICATION_METHOD_TEXT: t('Notification method'),
Expand Down Expand Up @@ -1462,6 +1463,9 @@ const AlertReportModal: FunctionComponent<AlertReportModalProps> = ({
<StyledRadio value="CSV">
{TRANSLATIONS.SEND_AS_CSV_TEXT}
</StyledRadio>
<StyledRadio value="XLSX">
{TRANSLATIONS.SEND_AS_XLSX}
</StyledRadio>
{TEXT_BASED_VISUALIZATION_TYPES.includes(chartVizType) && (
<StyledRadio value="TEXT">
{TRANSLATIONS.SEND_AS_TEXT}
Expand Down
1 change: 1 addition & 0 deletions superset-frontend/src/reports/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ export enum NOTIFICATION_FORMATS {
TEXT = 'TEXT',
PNG = 'PNG',
CSV = 'CSV',
XLSX = 'XLSX',
}
export interface ReportObject {
id?: number;
Expand Down
32 changes: 27 additions & 5 deletions superset/reports/commands/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import json
import logging
from datetime import datetime, timedelta
from io import BytesIO
from typing import Any, Optional, Union
from uuid import UUID

Expand Down Expand Up @@ -158,6 +159,7 @@ def _get_url(
if self._report_schedule.chart:
if result_format in {
ChartDataResultFormat.CSV,
ChartDataResultFormat.XLSX,
ChartDataResultFormat.JSON,
}:
return get_url_path(
Expand Down Expand Up @@ -330,13 +332,25 @@ def _get_log_data(self) -> HeaderDataType:
}
return log_data

def _get_xlsx_data(self) -> bytes:
csv_data = self._get_csv_data()

df = pd.read_csv(BytesIO(csv_data))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Retrieving df from csv data it's not the good approach. We need to write a common method for csv and excel where we provide df param like get_data(self, df: pd.DataFrame) in superset/common/query_context_processor.py.
@dpgaspar Am I correct?
image

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am a bit confused about this part. We are getting df in order to convert it to excel.

I mean if we already have df we can just convert to excel so what would the common method achieve? Could you provide more details?

bio = BytesIO()

# pylint: disable=abstract-class-instantiated
with pd.ExcelWriter(bio, engine="openpyxl") as writer:
df.to_excel(writer, index=False)
cemremengu marked this conversation as resolved.
Show resolved Hide resolved

return bio.getvalue()

def _get_notification_content(self) -> NotificationContent:
"""
Gets a notification content, this is composed by a title and a screenshot

:raises: ReportScheduleScreenshotFailedError
"""
csv_data = None
data = None
embedded_data = None
error_text = None
screenshot_data = []
Expand All @@ -352,11 +366,18 @@ def _get_notification_content(self) -> NotificationContent:
error_text = "Unexpected missing screenshot"
elif (
self._report_schedule.chart
and self._report_schedule.report_format == ReportDataFormat.DATA
and self._report_schedule.report_format == ReportDataFormat.CSV
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add table_like method for ReportDataFormat and use the next condition:

if self._report_schedule.report_format in ReportDataFormat.table_like():

For ReportDataFormat:

@classmethod
def table_like(cls) -> Set["ReportDataFormat"]:
    return {cls.CSV} | {cls.XLSX}

):
csv_data = self._get_csv_data()
if not csv_data:
data = self._get_csv_data()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rename self._get_csv_data() to self._get_data() in order to use CSV and Excel logic in one method.
I think it's better to use self._get_data() instead of self._get_csv_data() and self._get_xlsx_data() because we don't write the boilerplate code. If we want to add a data format for example XML, we do it in one self._get_data().
I think we can use get_chart_dataframe() func for CSV and Excel. This func returns df which we can use it in excel.df_to_excel().
I want to reuse the Chart Export func instead of having a lot of the same code for chart & report data exporting.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to create one def get_data(self, df: pd.DataFrame) method for chart and report?
It will be better to maintain the code in the future.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@EugeneTorap If you go this route (use _get_data instead of _get_csv_data then you lose the postprocessing that takes place in

elif query["result_format"] == ChartDataResultFormat.CSV:
buf = StringIO()
processed_df.to_csv(buf)
buf.seek(0)
query["data"] = buf.getvalue()

if not data:
error_text = "Unexpected missing csv file"
elif (
self._report_schedule.chart
and self._report_schedule.report_format == ReportDataFormat.XLSX
):
data = self._get_xlsx_data()
if not data:
error_text = "Unexpected missing csv data for xlsx"
if error_text:
return NotificationContent(
name=self._report_schedule.name,
Expand Down Expand Up @@ -386,7 +407,8 @@ def _get_notification_content(self) -> NotificationContent:
url=url,
screenshots=screenshot_data,
description=self._report_schedule.description,
csv=csv_data,
data=data,
data_format=self._report_schedule.report_format,
embedded_data=embedded_data,
header_data=header_data,
)
Expand Down
3 changes: 2 additions & 1 deletion superset/reports/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ class ReportState(str, enum.Enum):

class ReportDataFormat(str, enum.Enum):
VISUALIZATION = "PNG"
DATA = "CSV"
CSV = "CSV"
XLSX = "XLSX"
TEXT = "TEXT"


Expand Down
3 changes: 2 additions & 1 deletion superset/reports/notifications/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@
class NotificationContent:
name: str
header_data: HeaderDataType # this is optional to account for error states
csv: Optional[bytes] = None # bytes for csv file
data: Optional[bytes] = None # bytes for data attachment
screenshots: Optional[list[bytes]] = None # bytes for a list of screenshots
data_format: Optional[str] = None # data attachment format (csv, xlsx, etc)
text: Optional[str] = None
description: Optional[str] = ""
url: Optional[str] = None # url to chart/dashboard for this screenshot
Expand Down
14 changes: 10 additions & 4 deletions superset/reports/notifications/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def _get_content(self) -> EmailContent:
return EmailContent(body=self._error_template(self._content.text))
# Get the domain from the 'From' address ..
# and make a message id without the < > in the end
csv_data = None
attachment = None
domain = self._get_smtp_domain()
images = {}

Expand Down Expand Up @@ -166,12 +166,18 @@ def _get_content(self) -> EmailContent:
"""
)

if self._content.csv:
csv_data = {__("%(name)s.csv", name=self._content.name): self._content.csv}
if self._content.data and self._content.data_format:
attachment = {
__(
"%(name)s.%(format)s",
name=self._content.name,
format=self._content.data_format.lower(),
): self._content.data
}
return EmailContent(
body=body,
images=images,
data=csv_data,
data=attachment,
header_data=self._content.header_data,
)

Expand Down
7 changes: 3 additions & 4 deletions superset/reports/notifications/slack.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,8 @@ def _get_body(self) -> str:
return self._message_template(table)

def _get_inline_files(self) -> Sequence[Union[str, IOBase, bytes]]:
if self._content.csv:
return [self._content.csv]
if self._content.data:
return [self._content.data]
if self._content.screenshots:
return self._content.screenshots
return []
Expand All @@ -162,7 +162,6 @@ def send(self) -> None:
title = self._content.name
channel = self._get_channel()
body = self._get_body()
file_type = "csv" if self._content.csv else "png"
try:
token = app.config["SLACK_API_TOKEN"]
if callable(token):
Expand All @@ -176,7 +175,7 @@ def send(self) -> None:
file=file,
initial_comment=body,
title=title,
filetype=file_type,
filetype=self._content.data_format,
)
else:
client.chat_postMessage(channel=channel, text=body)
Expand Down
2 changes: 1 addition & 1 deletion superset/utils/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import pandas as pd


def df_to_excel(df: pd.DataFrame, **kwargs: Any) -> Any:
def df_to_excel(df: pd.DataFrame, **kwargs: Any) -> bytes:
output = io.BytesIO()
# pylint: disable=abstract-class-instantiated
with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
Expand Down
50 changes: 47 additions & 3 deletions tests/integration_tests/reports/commands_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,21 @@ def create_report_email_chart_with_csv():
report_schedule = create_report_notification(
email_target="[email protected]",
chart=chart,
report_format=ReportDataFormat.DATA,
report_format=ReportDataFormat.CSV,
)
yield report_schedule
cleanup_report_schedule(report_schedule)


@pytest.fixture()
def create_report_email_chart_with_xlsx():
with app.app_context():
chart = db.session.query(Slice).first()
chart.query_context = '{"mock": "query_context"}'
report_schedule = create_report_notification(
email_target="[email protected]",
chart=chart,
report_format=ReportDataFormat.XLSX,
)
yield report_schedule
cleanup_report_schedule(report_schedule)
Expand Down Expand Up @@ -233,13 +247,28 @@ def create_report_email_chart_with_csv_no_query_context():
report_schedule = create_report_notification(
email_target="[email protected]",
chart=chart,
report_format=ReportDataFormat.DATA,
report_format=ReportDataFormat.CSV,
name="report_csv_no_query_context",
)
yield report_schedule
cleanup_report_schedule(report_schedule)


@pytest.fixture()
def create_report_email_chart_with_xlsx_no_query_context():
with app.app_context():
chart = db.session.query(Slice).first()
chart.query_context = None
report_schedule = create_report_notification(
email_target="[email protected]",
chart=chart,
report_format=ReportDataFormat.XLSX,
name="report_xlsx_no_query_context",
)
yield report_schedule
cleanup_report_schedule(report_schedule)


@pytest.fixture()
def create_report_email_dashboard():
with app.app_context():
Expand Down Expand Up @@ -284,7 +313,22 @@ def create_report_slack_chart_with_csv():
report_schedule = create_report_notification(
slack_channel="slack_channel",
chart=chart,
report_format=ReportDataFormat.DATA,
report_format=ReportDataFormat.CSV,
)
yield report_schedule

cleanup_report_schedule(report_schedule)


@pytest.fixture()
def create_report_slack_chart_with_xlsx():
with app.app_context():
chart = db.session.query(Slice).first()
chart.query_context = '{"mock": "query_context"}'
report_schedule = create_report_notification(
slack_channel="slack_channel",
chart=chart,
report_format=ReportDataFormat.XLSX,
)
yield report_schedule

Expand Down