diff --git a/pyproject.toml b/pyproject.toml
index 8466f9fa..91d6e363 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,7 +22,7 @@ classifiers = [
 dependencies = [
     "databricks-labs-blueprint[yaml]>=0.4.2",
     "databricks-sdk>=0.38.0,<1.0.0",
-    "sqlglot>=22.3.1"
+    "sqlglot>=22.3.1,<26.30.0"
 ]
 
 [project.urls]
diff --git a/src/databricks/labs/lsql/dashboards.py b/src/databricks/labs/lsql/dashboards.py
index 931bb701..0ae33242 100644
--- a/src/databricks/labs/lsql/dashboards.py
+++ b/src/databricks/labs/lsql/dashboards.py
@@ -453,6 +453,7 @@ class Tile:
     _content: str = ""
     """The contents of the tile file. Hidden attribute that functions as a cache to read contents once."""
 
+    # Note: fields of _position are always filled.
     _position: Position = dataclasses.field(default_factory=lambda: Position(0, 0, 0, 0))
     """The position of the tile in the dashboard. Hidden stateful attribute updated by the tiling logic."""
 
@@ -495,13 +496,19 @@ def place_after(self, position: Position) -> None:
         - `position.width < _MAXIMUM_DASHBOARD_WIDTH` : tiles in a single row should have the same size
         - `position.width == _MAXIMUM_DASHBOARD_WIDTH` : any height
         """
-        x = position.x + position.width
-        if x + self.position.width > _MAXIMUM_DASHBOARD_WIDTH:
+        if position.x is None or position.y is None or position.width is None or position.height is None:
+            logger.warning(f"Position unusable for placement: {position}")
+            return
+        x: int = position.x + position.width
+        my_position = self.position
+        assert my_position.width is not None
+        y: int
+        if x + my_position.width > _MAXIMUM_DASHBOARD_WIDTH:
             x = 0
             y = position.y + position.height
         else:
             y = position.y
-        self._position = dataclasses.replace(self.position, x=x, y=y)
+        self._position = dataclasses.replace(my_position, x=x, y=y)
 
     @classmethod
     def from_tile_metadata(cls, tile_metadata: TileMetadata) -> "Tile":
@@ -744,12 +751,19 @@ def _get_query_layouts(self) -> Iterable[Layout]:
         spec = self._get_query_widget_spec(fields, frame=frame)
         widget = Widget(name=f"{self.metadata.id}_widget", queries=[named_query], spec=spec)
         widget = self._merge_widget_with_overrides(widget)
-        height = self.position.height
-        if len(self.metadata.filters) > 0 and self.position.width > 0:
-            height -= self._FILTER_HEIGHT * math.ceil(len(self.metadata.filters) / self.position.width)
+        my_position = self.position
+        assert (
+            my_position.width is not None
+            and my_position.height is not None
+            and my_position.x is not None
+            and my_position.y is not None
+        )
+        height = my_position.height
+        if len(self.metadata.filters) > 0 and my_position.width > 0:
+            height -= self._FILTER_HEIGHT * math.ceil(len(self.metadata.filters) / my_position.width)
         height = max(height, 0)
-        y = self.position.y + self.position.height - height
-        position = dataclasses.replace(self.position, y=y, height=height)
+        y = my_position.y + my_position.height - height
+        position = dataclasses.replace(my_position, y=y, height=height)
         layout = Layout(widget=widget, position=position)
         yield layout
 
@@ -783,24 +797,31 @@ def _get_filter_positions(self) -> Iterable[Position]:
         ii) occupy an additional row if the previous one is filled completely.
""" filters_size = len(self.metadata.filters) * self._FILTER_HEIGHT - if filters_size > self.position.width * (self.position.height - 1): # At least one row for the query widget + my_position = self.position + assert ( + my_position.width is not None + and my_position.height is not None + and my_position.x is not None + and my_position.y is not None + ) + if filters_size > my_position.width * (my_position.height - 1): # At least one row for the query widget raise ValueError(f"Too many filters defined for {self}") # The bottom row requires bookkeeping to adjust the filters width to fill it completely - bottom_row_index = len(self.metadata.filters) // self.position.width - bottom_row_filter_count = len(self.metadata.filters) % self.position.width or self.position.width - bottom_row_filter_width = self.position.width // bottom_row_filter_count - bottom_row_remainder_width = self.position.width - bottom_row_filter_width * bottom_row_filter_count + bottom_row_index = len(self.metadata.filters) // my_position.width + bottom_row_filter_count = len(self.metadata.filters) % my_position.width or my_position.width + bottom_row_filter_width = my_position.width // bottom_row_filter_count + bottom_row_remainder_width = my_position.width - bottom_row_filter_width * bottom_row_filter_count for filter_index in range(len(self.metadata.filters)): - if filter_index % self.position.width == 0: + if filter_index % my_position.width == 0: x_offset = 0 # Reset on new row - x = self.position.x + x_offset - y = self.position.y + self._FILTER_HEIGHT * (filter_index // self.position.width) + x = my_position.x + x_offset + y = my_position.y + self._FILTER_HEIGHT * (filter_index // my_position.width) width = 1 - if filter_index // self.position.width == bottom_row_index: # Reached bottom row + if filter_index // my_position.width == bottom_row_index: # Reached bottom row width = bottom_row_filter_width - if filter_index % self.position.width < bottom_row_remainder_width: + if filter_index % my_position.width < bottom_row_remainder_width: width += 1 # Fills up the remainder width if self.position.width % bottom_row_filter_count != 0 position = Position(x, y, width, self._FILTER_HEIGHT) yield position @@ -865,7 +886,9 @@ def position(self) -> Position: width = self.metadata.width else: fields = self._find_fields() - width = max(self._position.width, len(fields) // 3) + my_width = self._position.width + assert my_width is not None + width = max(my_width, len(fields) // 3) width = min(width, _MAXIMUM_DASHBOARD_WIDTH) height = self.metadata.height or self._position.height return Position(self._position.x, self._position.y, width, height) @@ -935,7 +958,11 @@ def _get_dataset_columns(self, datasets: list[Dataset]) -> set[tuple[str, str]]: """Get the filter column and dataset name pairs.""" dataset_columns = set() for dataset in datasets: + if dataset.query is None: + continue for field in self._find_filter_fields(dataset.query): + if field.name is None or dataset.name is None: + continue dataset_columns.add((field.name, dataset.name)) return dataset_columns @@ -1221,15 +1248,18 @@ def save_to_folder(self, dashboard: Dashboard, local_path: Path) -> Dashboard: local_path.mkdir(parents=True, exist_ok=True) dashboard = self._with_better_names(dashboard) for dataset in dashboard.datasets: + if dataset.query is None: + continue query = QueryTile.format(dataset.query) (local_path / f"{dataset.name}.sql").write_text(query) for page in dashboard.pages: with (local_path / f"{page.name}.yml").open("w") as f: 
                 yaml.safe_dump(page.as_dict(), f)
             for layout in page.layout:
-                if layout.widget.textbox_spec is not None:
-                    name = layout.widget.name.removesuffix("_widget")
-                    (local_path / f"{name}.md").write_text(layout.widget.textbox_spec)
+                if layout.widget.name is None or layout.widget.textbox_spec is None:
+                    continue
+                name = layout.widget.name.removesuffix("_widget")
+                (local_path / f"{name}.md").write_text(layout.widget.textbox_spec)
         return dashboard
 
     def create_dashboard(
@@ -1275,12 +1305,12 @@ def create_dashboard(
 
     def _with_better_names(self, dashboard: Dashboard) -> Dashboard:
         """Replace names with human-readable names."""
-        better_names = {}
+        better_names: dict[str, str] = {}
         for dataset in dashboard.datasets:
-            if dataset.display_name is not None:
+            if dataset.name is not None and dataset.display_name is not None:
                 better_names[dataset.name] = dataset.display_name
         for page in dashboard.pages:
-            if page.display_name is not None:
+            if page.name is not None and page.display_name is not None:
                 better_names[page.name] = page.display_name
         return self._replace_names(dashboard, better_names)
 
@@ -1294,22 +1324,26 @@ def _replace_names(self, node: T, better_names: dict[str, str]) -> T:
                     setattr(node, field.name, [self._replace_names(item, better_names) for item in value])
                 elif dataclasses.is_dataclass(value):
                     setattr(node, field.name, self._replace_names(value, better_names))
-        if isinstance(node, Dataset):
+        if isinstance(node, Dataset) and node.name is not None:
             node.name = better_names.get(node.name, node.name)
-        elif isinstance(node, Page):
+        elif isinstance(node, Page) and node.name is not None:
             node.name = better_names.get(node.name, node.name)
-        elif isinstance(node, Query):
+        elif isinstance(node, Query) and node.dataset_name is not None:
             node.dataset_name = better_names.get(node.dataset_name, node.dataset_name)
-        elif isinstance(node, NamedQuery) and node.query:
+        elif isinstance(node, NamedQuery) and node.query and node.name is not None:
             # 'dashboards/01eeb077e38c17e6ba3511036985960c/datasets/01eeb081882017f6a116991d124d3068_...'
             if node.name.startswith("dashboards/"):
-                parts = [node.query.dataset_name]
-                for query_field in node.query.fields:
-                    parts.append(query_field.name)
-                new_name = "_".join(parts)
-                better_names[node.name] = new_name
+                if node.query.dataset_name is None:
+                    logger.warning(f"NamedQuery {node.name} has no dataset name, cannot replace name.")
+                else:
+                    parts: list[str] = [node.query.dataset_name]
+                    for query_field in node.query.fields:
+                        if query_field.name is not None:
+                            parts.append(query_field.name)
+                    new_name = "_".join(parts)
+                    better_names[node.name] = new_name
             node.name = better_names.get(node.name, node.name)
-        elif isinstance(node, ControlFieldEncoding):
+        elif isinstance(node, ControlFieldEncoding) and node.query_name is not None:
             node.query_name = better_names.get(node.query_name, node.query_name)
         elif isinstance(node, Widget):
             if node.spec is not None:
diff --git a/src/databricks/labs/lsql/lakeview/model.py b/src/databricks/labs/lsql/lakeview/model.py
index 92a9b16b..d2bd8d9d 100755
--- a/src/databricks/labs/lsql/lakeview/model.py
+++ b/src/databricks/labs/lsql/lakeview/model.py
@@ -156,7 +156,7 @@ def from_dict(cls, d: Json) -> AngleAxisSpec:
 
 @dataclass
 class AngleFieldEncoding:
-    field_name: str
+    field_name: str | None
     scale: QuantitativeScale
     axis: AngleAxisSpec | None = None
     display_name: str | None = None
@@ -277,7 +277,7 @@ def from_dict(cls, d: Json) -> BarSpec:
 @dataclass
 class CategoricalColorScaleMappingEntry:
     value: DataDomainValue
-    color: str
+    color: str | None
 
     def as_dict(self) -> Json:
         body: Json = {}
@@ -418,7 +418,7 @@ def from_dict(cls, d: Json) -> ColorEncodingForMultiSeries:
 
 @dataclass
 class ColorFieldEncoding:
-    field_name: str
+    field_name: str | None
     scale: Scale
     display_name: str | None = None
     legend: LegendSpec | None = None
@@ -474,8 +474,8 @@ def from_dict(cls, d: Json) -> ControlEncodingMap:
 
 @dataclass
 class ControlFieldEncoding(ControlEncoding):
-    field_name: str
-    query_name: str
+    field_name: str | None
+    query_name: str | None
     display_name: str | None = None
 
     def as_dict(self) -> Json:
@@ -519,7 +519,7 @@ def from_dict(cls, d: Json) -> CounterEncodingMap:
 
 @dataclass
 class CounterFieldEncoding:
-    field_name: str
+    field_name: str | None
     display_name: str | None = None
     row_number: int | None = None
 
@@ -597,8 +597,8 @@ class DataDomainValue(Enum):
 
 @dataclass
 class Dataset:
-    name: str
-    query: str
+    name: str | None
+    query: str | None
     display_name: str | None = None
 
     def as_dict(self) -> Json:
@@ -674,7 +674,7 @@ def from_dict(cls, d: Json) -> DateRangePickerSpec:
 
 @dataclass
 class DetailsV1ColumnEncoding:
-    field_name: str
+    field_name: str | None
     display_name: str | None = None
     title: str | None = None
     type: ColumnType | None = None
@@ -785,8 +785,8 @@ def from_dict(cls, d: Json) -> DropdownSpec:
 
 @dataclass
 class Field:
-    name: str
-    expression: str
+    name: str | None
+    expression: str | None
 
     def as_dict(self) -> Json:
         body: Json = {}
@@ -803,7 +803,7 @@ def from_dict(cls, d: Json) -> Field:
 
 @dataclass
 class Format:
-    foreground_color: str
+    foreground_color: str | None
 
     def as_dict(self) -> Json:
         body: Json = {}
@@ -843,7 +843,7 @@ def from_dict(cls, d: Json) -> FormatConfig:
 
 @dataclass
 class LabelEncoding:
-    show: bool
+    show: bool | None
 
     def as_dict(self) -> Json:
         body: Json = {}
@@ -1034,8 +1034,8 @@ def from_dict(cls, d: Json) -> MultiSelectSpec:
 
 @dataclass
 class NamedQuery:
-    name: str
-    query: Query
+    name: str | None
+    query: Query | None
 
     def as_dict(self) -> Json:
         body: Json = {}
@@ -1087,7 +1087,7 @@ def from_dict(cls, d: Json) -> Order:
 
 @dataclass
 class Page:
-    name: str
+    name: str | None
     layout: list[Layout]
     display_name: str | None = None
 
@@ -1117,8 +1117,8 @@ class PaginationSize(Enum):
 
 @dataclass
 class ParameterEncoding(ControlEncoding):
-    dataset_name: str
-    parameter_keyword: str
+    dataset_name: str | None
+    parameter_keyword: str | None
 
     def as_dict(self) -> Json:
         body: Json = {}
@@ -1192,7 +1192,7 @@ def from_dict(cls, d: Json) -> PieSpec:
 
 @dataclass
 class PivotCellEncoding:
-    field_name: str
+    field_name: str | None
     display_name: str | None = None
 
     def as_dict(self) -> Json:
@@ -1237,7 +1237,7 @@ def from_dict(cls, d: Json) -> PivotEncodingMap:
 
 @dataclass
 class PivotSpec(WidgetSpec):
-    encodings: PivotEncodingMap
+    encodings: PivotEncodingMap | None
     frame: WidgetFrameSpec | None = None
 
     def as_dict(self) -> Json:
@@ -1260,10 +1260,10 @@ def from_dict(cls, d: Json) -> PivotSpec:
 
 @dataclass
 class Position:
-    x: int
-    y: int
-    width: int
-    height: int
+    x: int | None
+    y: int | None
+    width: int | None
+    height: int | None
 
     def as_dict(self) -> Json:
         body: Json = {}
@@ -1329,7 +1329,7 @@ def from_dict(cls, d: Json) -> QuantitativeScale:
 
 @dataclass
 class Query:
-    dataset_name: str
+    dataset_name: str | None
     fields: list[Field]
     disaggregated: bool | None = None
     orders: list[Order] | None = None
@@ -1360,7 +1360,7 @@ class RenderFieldEncoding:
     """Common type that a single-field encoding should (conceptually) extend from"""
 
-    field_name: str
+    field_name: str | None
     display_name: str | None = None
 
     def as_dict(self) -> Json:
@@ -1412,8 +1412,8 @@ def from_dict(cls, d: Json) -> ScatterSpec:
 
 @dataclass
 class SingleFieldAxisEncoding:
-    field_name: str
-    scale: Scale
+    field_name: str | None
+    scale: Scale | None
     axis: AxisSpec | None = None
     display_name: str | None = None
 
@@ -1528,10 +1528,10 @@ class TableV1ColumnEncoding:
     legacy v1 table."""
 
     boolean_values: list[str]
-    display_as: DisplayType
-    field_name: str
-    title: str
-    type: ColumnType
+    display_as: DisplayType | None
+    field_name: str | None
+    title: str | None
+    type: ColumnType | None
     align_content: Alignment | None = None
     allow_html: bool | None = None
     allow_search: bool | None = None
@@ -1619,7 +1619,7 @@ def from_dict(cls, d: Json) -> TableV1ColumnEncoding:
             align_content=_enum(d, "alignContent", Alignment),
             allow_html=d.get("allowHTML", None),
             allow_search=d.get("allowSearch", None),
-            boolean_values=d.get("booleanValues", None),
+            boolean_values=d.get("booleanValues", []),
             date_time_format=d.get("dateTimeFormat", None),
             decimal_format=d.get("decimalFormat", None),
             default_column_width=d.get("defaultColumnWidth", None),
@@ -1664,15 +1664,15 @@ def from_dict(cls, d: Json) -> TableV1EncodingMap:
 
 @dataclass
 class TableV1Spec(WidgetSpec):
-    allow_html_by_default: bool
+    allow_html_by_default: bool | None
     """V1 uses `version` to determine if the v1 editor should set `allowHTML` by default."""
-    condensed: bool
+    condensed: bool | None
     encodings: TableV1EncodingMap
     invisible_columns: list[TableV1SpecInvisibleColumnsItem]
     """Unused columns. These columns are invisible and not referred, and thus should not be include in the queries (be outside of `encodings`).
     Even when the base query changes not to include these columns, the table still can work without throwing errors."""
-    items_per_page: int
+    items_per_page: int | None
     frame: WidgetFrameSpec | None = None
     pagination_size: PaginationSize | None = None
     with_row_number: bool | None = None
@@ -1715,10 +1715,10 @@ def from_dict(cls, d: Json) -> TableV1Spec:
 
 @dataclass
 class TableV1SpecInvisibleColumnsItem:
-    name: str
-    display_as: DisplayType
-    type: ColumnType
-    title: str
+    name: str | None
+    display_as: DisplayType | None
+    type: ColumnType | None
+    title: str | None
     boolean_values: list[str]
     align_content: Alignment | None = None
     allow_html: bool | None = None
@@ -1801,7 +1801,7 @@ def from_dict(cls, d: Json) -> TableV1SpecInvisibleColumnsItem:
             align_content=_enum(d, "alignContent", Alignment),
             allow_html=d.get("allowHTML", None),
             allow_search=d.get("allowSearch", None),
-            boolean_values=d.get("booleanValues", None),
+            boolean_values=d.get("booleanValues", []),
            date_time_format=d.get("dateTimeFormat", None),
             decimal_format=d.get("decimalFormat", None),
             default_column_width=d.get("defaultColumnWidth", None),
@@ -1828,7 +1828,7 @@ def from_dict(cls, d: Json) -> TableV1SpecInvisibleColumnsItem:
 
 @dataclass
 class TableV2Spec(WidgetSpec):
-    encodings: TableEncodingMap
+    encodings: TableEncodingMap | None
     frame: WidgetFrameSpec | None = None
 
     def as_dict(self) -> Json:
@@ -1864,7 +1864,7 @@ def from_dict(cls, d: Json) -> TemporalScale:
 
 @dataclass
 class TextEntrySpec(WidgetSpec):
-    encodings: ControlEncodingMap
+    encodings: ControlEncodingMap | None
     exclude: bool | None = None
     frame: WidgetFrameSpec | None = None
     is_case_sensitive: bool | None = None
@@ -1906,7 +1906,7 @@ class TextEntrySpecMatchMode(Enum):
 
 @dataclass
 class Widget:
-    name: str
+    name: str | None
     queries: list[NamedQuery] | None = None
     spec: WidgetSpec | None = None
     textbox_spec: str | None = None
@@ -1982,7 +1982,7 @@ def from_dict(cls, d: Json) -> WordCloudEncodingMap:
 
 @dataclass
 class WordCloudSpec(WidgetSpec):
-    encodings: WordCloudEncodingMap
+    encodings: WordCloudEncodingMap | None
     frame: WidgetFrameSpec | None = None
 
     def as_dict(self) -> Json:
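
Illustrative sketch (not part of the patch): the pattern the dashboards.py changes rely on is "reject unusable positions up front, then narrow the remaining optionals before doing arithmetic", now that every Position field can be None. The stand-in Position dataclass, the free function, and the value of the width constant below are assumptions made for this sketch; only the names Position, place_after, and _MAXIMUM_DASHBOARD_WIDTH mirror the patched code.

import dataclasses
import logging
from dataclasses import dataclass

logger = logging.getLogger(__name__)

_MAXIMUM_DASHBOARD_WIDTH = 6  # same name as in dashboards.py; the value 6 is assumed for this sketch


@dataclass
class Position:
    """Stand-in for lakeview.model.Position after this patch: every field may be None."""

    x: int | None
    y: int | None
    width: int | None
    height: int | None


def place_after(current: Position, previous: Position) -> Position:
    """Place `current` to the right of `previous`, wrapping to a new row when the row is full.

    Mirrors the guard-then-narrow pattern of Tile.place_after: unusable positions are logged and
    left untouched instead of raising a TypeError from adding None to an int.
    """
    if previous.x is None or previous.y is None or previous.width is None or previous.height is None:
        logger.warning(f"Position unusable for placement: {previous}")
        return current
    assert current.width is not None  # placement only makes sense for a sized tile
    x = previous.x + previous.width
    if x + current.width > _MAXIMUM_DASHBOARD_WIDTH:  # no room left in this row
        x, y = 0, previous.y + previous.height  # wrap to the next row
    else:
        y = previous.y
    return dataclasses.replace(current, x=x, y=y)


if __name__ == "__main__":
    # A 3-wide tile placed after another 3-wide tile at x=3 no longer fits in a 6-wide row, so it wraps.
    print(place_after(Position(0, 0, 3, 2), Position(3, 0, 3, 2)))  # Position(x=0, y=2, width=3, height=2)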