Skip to content

Commit 27a41a5

Browse files
sundarshankar89asnaregueniaigoodwillpunning
authored
Profiler CLI (#1623)
<!-- REMOVE IRRELEVANT COMMENTS BEFORE CREATING A PULL REQUEST --> ## Changes <!-- Summary of your changes that are easy to understand. Add screenshots when necessary, they're helpful to illustrate the before and after state --> ### What does this PR do? This introduces the initial version of the CLI ### Relevant implementation details ### Caveats/things to watch out for when reviewing: ### Linked issues <!-- DOC: Link issue with a keyword: close, closes, closed, fix, fixes, fixed, resolve, resolves, resolved. See https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword --> ``` shell ~ ❯ databricks labs lakebridge execute-database-profiler --help 26s Profile the source system database Usage: databricks labs lakebridge execute-database-profiler [flags] Flags: -h, --help help for execute-database-profiler --source-tech string (Optional) The technology/platform of the sources to Profile Global Flags: --debug enable debug logging -o, --output type output type: text or json (default text) -p, --profile string ~/.databrickscfg profile -t, --target string bundle target to use (if applicable) ``` ``` shell ~ ❯ databricks labs lakebridge execute-database-profiler --source-tech mssql 12:24:59 ERROR [d.labs.lakebridge] Only following source system is supported ['synapse'] 12:24:59 ERROR [d.l.lakebridge.execute-database-profiler] ValueError: Invalid source technology mssql ~ ❯ databricks labs lakebridge execute-database-profiler --source-tech Synapse [UPGRADE ADVISED] Newer lakebridge version was released 12 days ago. Please run `databricks labs upgrade lakebridge` to upgrade: feature/profiler_entry_point -> v0.10.11 12:25:15 ERROR [d.l.lakebridge.execute-database-profiler] ValueError: Connection details not found. Please run `databricks labs lakebridge configure-database-profiler` to set up connection details for synapse. 
~ ❯ ``` ``` shell ~ ❯ databricks labs lakebridge execute-database-profiler Select the source technology [0] synapse Enter a number between 0 and 0: 0 12:32:50 ERROR [d.l.lakebridge.execute-database-profiler] ValueError: Connection details not found. Please run `databricks labs lakebridge configure-database-profiler` to set up connection details for synapse. ``` ### Functionality - [ ] added relevant user documentation - [x] added new CLI command - [ ] modified existing command: `databricks labs remorph ...` - [ ] ... +add your own ### Tests <!-- How is this tested? Please see the checklist below and also describe any other relevant tests --> - [x] manually tested - [x] added unit tests - [x] added integration tests --------- Co-authored-by: Andrew Snare <[email protected]> Co-authored-by: Guenia Izquierdo <[email protected]> Co-authored-by: Will Girten <[email protected]>
1 parent e8aadaa commit 27a41a5

File tree

3 files changed

+52
-13
lines changed

3 files changed

+52
-13
lines changed

labs.yml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ commands:
7373
description: Reconcile source and target data residing on Databricks using aggregated metrics
7474

7575
- name: configure-database-profiler
76-
description: Configure database profiler
76+
description: (Experimental) Configure database profiler
7777

7878
- name: create-profiler-dashboard
7979
description: (Experimental) Upload the profiler results as a Databricks dashboard.
@@ -115,3 +115,9 @@ commands:
115115
116116
- name: configure-reconcile
117117
description: Configure 'reconcile' dependencies
118+
119+
- name: execute-database-profiler
120+
description: (Experimental) Profile the source system database
121+
flags:
122+
- name: source-tech
123+
description: (Optional) The technology/platform of the sources to Profile

src/databricks/labs/lakebridge/assessments/_constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
}
99

1010
# TODO: replace this list with PLATFORM_TO_SOURCE_TECHNOLOGY.keys() once all platforms are supported
11-
PROFILER_SOURCE_SYSTEM = ["mssql", "synapse"]
11+
PROFILER_SOURCE_SYSTEM = ["synapse"]
1212

1313

1414
# This flag indicates whether a connector is required for the source system when the pipeline is triggered

src/databricks/labs/lakebridge/cli.py

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,12 @@
2121

2222

2323
from databricks.labs.lakebridge.assessments.configure_assessment import create_assessment_configurator
24-
from databricks.labs.lakebridge.assessments import PROFILER_SOURCE_SYSTEM
24+
from databricks.labs.lakebridge.assessments import PROFILER_SOURCE_SYSTEM, PRODUCT_NAME
25+
from databricks.labs.lakebridge.assessments.profiler import Profiler
2526

2627
from databricks.labs.lakebridge.config import TranspileConfig, LSPConfigOptionV1
2728
from databricks.labs.lakebridge.contexts.application import ApplicationContext
29+
from databricks.labs.lakebridge.connections.credential_manager import cred_file
2830
from databricks.labs.lakebridge.helpers.recon_config_utils import ReconConfigPrompts
2931
from databricks.labs.lakebridge.helpers.telemetry_utils import make_alphanum_or_semver
3032
from databricks.labs.lakebridge.install import installer
@@ -711,18 +713,19 @@ def configure_secrets(*, w: WorkspaceClient) -> None:
711713
recon_conf.prompt_and_save_connection_details()
712714

713715

714-
@lakebridge.command(is_unauthenticated=True)
715-
def configure_database_profiler() -> None:
716-
"""[Experimental] Install the lakebridge Assessment package"""
717-
prompts = Prompts()
718-
719-
# Prompt for source system
720-
source_system = str(
721-
prompts.choice("Please select the source system you want to configure", PROFILER_SOURCE_SYSTEM)
722-
).lower()
716+
@lakebridge.command
717+
def configure_database_profiler(w: WorkspaceClient) -> None:
718+
"""[Experimental] Installs and runs the Lakebridge Assessment package for database profiling"""
719+
ctx = ApplicationContext(w)
720+
ctx.add_user_agent_extra("cmd", "configure-profiler")
721+
prompts = ctx.prompts
722+
source_tech = prompts.choice("Select the source technology", PROFILER_SOURCE_SYSTEM).lower()
723+
ctx.add_user_agent_extra("profiler_source_tech", make_alphanum_or_semver(source_tech))
724+
user = ctx.current_user
725+
logger.debug(f"User: {user}")
723726

724727
# Create appropriate assessment configurator
725-
assessment = create_assessment_configurator(source_system=source_system, product_name="lakebridge", prompts=prompts)
728+
assessment = create_assessment_configurator(source_system=source_tech, product_name="lakebridge", prompts=prompts)
726729
assessment.run()
727730

728731

@@ -960,6 +963,36 @@ def llm_transpile(
960963
)
961964

962965

966+
@lakebridge.command()
967+
def execute_database_profiler(w: WorkspaceClient, source_tech: str | None = None) -> None:
968+
"""Execute the Profiler Extraction for the given source technology"""
969+
ctx = ApplicationContext(w)
970+
ctx.add_user_agent_extra("cmd", "execute-profiler")
971+
prompts = ctx.prompts
972+
if source_tech is None:
973+
source_tech = prompts.choice("Select the source technology", PROFILER_SOURCE_SYSTEM)
974+
source_tech = source_tech.lower()
975+
976+
if source_tech not in PROFILER_SOURCE_SYSTEM:
977+
logger.error(f"Only the following source systems are supported: {PROFILER_SOURCE_SYSTEM}")
978+
raise_validation_exception(f"Invalid source technology {source_tech}")
979+
980+
ctx.add_user_agent_extra("profiler_source_tech", make_alphanum_or_semver(source_tech))
981+
user = ctx.current_user
982+
logger.debug(f"User: {user}")
983+
# Check that the credentials file holding the connection details exists before running the profiler
984+
file = cred_file(PRODUCT_NAME)
985+
if not file.exists():
986+
raise_validation_exception(
987+
f"Connection details not found. Please run `databricks labs lakebridge configure-database-profiler` "
988+
f"to set up connection details for {source_tech}."
989+
)
990+
profiler = Profiler.create(source_tech)
991+
992+
# TODO: Add extractor logic to ApplicationContext instead of creating inside the Profiler class
993+
profiler.profile()
994+
995+
963996
@lakebridge.command()
964997
def create_profiler_dashboard(
965998
*,

0 commit comments

Comments
 (0)