|
36 | 36 | from databricks.labs.lakebridge.transpiler.lsp.lsp_engine import LSPEngine |
37 | 37 | from databricks.labs.lakebridge.transpiler.repository import TranspilerRepository |
38 | 38 | from databricks.labs.lakebridge.transpiler.sqlglot.sqlglot_engine import SqlglotEngine |
| 39 | +from databricks.labs.lakebridge.transpiler.switch_runner import SwitchRunner |
39 | 40 | from databricks.labs.lakebridge.transpiler.transpile_engine import TranspileEngine |
40 | 41 |
|
41 | 42 | from databricks.labs.lakebridge.transpiler.transpile_status import ErrorSeverity |
| 43 | +from databricks.labs.switch.lsp import get_switch_dialects |
42 | 44 |
|
43 | 45 |
|
44 | 46 | # Subclass to allow controlled access to protected methods. |
@@ -827,6 +829,137 @@ def analyze( |
827 | 829 | logger.debug(f"User: {ctx.current_user}") |
828 | 830 |
|
829 | 831 |
|
def _validate_llm_transpile_args(
    input_source: str | None,
    output_ws_folder: str | None,
    source_dialect: str | None,
    prompts: Prompts,
) -> tuple[str, str, str]:
    """Obtain and validate the arguments required by the ``llm-transpile`` command.

    Any argument not supplied on the command line is requested interactively via
    *prompts*. Validation failures are reported through ``raise_validation_exception``.

    :param input_source: local filesystem path containing the source SQL; must exist.
    :param output_ws_folder: destination folder; must start with ``/Workspace/``.
    :param source_dialect: source dialect; must be one of the Switch dialects.
    :param prompts: interactive prompt helper used to fill in missing values.
    :return: the validated ``(input_source, output_ws_folder, source_dialect)`` triple.
    """
    switch_dialects = get_switch_dialects()

    # Prompt for anything the caller did not provide.
    if not input_source:
        input_source = prompts.question("Enter input SQL path")
    if not output_ws_folder:
        # Fix: the original prompt read "Enter output workspace folder must start
        # with /Workspace/", which was grammatically broken user-facing text.
        output_ws_folder = prompts.question("Enter output workspace folder (must start with /Workspace/)")
    if not source_dialect:
        source_dialect = prompts.choice("Select the source dialect", sorted(switch_dialects))

    # The input must exist locally before any upload is attempted.
    if not Path(input_source).exists():
        raise_validation_exception(f"Invalid path for '--input-source': Path '{input_source}' does not exist.")

    # Only workspace paths are valid output destinations. (The redundant str()
    # wrapper was dropped: output_ws_folder is always a str at this point.)
    if not output_ws_folder.startswith("/Workspace/"):
        raise_validation_exception(
            f"Invalid value for '--output-ws-folder': workspace output path must start with /Workspace/. Got: {output_ws_folder!r}"
        )

    # Guards a CLI-supplied dialect; prompts.choice already restricts to this set.
    if source_dialect not in switch_dialects:
        raise_validation_exception(
            f"Invalid value for '--source-dialect': {source_dialect!r} must be one of: {', '.join(sorted(switch_dialects))}"
        )

    return input_source, output_ws_folder, source_dialect
| 866 | + |
@lakebridge.command
def llm_transpile(
    *,
    w: WorkspaceClient,
    accept_terms: bool = False,
    input_source: str | None = None,
    output_ws_folder: str | None = None,
    source_dialect: str | None = None,
    catalog_name: str | None = None,
    schema_name: str | None = None,
    volume: str | None = None,
    foundation_model: str | None = None,
    ctx: ApplicationContext | None = None,
) -> None:
    """Transpile source code to Databricks using LLM Transpiler (Switch)"""
    # Wrap the raw workspace client in an ApplicationContext (unless the caller,
    # e.g. a test, supplied one), then drop the bare handle so all access goes
    # through ctx.
    if ctx is None:
        ctx = ApplicationContext(w)
    del w
    ctx.add_user_agent_extra("cmd", "llm-transpile")
    user = ctx.current_user
    logger.debug(f"User: {user}")

    # Hard gate: this path sends user content to a foundation model, so the
    # command refuses to run without explicit consent via --accept-terms=true.
    if not accept_terms:
        logger.warning(
            """Please read and accept these terms before proceeding:
            This feature leverages a Large Language Model (LLM) to analyse and convert
            your provided content, code and data. You consent to your content being
            transmitted to, processed by, and returned from the foundation models hosted
            by Databricks or external foundation models you have configured in your
            workspace. The outputs of the LLM are generated automatically without human
            review, and may contain inaccuracies or errors. You are responsible for
            reviewing and validating all outputs before relying on them for any critical
            or production use.

            By using this feature you accept these terms, re-run with '--accept-terms=true'.
            """
        )
        raise SystemExit("LLM transpiler terms not accepted, exiting.")

    prompts = ctx.prompts
    resource_configurator = ctx.resource_configurator

    # Prompt for any missing required arguments and validate them.
    # (Previous comment claimed values were read from config.yml; the helper
    # actually prompts interactively.)
    input_source, output_ws_folder, source_dialect = _validate_llm_transpile_args(
        input_source,
        output_ws_folder,
        source_dialect,
        prompts,
    )

    # Resolve the Unity Catalog resources used for staging; prompt when absent.
    if catalog_name is None:
        catalog_name = resource_configurator.prompt_for_catalog_setup(default_catalog_name="lakebridge")

    if schema_name is None:
        schema_name = resource_configurator.prompt_for_schema_setup(catalog=catalog_name, default_schema_name="switch")

    if volume is None:
        volume = resource_configurator.prompt_for_volume_setup(
            catalog=catalog_name, schema=schema_name, default_volume_name="switch_volume"
        )

    # Check the user can use the chosen catalog/schema/volume before launching
    # anything (presumably raises on missing access — confirm in configurator).
    resource_configurator.has_necessary_access(catalog_name, schema_name, volume)

    if foundation_model is None:
        foundation_model = resource_configurator.prompt_for_foundation_model_choice()

    # The Switch job is registered in install state by the installer; without it
    # there is nothing to run, so fail fast with remediation instructions.
    job_list = ctx.install_state.jobs
    if "Switch" not in job_list:
        logger.debug(f"Missing Switch from installed state jobs: {job_list!r}")
        raise RuntimeError(
            "Switch Job not found. "
            "Please run 'databricks labs lakebridge install-transpile --include-llm-transpiler true' first."
        )
    job_id = int(job_list["Switch"])
    logger.debug(f"Switch job ID found: {job_id}")

    ctx.add_user_agent_extra("transpiler_source_dialect", source_dialect)
    # Stage the local sources into the UC volume, then launch the Switch job.
    job_runner = SwitchRunner(ctx.workspace_client)
    volume_input_path = job_runner.upload_to_volume(
        local_path=Path(input_source),
        catalog=catalog_name,
        schema=schema_name,
        volume=volume,
    )

    job_runner.run(
        volume_input_path=volume_input_path,
        output_ws_folder=output_ws_folder,
        source_tech=source_dialect,
        catalog=catalog_name,
        schema=schema_name,
        foundation_model=foundation_model,
        job_id=job_id,
    )
830 | 963 | @lakebridge.command() |
831 | 964 | def create_profiler_dashboard( |
832 | 965 | *, |
|
0 commit comments