diff --git a/pandasai/cli/__init__.py b/pandasai/cli/__init__.py
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/pandasai/cli/__init__.py
@@ -0,0 +1 @@
+
diff --git a/pandasai/cli/main.py b/pandasai/cli/main.py
new file mode 100644
index 000000000..77237e6b2
--- /dev/null
+++ b/pandasai/cli/main.py
@@ -0,0 +1,162 @@
+import os
+import re
+
+import click
+
+from pandasai import DatasetLoader
+from pandasai.data_loader.semantic_layer_schema import (
+    SemanticLayerSchema,
+    Source,
+    SQLConnectionConfig,
+)
+from pandasai.helpers.path import find_project_root, get_validated_dataset_path
+
+
+def validate_api_key(api_key: str) -> bool:
+    """Validate PandaBI API key format."""
+    pattern = r"^PAI-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
+    return bool(re.match(pattern, api_key))
+
+
+@click.group()
+def cli():
+    """šŸ¼ PandaAI CLI - Manage your datasets with ease"""
+    pass
+
+
+@cli.group()
+def dataset():
+    """šŸ“Š Dataset management commands"""
+    pass
+
+
+@dataset.command()
+def create():
+    """šŸŽØ Create a new dataset through a guided process"""
+    click.echo("šŸš€ Let's create a new dataset!\n")
+
+    # Get organization and dataset name
+    while True:
+        path = click.prompt("šŸ“ Enter the dataset path (format: organization/dataset)")
+        try:
+            org_name, dataset_name = get_validated_dataset_path(path)
+            break
+        except ValueError as e:
+            click.echo(f"āŒ Error: {str(e)}")
+
+    dataset_directory = os.path.join(
+        find_project_root(), "datasets", org_name, dataset_name
+    )
+
+    # Check if dataset already exists
+    if os.path.exists(dataset_directory):
+        schema_path = os.path.join(dataset_directory, "schema.yaml")
+        if os.path.exists(schema_path):
+            click.echo(f"āŒ Error: Dataset already exists at path: {path}")
+            return
+
+    # Get dataset metadata
+    name = click.prompt("šŸ“ Enter dataset name", default=dataset_name)
+    description = click.prompt("šŸ“‹ Enter dataset description", default="")
+
+    # Get source configuration
+    source_type = click.prompt(
+        "šŸ”Œ Enter source type",
+        type=click.Choice(["mysql", "postgres"]),
+        default="mysql",
+    )
+
+    table_name = click.prompt("šŸ“¦ Enter table name")
+
+    # Build connection configuration
+    connection_config = {
+        "host": click.prompt("šŸŒ Enter host", default="localhost"),
+        "port": click.prompt("šŸ” Enter port", type=int),
+        "database": click.prompt("šŸ’¾ Enter database name"),
+        "user": click.prompt("šŸ‘¤ Enter username"),
+        "password": click.prompt("šŸ”‘ Enter password", hide_input=True),
+    }
+
+    # Create source configuration
+    source = {
+        "type": source_type,
+        "table": table_name,
+        "connection": SQLConnectionConfig(**connection_config),
+    }
+
+    # Create schema
+    schema = SemanticLayerSchema(
+        name=name, description=description, source=Source(**source)
+    )
+
+    # Create directory and save schema
+    os.makedirs(dataset_directory, exist_ok=True)
+    schema_path = os.path.join(dataset_directory, "schema.yaml")
+
+    with open(schema_path, "w") as yml_file:
+        yml_file.write(schema.to_yaml())
+
+    click.echo(f"\nāœØ Dataset created successfully at: {dataset_directory}")
+
+
+@cli.command()
+@click.argument("api_key")
+def login(api_key: str):
+    """šŸ”‘ Authenticate with your PandaBI API key"""
+    if not validate_api_key(api_key):
+        click.echo(
+            "āŒ Invalid API key format. Expected format: PAI-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
+        )
+        return
+
+    env_path = os.path.join(find_project_root(), ".env")
+    env_content = ""
+    new_line = f"PANDABI_API_KEY={api_key}\n"
+
+    # Read existing .env if it exists
+    if os.path.exists(env_path):
+        with open(env_path, "r") as f:
+            lines = f.readlines()
+        # Filter out existing PANDABI_API_KEY line if present
+        lines = [line for line in lines if not line.startswith("PANDABI_API_KEY=")]
+        env_content = "".join(lines)
+        if env_content and not env_content.endswith("\n"):
+            env_content += "\n"
+
+    # Write updated content
+    with open(env_path, "w") as f:
+        f.write(env_content + new_line)
+
+    click.echo("āœ… Successfully authenticated with PandaBI!")
+
+
+@cli.command()
+@click.argument("dataset_path")
+def pull(dataset_path):
+    """šŸ“„ Pull a dataset from a remote source"""
+    try:
+        click.echo(f"šŸ”„ Pulling dataset from: {dataset_path}")
+        dataset_loader = DatasetLoader()
+        df = dataset_loader.load(dataset_path)
+        df.pull()
+        click.echo(f"\nāœØ Dataset successfully pulled from path: {dataset_path}")
+    except Exception as e:
+        click.echo(f"āŒ Error pulling dataset: {str(e)}")
+
+
+@cli.command()
+@click.argument("dataset_path")
+def push(dataset_path):
+    """šŸ“¤ Push a dataset to a remote source"""
+    try:
+        click.echo(f"šŸ”„ Pushing dataset to: {dataset_path}")
+        dataset_loader = DatasetLoader()
+        df = dataset_loader.load(dataset_path)
+        df.push()
+        click.echo(f"\nāœØ Dataset successfully pushed to path: {dataset_path}")
+    except Exception as e:
+        click.echo(f"āŒ Error pushing dataset: {str(e)}")
+
+
+if __name__ == "__main__":
+    cli()
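Reviewer note: `get_validated_dataset_path` is imported from `pandasai.helpers.path` and its implementation is not part of this diff. Going by the error messages exercised in tests/test_cli.py below, a minimal sketch of the expected contract would look like the following; the exact slug pattern used by the real helper is an assumption here:

    import re

    def get_validated_dataset_path(path: str) -> tuple[str, str]:
        """Split 'organization/dataset' and validate both halves (sketch only)."""
        parts = path.split("/")
        if len(parts) != 2:
            raise ValueError("Path must be in format 'organization/dataset'")
        org_name, dataset_name = parts
        # Assumed slug rule: lowercase alphanumerics separated by single hyphens.
        slug = re.compile(r"^[a-z0-9]+(-[a-z0-9]+)*$")
        if not slug.match(org_name):
            raise ValueError(
                "Organization name must be lowercase and use hyphens instead of spaces"
            )
        if not slug.match(dataset_name):
            raise ValueError(
                "Dataset name must be lowercase and use hyphens instead of spaces"
            )
        return org_name, dataset_name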
diff --git a/pyproject.toml b/pyproject.toml
index fe91147ec..e4f1e281a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -55,6 +55,9 @@ mkdocs = "1.5.3"
 mkdocstrings-python = "1.7.2"
 markdown-include = "^0.6.0"
 
+[tool.poetry.scripts]
+pai = "pandasai.cli.main:cli"
+
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
diff --git a/tests/test_cli.py b/tests/test_cli.py
new file mode 100644
index 000000000..6962c0b2f
--- /dev/null
+++ b/tests/test_cli.py
@@ -0,0 +1,226 @@
+import os
+from unittest.mock import MagicMock, patch
+
+import pytest
+from click.testing import CliRunner
+
+from pandasai.cli.main import cli, get_validated_dataset_path, validate_api_key
+
+
+def test_validate_api_key():
+    # Valid API key
+    assert validate_api_key("PAI-59ca2c4a-7998-4195-81d1-5c597f998867")
+
+    # Invalid API keys
+    assert not validate_api_key("PAI-59ca2c4a-7998-4195-81d1")  # Too short
+    assert not validate_api_key("XXX-59ca2c4a-7998-4195-81d1-5c597f998867")  # Wrong prefix
+    assert not validate_api_key("PAI-59ca2c4a-7998-4195-81d1-5c597f99886")  # Wrong length
+    assert not validate_api_key("PAI-59ca2c4a7998419581d15c597f998867")  # Missing hyphens
+    assert not validate_api_key("PAI-XXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX")  # Invalid characters
+
+
+def test_login_command(tmp_path):
+    runner = CliRunner()
+
+    with runner.isolated_filesystem(temp_dir=tmp_path) as td:
+        # Test with valid API key
+        result = runner.invoke(
+            cli, ["login", "PAI-59ca2c4a-7998-4195-81d1-5c597f998867"]
+        )
+        assert result.exit_code == 0
+        assert "Successfully authenticated with PandaBI!" in result.output
+
+        # Verify .env file content
+        with open(os.path.join(td, ".env")) as f:
+            content = f.read()
+        assert "PANDABI_API_KEY=PAI-59ca2c4a-7998-4195-81d1-5c597f998867" in content
+
+        # Test with invalid API key: the command prints the error and returns
+        # normally, so the exit code is still 0
+        result = runner.invoke(cli, ["login", "invalid-key"])
+        assert result.exit_code == 0
+        assert "Invalid API key format" in result.output
+
+
+def test_login_command_preserves_existing_env(tmp_path):
+    runner = CliRunner()
+
+    with runner.isolated_filesystem(temp_dir=tmp_path) as td:
+        # Create .env with existing variables
+        with open(os.path.join(td, ".env"), "w") as f:
+            f.write("EXISTING_VAR=value\n")
+            f.write("PANDABI_API_KEY=PAI-old-key-that-should-be-replaced\n")
+            f.write("ANOTHER_VAR=another_value\n")
+
+        # Update API key
+        result = runner.invoke(
+            cli, ["login", "PAI-59ca2c4a-7998-4195-81d1-5c597f998867"]
+        )
+        assert result.exit_code == 0
+
+        # Verify .env file content
+        with open(os.path.join(td, ".env")) as f:
+            content = f.read().splitlines()
+        assert "EXISTING_VAR=value" in content
+        assert "ANOTHER_VAR=another_value" in content
+        assert "PANDABI_API_KEY=PAI-59ca2c4a-7998-4195-81d1-5c597f998867" in content
+        assert "PANDABI_API_KEY=PAI-old-key-that-should-be-replaced" not in content
+
+
+def test_get_validated_dataset_path_valid():
+    """Test get_validated_dataset_path with valid input"""
+    org, dataset = get_validated_dataset_path("my-org/my-dataset")
+    assert org == "my-org"
+    assert dataset == "my-dataset"
+
+
+def test_get_validated_dataset_path_invalid_format():
+    """Test get_validated_dataset_path with invalid format"""
+    with pytest.raises(
+        ValueError, match="Path must be in format 'organization/dataset'"
+    ):
+        get_validated_dataset_path("invalid-path")
+
+
+def test_get_validated_dataset_path_invalid_org():
+    """Test get_validated_dataset_path with invalid organization name"""
+    with pytest.raises(
+        ValueError,
+        match="Organization name must be lowercase and use hyphens instead of spaces",
+    ):
+        get_validated_dataset_path("INVALID_ORG/dataset")
+
+
+def test_get_validated_dataset_path_invalid_dataset():
+    """Test get_validated_dataset_path with invalid dataset name"""
+    with pytest.raises(
+        ValueError,
+        match="Dataset name must be lowercase and use hyphens instead of spaces",
+    ):
+        get_validated_dataset_path("my-org/INVALID_DATASET")
+
+
+@pytest.fixture
+def mock_dataset_loader():
+    with patch("pandasai.cli.main.DatasetLoader") as mock:
+        yield mock
+
+
+@pytest.fixture
+def mock_project_root(tmp_path):
+    datasets_dir = tmp_path / "datasets"
+    datasets_dir.mkdir()
+    with patch("pandasai.cli.main.find_project_root") as mock:
+        mock.return_value = str(tmp_path)
+        yield mock
+
+
+@patch("pandasai.cli.main.SemanticLayerSchema")
+def test_dataset_create_command(mock_schema, mock_project_root, tmp_path):
+    """Test dataset create command with valid input"""
+    runner = CliRunner()
+
+    # Mock schema instance
+    mock_schema_instance = MagicMock()
+    mock_schema_instance.to_yaml.return_value = "mock yaml content"
+    mock_schema.return_value = mock_schema_instance
+
+    # Mock user input
+    inputs = [
+        "test-org/test-dataset\n",  # dataset path
+        "\n",  # dataset name (default)
+        "\n",  # description (empty)
+        "\n",  # source type (default: mysql)
+        "users\n",  # table name
+        "\n",  # host (default: localhost)
+        "3306\n",  # port
+        "testdb\n",  # database name
+        "testuser\n",  # username
+        "testpass\n",  # password
+    ]
+
+    result = runner.invoke(cli, ["dataset", "create"], input="".join(inputs))
+    assert result.exit_code == 0
+    assert "āœØ Dataset created successfully" in result.output
+
+    # Verify directory and file were created
+    dataset_dir = tmp_path / "datasets" / "test-org" / "test-dataset"
+    assert dataset_dir.exists()
+    assert (dataset_dir / "schema.yaml").exists()
+
+
+@patch("pandasai.cli.main.SemanticLayerSchema")
+def test_dataset_create_existing(mock_schema, mock_project_root, tmp_path):
+    """Test dataset create command when dataset already exists"""
+    runner = CliRunner()
+
+    # Create dataset directory and schema file
+    dataset_dir = tmp_path / "datasets" / "test-org" / "test-dataset"
+    dataset_dir.mkdir(parents=True)
+    schema_file = dataset_dir / "schema.yaml"
+    schema_file.write_text("test content")
+
+    result = runner.invoke(cli, ["dataset", "create"], input="test-org/test-dataset\n")
+    assert result.exit_code == 0
+    assert "Error: Dataset already exists" in result.output
+
+
+def test_pull_command(mock_dataset_loader):
+    """Test pull command"""
+    runner = CliRunner()
+    mock_df = MagicMock()
+    mock_dataset_loader.return_value.load.return_value = mock_df
+
+    result = runner.invoke(cli, ["pull", "test-org/test-dataset"])
+
+    assert result.exit_code == 0
+    mock_dataset_loader.return_value.load.assert_called_once_with(
+        "test-org/test-dataset"
+    )
+    mock_df.pull.assert_called_once()
+    assert "āœØ Dataset successfully pulled" in result.output
+
+
+def test_push_command(mock_dataset_loader):
+    """Test push command"""
+    runner = CliRunner()
+    mock_df = MagicMock()
+    mock_dataset_loader.return_value.load.return_value = mock_df
+
+    result = runner.invoke(cli, ["push", "test-org/test-dataset"])
+
+    assert result.exit_code == 0
+    mock_dataset_loader.return_value.load.assert_called_once_with(
+        "test-org/test-dataset"
+    )
+    mock_df.push.assert_called_once()
+    assert "āœØ Dataset successfully pushed" in result.output
+
+
+def test_pull_command_error(mock_dataset_loader):
+    """Test pull command with error"""
+    runner = CliRunner()
+    mock_dataset_loader.return_value.load.side_effect = Exception("Test error")
+
+    result = runner.invoke(cli, ["pull", "test-org/test-dataset"])
+
+    assert result.exit_code == 0  # The error is caught and reported, not raised
+    assert "Error pulling dataset: Test error" in result.output
+
+
+def test_push_command_error(mock_dataset_loader):
+    """Test push command with error"""
+    runner = CliRunner()
+    mock_dataset_loader.return_value.load.side_effect = Exception("Test error")
+
+    result = runner.invoke(cli, ["push", "test-org/test-dataset"])
+
+    assert result.exit_code == 0  # The error is caught and reported, not raised
+    assert "Error pushing dataset: Test error" in result.output
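Reviewer note: for a quick smoke check without installing the `pai` console script added in pyproject.toml, the click test runner used throughout the suite above also works interactively. A minimal sketch, not part of the test suite:

    from click.testing import CliRunner

    from pandasai.cli.main import cli

    runner = CliRunner()

    # Top-level help should list the dataset group plus login, pull and push.
    print(runner.invoke(cli, ["--help"]).output)

    # An invalid key is rejected before anything is written to .env.
    result = runner.invoke(cli, ["login", "not-a-key"])
    assert "Invalid API key format" in result.output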