diff --git a/demos/demos_databases_apis/microsoft/kusto/graphistry_ADX_kusto_demo.ipynb b/demos/demos_databases_apis/microsoft/kusto/graphistry_ADX_kusto_demo.ipynb index a1f902ef34..08631880d1 100644 --- a/demos/demos_databases_apis/microsoft/kusto/graphistry_ADX_kusto_demo.ipynb +++ b/demos/demos_databases_apis/microsoft/kusto/graphistry_ADX_kusto_demo.ipynb @@ -677,7 +677,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.3" + "version": "3.10.18" } }, "nbformat": 4, diff --git a/demos/demos_databases_apis/microsoft/sentinel/example.env b/demos/demos_databases_apis/microsoft/sentinel/example.env new file mode 100644 index 0000000000..0204994648 --- /dev/null +++ b/demos/demos_databases_apis/microsoft/sentinel/example.env @@ -0,0 +1,11 @@ +# Graphistry credentials (register at https://www.graphistry.com) +GRAPHISTRY_PERSONAL_KEY_ID=your_personal_key_id +GRAPHISTRY_PERSONAL_KEY_SECRET=your_personal_key_secret + +# Microsoft Sentinel workspace +SENTINEL_WORKSPACE_ID=12345678-1234-1234-1234-123456789abc + +# Optional: Service Principal authentication (if not using Azure CLI) +# AZURE_TENANT_ID=your-tenant-id +# AZURE_CLIENT_ID=your-client-id +# AZURE_CLIENT_SECRET=your-client-secret \ No newline at end of file diff --git a/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb new file mode 100644 index 0000000000..417df65e48 --- /dev/null +++ b/demos/demos_databases_apis/microsoft/sentinel/sentinel_security_analysis.ipynb @@ -0,0 +1,529 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Microsoft Sentinel Security Analysis with Graphistry\n", + "\n", + "This notebook demonstrates how to use Graphistry with Microsoft Sentinel (Log Analytics) to perform security analysis and visualization using KQL queries.\n", + "\n", + "## Prerequisites\n", + "\n", + "1. 
**Azure Access**: You need access to a Microsoft Sentinel workspace\n", + "2. **Authentication**: Azure CLI (`az login`), interactive device code sign-in, or service principal credentials\n", + "3. **Dependencies**: Install required packages\n", + "\n", + "```bash\n", + "pip install 'graphistry[sentinel]' python-dotenv\n", + "```\n", + "\n", + "## Environment Setup\n", + "\n", + "1. Copy `example.env` to `.env` in the same directory as this notebook\n", + "2. Edit `.env` with your actual credentials:\n", + "\n", + "```bash\n", + "cp example.env .env\n", + "# Then edit .env with your credentials\n", + "```\n", + "\n", + "The `.env` file should contain:\n", + "\n", + "```env\n", + "# Graphistry credentials (register at https://www.graphistry.com)\n", + "GRAPHISTRY_PERSONAL_KEY_ID=your_personal_key_id\n", + "GRAPHISTRY_PERSONAL_KEY_SECRET=your_personal_key_secret\n", + "\n", + "# Microsoft Sentinel workspace\n", + "SENTINEL_WORKSPACE_ID=12345678-1234-1234-1234-123456789abc\n", + "\n", + "# Optional: Service Principal authentication (if not using Azure CLI)\n", + "# AZURE_TENANT_ID=your-tenant-id\n", + "# AZURE_CLIENT_ID=your-client-id\n", + "# AZURE_CLIENT_SECRET=your-client-secret\n", + "```\n", + "\n", + "**Important**: The `.env` file is gitignored to avoid committing secrets. 
Never commit actual credentials!\n", + "\n", + "## Getting Started\n", + "\n", + "### Option 1: Interactive Authentication (Recommended for Development)\n", + "\n", + "The next cell uses device code authentication (`use_device_auth=True`), which shows a code and URL to complete sign-in in your browser. To reuse Azure CLI credentials instead, log in first and omit `use_device_auth` when calling `configure_sentinel`:\n", + "```bash\n", + "az login\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": "import graphistry\nfrom datetime import datetime, timedelta\nimport pandas as pd\nimport os\nfrom dotenv import load_dotenv\n\n# Load environment variables from .env file\n# Option 1: Load from current directory (default)\nload_dotenv()\n\n# Option 2: Load from a custom location (uncomment and modify as needed)\n# load_dotenv('config/custom.env') # Load from a relative path ('~' is not expanded automatically)\n# load_dotenv('/path/to/your/.env') # Load from absolute path\n# load_dotenv(os.path.expanduser('~/sentinel-credentials.env')) # Expand ~ to home directory\n\n# Register for free at https://www.graphistry.com\n# Credentials loaded from .env file\ngraphistry.register(\n api=3,\n protocol=\"https\",\n server=\"hub.graphistry.com\",\n personal_key_id=os.getenv('GRAPHISTRY_PERSONAL_KEY_ID'),\n personal_key_secret=os.getenv('GRAPHISTRY_PERSONAL_KEY_SECRET')\n)\n\n# Configure Sentinel connection\n# Workspace ID loaded from .env file\nWORKSPACE_ID = os.getenv('SENTINEL_WORKSPACE_ID')\n\nif not WORKSPACE_ID:\n raise ValueError(\"SENTINEL_WORKSPACE_ID not found in environment variables. 
Please check your .env file.\")\n\ng = graphistry.configure_sentinel(\n workspace_id=WORKSPACE_ID,\n use_device_auth=True # Use device code authentication\n)" + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Option 2: Service Principal Authentication (Recommended for Production)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": "# Alternative: Service Principal authentication from .env file\n# Uncomment the lines below if you prefer Service Principal over device authentication\n# g = graphistry.configure_sentinel(\n# workspace_id=os.getenv('SENTINEL_WORKSPACE_ID'),\n# tenant_id=os.getenv('AZURE_TENANT_ID'),\n# client_id=os.getenv('AZURE_CLIENT_ID'),\n# client_secret=os.getenv('AZURE_CLIENT_SECRET')\n# )\n\n# Alternative: Use DefaultAzureCredential (tries Azure CLI, Managed Identity, etc.)\n# g = graphistry.configure_sentinel(\n# workspace_id=os.getenv('SENTINEL_WORKSPACE_ID')\n# )" + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test Connection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": "# Test the connection\n# Note: If using device authentication, you'll see a code and URL to visit for authentication\ntry:\n g.sentinel_health_check()\n print(\"✅ Successfully connected to Microsoft Sentinel!\")\nexcept Exception as e:\n print(f\"❌ Connection failed: {e}\")\n print(\"💡 If using device auth, make sure to complete the authentication in your browser first.\")" + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Explore Available Data\n", + "\n", + "Let's start by exploring what tables are available in your Sentinel workspace:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": "# List all available tables\ntry:\n tables_df = g.sentinel_tables()\n print(f\"Found {len(tables_df)} tables in workspace\")\n 
print(\"\\nSecurity-related tables:\")\n security_tables = tables_df[tables_df['DataType'].str.contains('Security|Alert|Incident', case=False, na=False)]\n if not security_tables.empty:\n print(security_tables['DataType'].tolist())\n else:\n print(\"No security-related tables found\")\n print(f\"\\nAll tables: {tables_df['DataType'].tolist()}\")\nexcept Exception as e:\n print(f\"Failed to list tables: {e}\")\n print(\"This might happen if the workspace has no data or insufficient permissions\")" + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": "# Get schema for SecurityEvent table (if available)\ntry:\n if 'SecurityEvent' in tables_df['DataType'].values:\n schema = g.sentinel_schema('SecurityEvent')\n print(\"SecurityEvent table schema:\")\n print(schema[['ColumnName', 'DataType']].head(10))\n else:\n print(\"SecurityEvent table not found in workspace\")\n print(\"Available tables for schema inspection:\", tables_df['DataType'].head(5).tolist())\nexcept Exception as e:\n print(f\"Failed to get schema: {e}\")" + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Security Analysis Examples\n", + "\n", + "### 1. 
Failed Login Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found 1 users with multiple failed logins\n", + " UserPrincipalName FailureCount UniqueIPs \\\n", + "0 user@example.com 12 3 \n", + "\n", + " LatestFailure \n", + "0 2025-09-22 10:14:57.559331+00:00 \n" + ] + } + ], + "source": [ + "# Query failed login attempts (last 7 days)\n", + "failed_logins_query = \"\"\"\n", + "SigninLogs\n", + "| where TimeGenerated > ago(7d)\n", + "| where ResultType != \"0\" // 0 = success\n", + "| project TimeGenerated, UserPrincipalName, IPAddress, Location, ResultType, ResultDescription\n", + "| summarize \n", + " FailureCount = count(),\n", + " UniqueIPs = dcount(IPAddress),\n", + " LatestFailure = max(TimeGenerated)\n", + " by UserPrincipalName\n", + "| where FailureCount > 5\n", + "| order by FailureCount desc\n", + "| take 50\n", + "\"\"\"\n", + "\n", + "try:\n", + " failed_logins = g.kql(failed_logins_query, timespan=timedelta(days=7))\n", + " print(f\"Found {len(failed_logins)} users with multiple failed logins\")\n", + " print(failed_logins.head())\n", + "except Exception as e:\n", + " print(f\"Query failed: {e}\")\n", + " print(\"This might happen if SigninLogs table is not available in your workspace\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. 
Security Alerts Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found 0 security alerts in the last 24 hours\n", + "No alerts found (this is good!)\n" + ] + } + ], + "source": [ + "# Query recent security alerts\n", + "alerts_query = \"\"\"\n", + "SecurityAlert\n", + "| where TimeGenerated > ago(24h)\n", + "| project \n", + " TimeGenerated,\n", + " AlertName,\n", + " AlertSeverity,\n", + " CompromisedEntity,\n", + " Tactics,\n", + " Techniques,\n", + " Status\n", + "| order by TimeGenerated desc\n", + "\"\"\"\n", + "\n", + "try:\n", + " alerts = g.kql_last(alerts_query, hours=24)\n", + " print(f\"Found {len(alerts)} security alerts in the last 24 hours\")\n", + " if len(alerts) > 0:\n", + " print(\"\\nAlert severity distribution:\")\n", + " print(alerts['AlertSeverity'].value_counts())\n", + " print(\"\\nSample alerts:\")\n", + " print(alerts[['TimeGenerated', 'AlertName', 'AlertSeverity']].head())\n", + " else:\n", + " print(\"No alerts found (this is good!)\")\n", + "except Exception as e:\n", + " print(f\"Query failed: {e}\")\n", + " print(\"This might happen if SecurityAlert table is not available\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. 
Network Traffic Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Query network connections (example with CommonSecurityLog)\n", + "network_query = \"\"\"\n", + "CommonSecurityLog\n", + "| where TimeGenerated > ago(1h)\n", + "| where isnotempty(SourceIP) and isnotempty(DestinationIP)\n", + "| project \n", + " TimeGenerated,\n", + " SourceIP,\n", + " DestinationIP,\n", + " DestinationPort,\n", + " Protocol,\n", + " Activity,\n", + " DeviceVendor\n", + "| summarize \n", + " ConnectionCount = count(),\n", + " UniquePorts = dcount(DestinationPort)\n", + " by SourceIP, DestinationIP\n", + "| where ConnectionCount > 10\n", + "| order by ConnectionCount desc\n", + "| take 100\n", + "\"\"\"\n", + "\n", + "try:\n", + " network_data = g.kql_last(network_query, hours=1)\n", + " print(f\"Found {len(network_data)} significant network connections\")\n", + " if len(network_data) > 0:\n", + " print(network_data.head())\n", + "except Exception as e:\n", + " print(f\"Query failed: {e}\")\n", + " print(\"This might happen if CommonSecurityLog table is not available\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Graph Visualization\n", + "\n", + "Now let's create some graph visualizations from the security data:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. User-IP Relationship Graph" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Created graph with 7 nodes and 4 edges\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Failed memoization speedup attempt due to Pandas internal hash function failing. 
Continuing without memoization speedups.This is fine, but for speedups around skipping re-uploads of previously seen tables, try identifying which columns have types that Pandas cannot hash, and convert them to hashable types like strings.\n" + ] + } + ], + "source": [ + "# Query for user-IP relationships\n", + "user_ip_query = \"\"\"\n", + "SigninLogs\n", + "| where TimeGenerated > ago(24h)\n", + "| where isnotempty(UserPrincipalName) and isnotempty(IPAddress)\n", + "| project UserPrincipalName, IPAddress, TimeGenerated, ResultType, Location\n", + "| summarize \n", + " LoginCount = count(),\n", + " FailureCount = countif(ResultType != \"0\"),\n", + " LatestLogin = max(TimeGenerated),\n", + " Locations = make_set(Location)\n", + " by UserPrincipalName, IPAddress\n", + "| extend RiskScore = FailureCount * 2 + iff(LoginCount == 1, 1, 0)\n", + "| take 500\n", + "\"\"\"\n", + "\n", + "try:\n", + " user_ip_data = g.kql_last(user_ip_query, hours=24)\n", + " \n", + " if len(user_ip_data) > 0:\n", + " # Create nodes and edges for graph visualization\n", + " \n", + " # Create user nodes\n", + " users = user_ip_data[['UserPrincipalName']].drop_duplicates()\n", + " users['node_type'] = 'user'\n", + " users['node_id'] = users['UserPrincipalName']\n", + " users['node_label'] = users['UserPrincipalName']\n", + " \n", + " # Create IP nodes \n", + " ips = user_ip_data[['IPAddress']].drop_duplicates()\n", + " ips['node_type'] = 'ip'\n", + " ips['node_id'] = ips['IPAddress']\n", + " ips['node_label'] = ips['IPAddress']\n", + " \n", + " # Combine nodes\n", + " nodes = pd.concat([\n", + " users[['node_id', 'node_label', 'node_type']],\n", + " ips[['node_id', 'node_label', 'node_type']]\n", + " ], ignore_index=True)\n", + " \n", + " # Create edges\n", + " edges = user_ip_data.copy()\n", + " edges['source'] = edges['UserPrincipalName']\n", + " edges['target'] = edges['IPAddress']\n", + " edges['edge_weight'] = edges['LoginCount']\n", + " edges['edge_color'] = 
edges['RiskScore'].apply(\n", + " lambda x: 'red' if x > 5 else 'orange' if x > 2 else 'green'\n", + " )\n", + " \n", + " # Create and plot graph\n", + " graph = g.nodes(nodes, node='node_id')\\\n", + " .edges(edges, source='source', destination='target')\\\n", + " .encode_point_color('node_type')\\\n", + " .encode_edge_color('edge_color')\\\n", + " .settings(url_params={'splashAfter': 'false'})\n", + " \n", + " print(f\"Created graph with {len(nodes)} nodes and {len(edges)} edges\")\n", + " \n", + " # Plot the graph\n", + " graph.plot()\n", + " else:\n", + " print(\"No data available for user-IP graph\")\n", + " \n", + "except Exception as e:\n", + " print(f\"Graph creation failed: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Alert Correlation Graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Query for alert correlations\n", + "alert_correlation_query = \"\"\"\n", + "SecurityAlert\n", + "| where TimeGenerated > ago(7d)\n", + "| project \n", + " AlertName,\n", + " CompromisedEntity,\n", + " Tactics,\n", + " AlertSeverity,\n", + " TimeGenerated\n", + "| extend EntityType = case(\n", + " CompromisedEntity contains \"@\", \"User\",\n", + " CompromisedEntity matches regex @\"\\\\b(?:[0-9]{1,3}\\\\.){3}[0-9]{1,3}\\\\b\", \"IP\",\n", + " \"Host\"\n", + ")\n", + "| summarize \n", + " AlertCount = count(),\n", + " Severities = make_set(AlertSeverity),\n", + " TacticsList = make_set(Tactics)\n", + " by AlertName, CompromisedEntity, EntityType\n", + "| where AlertCount > 1\n", + "| take 200\n", + "\"\"\"\n", + "\n", + "try:\n", + " alert_data = g.kql(alert_correlation_query, timespan=timedelta(days=7))\n", + " \n", + " if len(alert_data) > 0:\n", + " # Create alert type nodes\n", + " alert_types = alert_data[['AlertName']].drop_duplicates()\n", + " alert_types['node_type'] = 'alert'\n", + " alert_types['node_id'] = alert_types['AlertName']\n", + " 
alert_types['node_label'] = alert_types['AlertName']\n", + " \n", + " # Create entity nodes\n", + " entities = alert_data[['CompromisedEntity', 'EntityType']].drop_duplicates()\n", + " entities['node_type'] = entities['EntityType'].str.lower()\n", + " entities['node_id'] = entities['CompromisedEntity']\n", + " entities['node_label'] = entities['CompromisedEntity']\n", + " \n", + " # Combine nodes\n", + " alert_nodes = pd.concat([\n", + " alert_types[['node_id', 'node_label', 'node_type']],\n", + " entities[['node_id', 'node_label', 'node_type']]\n", + " ], ignore_index=True)\n", + " \n", + " # Create edges (alert -> entity)\n", + " alert_edges = alert_data.copy()\n", + " alert_edges['source'] = alert_edges['AlertName']\n", + " alert_edges['target'] = alert_edges['CompromisedEntity']\n", + " alert_edges['edge_weight'] = alert_edges['AlertCount']\n", + " \n", + " # Create and plot graph\n", + " alert_graph = g.nodes(alert_nodes, node='node_id')\\\n", + " .edges(alert_edges, source='source', destination='target')\\\n", + " .encode_point_color('node_type')\\\n", + " .encode_edge_size('edge_weight')\\\n", + " .settings(url_params={'splashAfter': 'false'})\n", + " \n", + " print(f\"Created alert correlation graph with {len(alert_nodes)} nodes and {len(alert_edges)} edges\")\n", + " \n", + " # Plot the graph\n", + " alert_graph.plot()\n", + " else:\n", + " print(\"No alert correlation data available\")\n", + " \n", + "except Exception as e:\n", + " print(f\"Alert correlation graph failed: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Advanced Analysis\n", + "\n", + "### Multi-table Correlation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Complex query joining multiple data sources\n", + "correlation_query = \"\"\"\n", + "// Get security incidents\n", + "let incidents = SecurityIncident\n", + "| where TimeGenerated > ago(30d)\n", + "| project IncidentNumber, 
Title, Severity, Status, Owner;\n", + "\n", + "// Get related alerts \n", + "let alerts = SecurityAlert\n", + "| where TimeGenerated > ago(30d)\n", + "| project AlertName, CompromisedEntity, AlertSeverity, Tactics;\n", + "\n", + "// Join and analyze\n", + "incidents\n", + "| join kind=inner (alerts) on $left.Title == $right.AlertName\n", + "| summarize \n", + " IncidentCount = dcount(IncidentNumber),\n", + " AffectedEntities = dcount(CompromisedEntity),\n", + " TacticsUsed = make_set(Tactics)\n", + " by Title, Severity\n", + "| order by IncidentCount desc\n", + "\"\"\"\n", + "\n", + "try:\n", + " correlation_data = g.kql(correlation_query, timespan=timedelta(days=30))\n", + " print(f\"Found {len(correlation_data)} incident-alert correlations\")\n", + " if len(correlation_data) > 0:\n", + " print(correlation_data.head())\n", + "except Exception as e:\n", + " print(f\"Correlation query failed: {e}\")\n", + " print(\"This requires both SecurityIncident and SecurityAlert tables\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "## Summary\n\nThis notebook demonstrated:\n\n1. **Connecting to Microsoft Sentinel** using Azure authentication (device code, service principal, or DefaultAzureCredential)\n2. **Exploring available data** with `sentinel_tables()` and `sentinel_schema()`\n3. **Security analysis** using KQL queries for:\n - Failed login analysis\n - Security alerts monitoring\n - Network traffic analysis\n4. **Graph visualization** of:\n - User-IP relationships\n - Alert correlations\n5. 
**Advanced correlation** across multiple data sources\n\n## Next Steps\n\n- **Customize queries** for your specific security use cases and available data tables\n- **Create automated dashboards** by scheduling notebook execution\n- **Integrate with threat intelligence** feeds using additional KQL joins\n- **Build detection rules** based on graph patterns you discover\n- **Scale analysis** by adjusting time windows and data volumes\n\n## Troubleshooting Tips\n\n- **No data found**: Some workspaces may not have SecurityEvent, SigninLogs, or SecurityAlert tables\n- **Authentication issues**: Try `az login` first, or check your service principal credentials\n- **Permission errors**: Ensure your account has Log Analytics Reader permissions\n- **Empty results**: Adjust time ranges - some workspaces have limited data retention\n\n## Resources\n\n- [Microsoft Sentinel KQL Reference](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/query/)\n- [Graphistry Documentation](https://pygraphistry.readthedocs.io/)\n- [Azure Monitor Query Documentation](https://docs.microsoft.com/en-us/python/api/azure-monitor-query/)\n- [Sentinel Data Connectors](https://docs.microsoft.com/en-us/azure/sentinel/connect-data-sources)" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Graphistry Dev", + "language": "python", + "name": "graphistry-dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.18" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/graphistry/__init__.py b/graphistry/__init__.py index fa6a35d340..e30d6214ab 100644 --- a/graphistry/__init__.py +++ b/graphistry/__init__.py @@ -41,6 +41,8 @@ kusto_from_client, kql, kusto_graph, + configure_sentinel, + sentinel_from_client, gsql, gsql_endpoint, cosmos, diff --git 
a/graphistry/client_session.py b/graphistry/client_session.py index f12f7bcb56..4861b65140 100644 --- a/graphistry/client_session.py +++ b/graphistry/client_session.py @@ -9,6 +9,7 @@ from . import util from .plugins_types.spanner_types import SpannerConfig from .plugins_types.kusto_types import KustoConfig +from .plugins_types.sentinel_types import SentinelConfig @@ -85,6 +86,7 @@ def __init__(self) -> None: # NOTE: These are dataclasses, so we shallow copy them self.kusto: Optional[KustoConfig] = None self.spanner: Optional[SpannerConfig] = None + self.sentinel: Optional[SentinelConfig] = None # TODO: Migrate to a pattern like Kusto or Spanner self._bolt_driver: Optional[Any] = None diff --git a/graphistry/plotter.py b/graphistry/plotter.py index bdf71f3117..ed33881648 100644 --- a/graphistry/plotter.py +++ b/graphistry/plotter.py @@ -13,13 +13,14 @@ from .compute.conditional import ConditionalMixin from .compute.cluster import ClusterMixin from .plugins.kusto import KustoMixin +from .plugins.sentinel import SentinelMixin from .plugins.spanner import SpannerMixin from .client_session import AuthManagerProtocol # NOTE: Cooperative mixins must call: # super().__init__(*a, **kw) in their __init__ method # to pass along args/kwargs to the next mixin in the chain class Plotter( - KustoMixin, SpannerMixin, + SentinelMixin, KustoMixin, SpannerMixin, CosmosMixin, NeptuneMixin, HeterographEmbedModuleMixin, SearchToGraphMixin, @@ -51,6 +52,7 @@ class Plotter( - :py:class:`graphistry.gremlin.GremlinMixin`: Provides Gremlin query support for graph databases. - :py:class:`graphistry.gremlin.CosmosMixin`: Integrates with Azure Cosmos DB. - :py:class:`graphistry.gremlin.NeptuneMixin`: Integrates with AWS Neptune DB. + - :py:class:`graphistry.plugins.sentinel.SentinelMixin`: Integrates with Microsoft Sentinel Log Analytics. - :py:class:`graphistry.plugins.kusto.KustoMixin`: Integrates with Azure Kusto DB. 
- :py:class:`graphistry.plugins.spanner.SpannerMixin`: Integrates with Google Spanner DB. diff --git a/graphistry/plugins/kusto.py b/graphistry/plugins/kusto.py index 69e2bbe80d..9593988e62 100644 --- a/graphistry/plugins/kusto.py +++ b/graphistry/plugins/kusto.py @@ -1,6 +1,7 @@ import time import pandas as pd -from typing import Any, List, Optional, TYPE_CHECKING, Union, overload, Literal +from typing import Any, List, Optional, TYPE_CHECKING, Union, overload, Literal, Tuple +from datetime import datetime, timedelta if TYPE_CHECKING: from azure.kusto.data import KustoClient @@ -176,9 +177,11 @@ def kql( self, query: str, *, + timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] = None, unwrap_nested: Optional[bool] = None, - single_table: Literal[True] = True - ) -> List[pd.DataFrame]: + single_table: Literal[True] = True, + include_statistics: bool = False + ) -> pd.DataFrame: ... @overload @@ -186,9 +189,11 @@ def kql( self, query: str, *, + timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] = None, unwrap_nested: Optional[bool] = None, - single_table: Literal[False] - ) -> pd.DataFrame: + single_table: Literal[False], + include_statistics: bool = False + ) -> List[pd.DataFrame]: ... @overload @@ -196,8 +201,10 @@ def kql( self, query: str, *, + timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] = None, unwrap_nested: Optional[bool] = None, - single_table: bool = True + single_table: bool = True, + include_statistics: bool = False ) -> Union[pd.DataFrame, List[pd.DataFrame]]: ... @@ -205,8 +212,10 @@ def kql( self, query: str, *, + timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] = None, unwrap_nested: Optional[bool] = None, - single_table: bool = True + single_table: bool = True, + include_statistics: bool = False ) -> Union[pd.DataFrame, List[pd.DataFrame]]: """Execute KQL query and return result tables as DataFrames. 
@@ -217,10 +226,14 @@ def kql( :param query: KQL query string to execute :type query: str + :param timespan: Time range for the query (ignored by Kusto, for compatibility with Sentinel) + :type timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] :param unwrap_nested: Strategy for handling nested/dynamic columns :type unwrap_nested: Optional[bool] :param single_table: If True, return single DataFrame (first table if multiple); if False, return list :type single_table: bool + :param include_statistics: Include query statistics (ignored by Kusto, for compatibility with Sentinel) + :type include_statistics: bool :returns: Single DataFrame if single_table=True, else list of DataFrames :rtype: Union[pd.DataFrame, List[pd.DataFrame]] diff --git a/graphistry/plugins/sentinel.py b/graphistry/plugins/sentinel.py new file mode 100644 index 0000000000..d0c4f02a3b --- /dev/null +++ b/graphistry/plugins/sentinel.py @@ -0,0 +1,699 @@ +import time +import pandas as pd +from typing import Any, List, Optional, TYPE_CHECKING, Union, overload, Literal, Tuple +from datetime import datetime, timedelta + +if TYPE_CHECKING: + from azure.monitor.query import LogsQueryClient + from azure.core.credentials import TokenCredential + from azure.core.exceptions import HttpResponseError + from azure.identity import DefaultAzureCredential, ClientSecretCredential, DeviceCodeCredential +else: + LogsQueryClient = Any + TokenCredential = Any + HttpResponseError = Any + DefaultAzureCredential = Any + ClientSecretCredential = Any + DeviceCodeCredential = Any + +from graphistry.Plottable import Plottable +from graphistry.util import setup_logger +from graphistry.plugins_types.sentinel_types import ( + SentinelConfig, + SentinelConnectionError, + SentinelQueryError, + SentinelQueryResult +) + +logger = setup_logger(__name__) + + +class SentinelMixin(Plottable): + """ + Microsoft Sentinel Log Analytics integration for Graphistry. 
+ + This mixin allows you to query Microsoft Sentinel (Azure Log Analytics) + using KQL (Kusto Query Language) and visualize the results with Graphistry. + """ + + def configure_sentinel( + self, + workspace_id: str, + tenant_id: Optional[str] = None, + client_id: Optional[str] = None, + client_secret: Optional[str] = None, + credential: Optional["TokenCredential"] = None, + default_timespan: Optional[timedelta] = None, + use_device_auth: bool = False, + ) -> Plottable: + """Configure Microsoft Sentinel Log Analytics connection settings. + + Sets up the connection parameters for accessing a Log Analytics workspace. + Authentication can be done via: + - Custom credential object (highest priority) + - Service principal (client_id, client_secret, tenant_id) + - DefaultAzureCredential (includes Azure CLI, Managed Identity, etc.) + + :param workspace_id: Log Analytics workspace ID (GUID format) + :type workspace_id: str + :param tenant_id: Azure AD tenant ID for authentication + :type tenant_id: Optional[str] + :param client_id: Azure AD application (client) ID for service principal auth + :type client_id: Optional[str] + :param client_secret: Azure AD application secret for service principal auth + :type client_secret: Optional[str] + :param credential: Custom credential object for authentication + :type credential: Optional[TokenCredential] + :param default_timespan: Default time range for queries (defaults to 24 hours) + :type default_timespan: Optional[timedelta] + :param use_device_auth: Use device code authentication (shows code and URL) + :type use_device_auth: bool + :returns: Self for method chaining + :rtype: Plottable + + **Example: Azure CLI authentication (development)** + :: + + import graphistry + # First run: az login + g = graphistry.configure_sentinel( + workspace_id="12345678-1234-1234-1234-123456789abc" + ) + + **Example: Service principal authentication (production)** + :: + + import graphistry + g = graphistry.configure_sentinel( + 
workspace_id="12345678-1234-1234-1234-123456789abc", + tenant_id="your-tenant-id", + client_id="your-client-id", + client_secret="your-client-secret" + ) + + **Example: Device code authentication (interactive)** + :: + + import graphistry + g = graphistry.configure_sentinel( + workspace_id="12345678-1234-1234-1234-123456789abc", + use_device_auth=True + ) + # This will show a code and URL for authentication + + **Example: Custom credential** + :: + + from azure.identity import DeviceCodeCredential + import graphistry + + credential = DeviceCodeCredential() + g = graphistry.configure_sentinel( + workspace_id="12345678-1234-1234-1234-123456789abc", + credential=credential + ) + """ + self.session.sentinel = SentinelConfig( + workspace_id=workspace_id, + tenant_id=tenant_id, + client_id=client_id, + client_secret=client_secret, + credential=credential, + default_timespan=default_timespan or timedelta(hours=24), + use_device_auth=use_device_auth, + ) + return self + + def sentinel_from_client( + self, + client: LogsQueryClient, + workspace_id: str, + default_timespan: Optional[timedelta] = None + ) -> Plottable: + """Configure Sentinel using an existing LogsQueryClient connection. + + Use this method when you already have a configured LogsQueryClient + and want to reuse it with Graphistry. 
+ + :param client: Pre-configured LogsQueryClient + :type client: azure.monitor.query.LogsQueryClient + :param workspace_id: Log Analytics workspace ID + :type workspace_id: str + :param default_timespan: Default time range for queries + :type default_timespan: Optional[timedelta] + :returns: Self for method chaining + :rtype: Plottable + + **Example** + :: + + from azure.monitor.query import LogsQueryClient + from azure.identity import DefaultAzureCredential + import graphistry + + # Create client + credential = DefaultAzureCredential() + logs_client = LogsQueryClient(credential) + + # Use with Graphistry + g = graphistry.sentinel_from_client( + logs_client, + "12345678-1234-1234-1234-123456789abc" + ) + """ + # Clean up existing client if different + if self.session.sentinel is not None and client is not self.session.sentinel._client: + self.sentinel_close() + + self.session.sentinel = SentinelConfig( + workspace_id=workspace_id, + default_timespan=default_timespan or timedelta(hours=24), + _client=client, + ) + return self + + @property + def _sentinel_config(self) -> SentinelConfig: + """Get the current Sentinel configuration.""" + if self.session.sentinel is None: + raise ValueError("SentinelMixin is not configured") + return self.session.sentinel + + @property + def sentinel_client(self) -> LogsQueryClient: + """Get or create the LogsQueryClient instance.""" + if self._sentinel_config._client is not None: + return self._sentinel_config._client + client = init_sentinel_client(self._sentinel_config) + self._sentinel_config._client = client + return client + + def sentinel_close(self) -> None: + """Close the Sentinel client connection. + + Note: LogsQueryClient doesn't require explicit cleanup, + but this method is provided for API consistency. + + **Example** + :: + + import graphistry + g = graphistry.configure_sentinel(...) + # ... perform queries ... 
+ g.sentinel_close() + """ + if self.session.sentinel is None: + return + # LogsQueryClient doesn't need explicit cleanup + # Just clear the cached client reference + self.session.sentinel._client = None + + def sentinel_health_check(self) -> None: + """Perform a health check on the Sentinel connection. + + Executes a simple query (Heartbeat | take 1) to verify that the connection + to the Log Analytics workspace is working properly. + + :raises SentinelConnectionError: If the connection test fails + + **Example** + :: + + import graphistry + g = graphistry.configure_sentinel(...) + g.sentinel_health_check() # Verify connection works + """ + try: + self._sentinel_query("Heartbeat | take 1", timespan=timedelta(hours=1)) + logger.info("Sentinel health check successful") + except Exception as e: + raise SentinelConnectionError(f"Health check failed: {e}") from e + + @overload + def kql( + self, + query: str, + *, + timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] = None, + unwrap_nested: Optional[bool] = None, + single_table: Literal[True] = True, + include_statistics: bool = False + ) -> pd.DataFrame: + ... + + @overload + def kql( + self, + query: str, + *, + timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] = None, + unwrap_nested: Optional[bool] = None, + single_table: Literal[False], + include_statistics: bool = False + ) -> List[pd.DataFrame]: + ... + + @overload + def kql( + self, + query: str, + *, + timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] = None, + unwrap_nested: Optional[bool] = None, + single_table: bool = True, + include_statistics: bool = False + ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + ... 
+ + def kql( + self, + query: str, + *, + timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] = None, + unwrap_nested: Optional[bool] = None, + single_table: bool = True, + include_statistics: bool = False + ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + """Execute KQL query and return result tables as DataFrames. + + Submits a Kusto Query Language (KQL) query to Microsoft Sentinel (Log Analytics) + and returns the results. By default, expects a single table result and returns + it as a DataFrame. If multiple tables are returned, only the first is returned + with a warning. Set single_table=False to get all result tables. + + :param query: KQL query string to execute + :type query: str + :param timespan: Time range for the query (default: 24 hours) + :type timespan: Optional[Union[timedelta, tuple[datetime, datetime]]] + :param unwrap_nested: Strategy for handling nested/dynamic columns + :type unwrap_nested: Optional[bool] + :param single_table: If True, return single DataFrame; if False, return list + :type single_table: bool + :param include_statistics: Include query statistics in DataFrame attrs + :type include_statistics: bool + :returns: Single DataFrame if single_table=True, else list of DataFrames + :rtype: Union[pd.DataFrame, List[pd.DataFrame]] + + **unwrap_nested semantics:** + + - **True**: Always attempt to unwrap nested columns; raise on failure + - **None**: Use heuristic - unwrap if the result looks nested + - **False**: Never attempt to unwrap nested columns + + **Example: Basic security query (single table mode)** + :: + + import graphistry + from datetime import timedelta + g = graphistry.configure_sentinel(...) 
+ + query = ''' + SecurityEvent + | where TimeGenerated > ago(1d) + | where EventID == 4625 // Failed logon + | project TimeGenerated, Account, Computer, IpAddress + | take 1000 + ''' + + # Query last 7 days + df = g.kql(query, timespan=timedelta(days=7)) + print(f"Found {len(df)} failed logon events") + + **Example: Get all tables as list** + :: + + # Always get a list of all tables + dfs = g.kql(query, single_table=False) + df = dfs[0] + + **Example: Query with specific time range** + :: + + from datetime import datetime, timedelta + + # Query specific time window + start = datetime(2024, 1, 1) + end = datetime(2024, 1, 7) + df = g.kql(query, timespan=(start, end)) + + **Example: Multi-table query** + :: + + query = ''' + SecurityEvent | summarize Count=count() by EventID | top 5 by Count; + SecurityAlert | take 10 + ''' + + # With single_table=False, returns all tables + frames = g.kql(query, single_table=False) + events_df = frames[0] + alerts_df = frames[1] + """ + results = self._sentinel_query(query, timespan=timespan) + + if not results: + if single_table: + raise ValueError("Query returned no results") + return [] + + dfs: List[pd.DataFrame] = [] + + for result in results: + # Determine if we should unwrap nested data + do_unwrap = ( + unwrap_nested is True + or (unwrap_nested is None and _should_unwrap(result)) + ) + + if do_unwrap: + try: + df_unwrapped = _unwrap_nested(result) + dfs.append(df_unwrapped) + continue + except Exception as exc: + if unwrap_nested is True: + raise RuntimeError(f"Failed to unwrap nested data: {exc}") from exc + # Heuristic miss - fall back to flat table + pass + + # Default: flat table + if not result.column_names: + # Safety fallback + dfs.append(pd.DataFrame(result.data)) + else: + dfs.append(pd.DataFrame(result.data, columns=result.column_names)) + + # Auto-unbox single table result if requested + if single_table: + if len(dfs) > 1: + logger.warning(f"Query returned {len(dfs)} tables, returning first table only") + return 
dfs[0] + + return dfs + + def kql_last( + self, + query: str, + *, + hours: float = 1, + **kwargs + ) -> Union[pd.DataFrame, List[pd.DataFrame]]: + """Execute KQL query for the last N hours. + + Convenience wrapper for kql() that automatically sets the timespan + to the last N hours from now. + + :param query: KQL query string to execute + :type query: str + :param hours: Number of hours to look back (default: 1) + :type hours: float + :param kwargs: Additional arguments passed to kql() + :returns: Query results as DataFrame(s) + :rtype: Union[pd.DataFrame, List[pd.DataFrame]] + + **Example: Get security alerts from last 24 hours** + :: + + import graphistry + g = graphistry.configure_sentinel(...) + + alerts = g.kql_last(''' + SecurityAlert + | project TimeGenerated, AlertName, Severity + | order by TimeGenerated desc + ''', hours=24) + + **Example: Get recent failed logins (last hour)** + :: + + # Default is 1 hour + recent_failures = g.kql_last(''' + SecurityEvent + | where EventID == 4625 + | summarize FailCount=count() by Account + ''') + """ + return self.kql(query, timespan=timedelta(hours=hours), **kwargs) + + def sentinel_tables(self) -> pd.DataFrame: + """List all available tables in the Log Analytics workspace. + + :returns: DataFrame with table names + :rtype: pd.DataFrame + + **Example** + :: + + import graphistry + g = graphistry.configure_sentinel(...) + + # Get list of all tables + tables = g.sentinel_tables() + print(f"Found {len(tables)} tables") + print(tables.head(10)) + """ + # Use Usage table to get all table names - this avoids union conflicts + query = """ + Usage + | where TimeGenerated > ago(30d) + | distinct DataType + | sort by DataType asc + """ + return self.kql(query, timespan=timedelta(days=30)) + + def sentinel_schema(self, table: str) -> pd.DataFrame: + """Get schema information for a specific table. 
+ + :param table: Name of the table to inspect + :type table: str + :returns: DataFrame with column names and types + :rtype: pd.DataFrame + + **Example** + :: + + import graphistry + g = graphistry.configure_sentinel(...) + + # Get schema for SecurityEvent table + schema = g.sentinel_schema("SecurityEvent") + print(schema[['ColumnName', 'DataType']]) + """ + query = f"{table} | getschema" + return self.kql(query, timespan=timedelta(minutes=5)) + + def _sentinel_query( + self, + query: str, + timespan: Optional[Union[timedelta, Tuple[datetime, datetime]]] = None + ) -> List[SentinelQueryResult]: + """Execute KQL query and return raw results. + + Internal method for executing KQL queries and returning raw Sentinel + query results without DataFrame conversion. + + :param query: KQL query string to execute + :type query: str + :param timespan: Time range for the query + :type timespan: Optional[Union[timedelta, tuple[datetime, datetime]]] + :returns: List of raw query results + :rtype: List[SentinelQueryResult] + :raises SentinelQueryError: If the query execution fails + """ + from azure.monitor.query import LogsQueryStatus + from azure.core.exceptions import HttpResponseError + + logger.debug(f"SentinelMixin._sentinel_query(): {query}") + + # Use default timespan if not provided + if timespan is None: + timespan = self._sentinel_config.default_timespan + + try: + start = time.time() + response = self.sentinel_client.query_workspace( + workspace_id=self._sentinel_config.workspace_id, + query=query, + timespan=timespan + ) + + # Check for partial failures + if response.status == LogsQueryStatus.PARTIAL: + logger.warning(f"Query returned partial results: {response.partial_error}") + elif response.status == LogsQueryStatus.FAILURE: + raise SentinelQueryError(f"Query failed: {response.partial_error}") + + results = [] + row_lengths = [] + + # Process each table in the response + for table in response.tables: + rows = [list(row) for row in table.rows] + + # Handle 
different column formats + if hasattr(table.columns[0], 'name') if table.columns else False: + # Columns are objects with name/type attributes + col_names = [col.name for col in table.columns] + col_types = [col.type for col in table.columns] + else: + # Columns are strings (column names only) + col_names = list(table.columns) + col_types = ['string'] * len(col_names) # Default to string type + + # Handle table name + table_name = getattr(table, 'name', None) + + results.append(SentinelQueryResult( + data=rows, + column_names=col_names, + column_types=col_types, + table_name=table_name + )) + row_lengths.append((len(rows), len(col_names))) + + logger.info(f"Query returned {len(results)} tables shapes: {row_lengths} in {time.time() - start:.3f} sec") + return results + + except HttpResponseError as e: + logger.error(f"Sentinel query failed: {e}") + raise SentinelQueryError(f"Query failed: {e}") from e + except Exception as e: + logger.error(f"Unexpected error during query: {e}") + raise SentinelQueryError(f"Unexpected error: {e}") from e + + +def init_sentinel_client(cfg: SentinelConfig) -> "LogsQueryClient": + """Initialize Sentinel Log Analytics client with appropriate authentication. + + Authentication precedence: + 1. Custom credential object (if provided) + 2. Service Principal (if credentials provided) + 3. Device code authentication (if use_device_auth=True) + 4. DefaultAzureCredential (tries multiple methods automatically) + + For Azure CLI auth: Run 'az login' before using this method. 
+ """ + from azure.identity import DefaultAzureCredential, ClientSecretCredential, DeviceCodeCredential + from azure.monitor.query import LogsQueryClient + + try: + assert cfg.workspace_id is not None, "workspace_id is not set" + + if cfg.credential: + credential = cfg.credential + logger.info("Using custom credential object for Sentinel") + elif cfg.client_id and cfg.client_secret and cfg.tenant_id: + credential = ClientSecretCredential( + tenant_id=cfg.tenant_id, + client_id=cfg.client_id, + client_secret=cfg.client_secret + ) + logger.info(f"Using Service Principal authentication for workspace {cfg.workspace_id}") + elif cfg.use_device_auth: + credential = DeviceCodeCredential( + tenant_id=cfg.tenant_id # Optional, uses common tenant if not provided + ) + logger.info(f"Using Device Code authentication for workspace {cfg.workspace_id}") + logger.info("You will be prompted to visit a URL and enter a code to authenticate") + else: + credential = DefaultAzureCredential() + logger.info(f"Using DefaultAzureCredential (Azure CLI, Managed Identity, etc.) for workspace {cfg.workspace_id}") + + client = LogsQueryClient(credential) + return client + + except Exception as exc: + raise SentinelConnectionError(f"Failed to initialize Sentinel client: {exc}") from exc + + +# Sentinel Utils - adapted from Kusto plugin +def _is_dynamic(val: Any) -> bool: + """Check if value is a nested/dynamic JSON type.""" + return isinstance(val, (dict, list)) + + +def _unwrap_nested(result: SentinelQueryResult) -> pd.DataFrame: + """ + Transform a Sentinel result whose columns contain nested/dynamic objects. 
+ + - dict -> dot-flattened + - list[dict] -> explode + flatten + - list[scalar] -> keep as-is + """ + df = pd.DataFrame(result.data, columns=result.column_names) + if not result.column_types: + return df + + for col, col_type in zip(result.column_names, result.column_types): + # Check for dynamic/object types (common in Sentinel) + if col_type.lower() in ["dynamic", "object", "string"]: + # Check if column contains JSON strings that need parsing + if col_type.lower() == "string" and len(df) > 0: + try: + # Try to parse first non-null value as JSON + sample = df[col].dropna().iloc[0] if not df[col].dropna().empty else None + if sample and isinstance(sample, str) and (sample.startswith('{') or sample.startswith('[')): + import json + df[col] = df[col].apply(lambda x: json.loads(x) if pd.notna(x) and isinstance(x, str) else x) + except (json.JSONDecodeError, IndexError): + continue # Not JSON, keep as string + + # Handle lists of dicts - need to explode + list_of_dicts = df[col].apply( + lambda v: isinstance(v, list) and (not v or all(isinstance(x, dict) for x in v)) + ) + if list_of_dicts.any(): + df[col] = df[col].where( + list_of_dicts, + df[col].apply(lambda x: [x] if pd.notna(x) else x) + ) + df = df.explode(col, ignore_index=True) + + # Flatten dict columns + dict_rows = df[col].apply(lambda v: isinstance(v, dict)) + if dict_rows.any(): + flat = pd.json_normalize(df.loc[dict_rows, col].tolist(), sep='.').add_prefix(f"{col}.") + flat.index = df.loc[dict_rows].index + df = df.join(flat, how='left') + df[col] = df[col].mask(dict_rows, pd.NA) + + # Drop column if all values are NA after processing + if df[col].isna().all(): + df = df.drop(columns=[col]) + + # Clean up - replace pd.NA with None for consistency + df = df.astype(object).where(pd.notna(df), None) + return df.reset_index(drop=True) + + +def _should_unwrap(result: SentinelQueryResult, sample_rows: int = 5) -> bool: + """ + Decide whether result looks like it contains nested/dynamic columns. 
+ + Strategy: + 1. Check column types for 'dynamic' or 'object' + 2. Inspect sample rows for dict/list values + 3. Check for JSON strings + """ + # Check column types + if result.column_types: + for col_type in result.column_types: + if col_type.lower() in ["dynamic", "object"]: + return True + + # Sample data for nested structures + for col_idx in range(len(result.column_names)): + sample = (row[col_idx] for row in result.data[:sample_rows] if row) + for val in sample: + if _is_dynamic(val): + return True + # Check for JSON strings + if isinstance(val, str) and val and (val.startswith('{') or val.startswith('[')): + try: + import json + json.loads(val) + return True + except (json.JSONDecodeError, ValueError): + continue + + return False diff --git a/graphistry/plugins_types/sentinel_types.py b/graphistry/plugins_types/sentinel_types.py new file mode 100644 index 0000000000..a32773e02d --- /dev/null +++ b/graphistry/plugins_types/sentinel_types.py @@ -0,0 +1,73 @@ +from typing import Optional, List, Any, TYPE_CHECKING +from dataclasses import dataclass +from datetime import timedelta + +if TYPE_CHECKING: + from azure.monitor.query import LogsQueryClient + from azure.core.credentials import TokenCredential +else: + LogsQueryClient = Any + TokenCredential = Any + + +class SentinelConnectionError(Exception): + """Raised when connection to Log Analytics workspace fails""" + pass + + +class SentinelQueryError(Exception): + """Raised when query execution fails""" + pass + + +class SentinelQueryResult: + """Container for a single query result table from Microsoft Sentinel""" + + def __init__( + self, + data: List[List[Any]], + column_names: List[str], + column_types: List[str], + table_name: Optional[str] = None + ): + """ + Initialize a Sentinel query result. 
+ + :param data: List of rows, where each row is a list of values + :param column_names: List of column names + :param column_types: List of column types (e.g., 'string', 'datetime', 'int') + :param table_name: Optional name of the result table + """ + self.data = data + self.column_names = column_names + self.column_types = column_types + self.table_name = table_name + + +@dataclass +class SentinelConfig: + """Configuration for Microsoft Sentinel Log Analytics connection""" + + workspace_id: str + """The Log Analytics workspace ID (GUID format)""" + + tenant_id: Optional[str] = None + """Azure AD tenant ID for authentication""" + + client_id: Optional[str] = None + """Azure AD application (client) ID for service principal auth""" + + client_secret: Optional[str] = None + """Azure AD application secret for service principal auth""" + + credential: Optional[TokenCredential] = None + """Custom credential object for authentication""" + + default_timespan: timedelta = timedelta(hours=24) + """Default time range for queries when not specified""" + + use_device_auth: bool = False + """Use device code authentication flow""" + + _client: Optional[LogsQueryClient] = None + """Cached client instance (internal use)""" diff --git a/graphistry/pygraphistry.py b/graphistry/pygraphistry.py index e39b48c7ec..11c50e7be6 100644 --- a/graphistry/pygraphistry.py +++ b/graphistry/pygraphistry.py @@ -1961,6 +1961,32 @@ def kusto_graph(self, graph_name: str, snap_name: Optional[str] = None) -> Plott return cast(Plotter, self._plotter().kusto_graph(graph_name, snap_name)) kusto_graph.__doc__ = Plotter.kusto_graph.__doc__ + # ---- Sentinel / Log Analytics API ---------------------------------------------------- # + + def configure_sentinel( + self, + workspace_id: str, + tenant_id: Optional[str] = None, + client_id: Optional[str] = None, + client_secret: Optional[str] = None, + credential: Optional[Any] = None, + default_timespan: Optional[Any] = None, + use_device_auth: bool = False, + ) 
-> Plotter: + return cast(Plotter, self._plotter().configure_sentinel( + workspace_id=workspace_id, + tenant_id=tenant_id, + client_id=client_id, + client_secret=client_secret, + credential=credential, + default_timespan=default_timespan, + use_device_auth=use_device_auth + )) + configure_sentinel.__doc__ = Plotter.configure_sentinel.__doc__ + + def sentinel_from_client(self, client: Any, workspace_id: str, default_timespan: Optional[Any] = None) -> Plotter: + return cast(Plotter, self._plotter().sentinel_from_client(client, workspace_id, default_timespan)) + sentinel_from_client.__doc__ = Plotter.sentinel_from_client.__doc__ def gsql_endpoint(self, @@ -2603,6 +2629,8 @@ def _handle_api_response(self, response): kusto_from_client = PyGraphistry.kusto_from_client kql = PyGraphistry.kql kusto_graph = PyGraphistry.kusto_graph +configure_sentinel = PyGraphistry.configure_sentinel +sentinel_from_client = PyGraphistry.sentinel_from_client cosmos = PyGraphistry.cosmos neptune = PyGraphistry.neptune gremlin = PyGraphistry.gremlin diff --git a/graphistry/tests/test_sentinel.py b/graphistry/tests/test_sentinel.py new file mode 100644 index 0000000000..76c3a4a705 --- /dev/null +++ b/graphistry/tests/test_sentinel.py @@ -0,0 +1,380 @@ +import unittest +from unittest.mock import Mock, MagicMock, patch, PropertyMock +import pandas as pd +from datetime import datetime, timedelta +from typing import List + +from graphistry.plugins.sentinel import SentinelMixin +from graphistry.plugins_types.sentinel_types import ( + SentinelConfig, + SentinelConnectionError, + SentinelQueryError, + SentinelQueryResult +) + + +class TestSentinelMixin(unittest.TestCase): + """Test cases for SentinelMixin functionality.""" + + def setUp(self): + """Set up test fixtures.""" + # Create a mock Plotter instance with SentinelMixin + from graphistry.plugins.sentinel import SentinelMixin + + class MockPlotter(SentinelMixin): + def __init__(self): + self.session = MagicMock() + self.session.sentinel = None + 
+ self.plotter = MockPlotter() + self.workspace_id = "12345678-1234-1234-1234-123456789abc" + + def test_configure_sentinel_basic(self): + """Test basic Sentinel configuration.""" + result = self.plotter.configure_sentinel( + workspace_id=self.workspace_id + ) + + self.assertEqual(result, self.plotter) + self.assertIsNotNone(self.plotter.session.sentinel) + self.assertEqual(self.plotter.session.sentinel.workspace_id, self.workspace_id) + self.assertEqual(self.plotter.session.sentinel.default_timespan, timedelta(hours=24)) + + def test_configure_sentinel_service_principal(self): + """Test Sentinel configuration with service principal.""" + result = self.plotter.configure_sentinel( + workspace_id=self.workspace_id, + tenant_id="tenant-123", + client_id="client-456", + client_secret="secret-789" + ) + + self.assertEqual(result, self.plotter) + config = self.plotter.session.sentinel + self.assertEqual(config.workspace_id, self.workspace_id) + self.assertEqual(config.tenant_id, "tenant-123") + self.assertEqual(config.client_id, "client-456") + self.assertEqual(config.client_secret, "secret-789") + + def test_configure_sentinel_custom_timespan(self): + """Test Sentinel configuration with custom default timespan.""" + custom_timespan = timedelta(days=7) + self.plotter.configure_sentinel( + workspace_id=self.workspace_id, + default_timespan=custom_timespan + ) + + self.assertEqual(self.plotter.session.sentinel.default_timespan, custom_timespan) + + @patch('graphistry.plugins.sentinel.init_sentinel_client') + def test_sentinel_client_lazy_initialization(self, mock_init): + """Test that Sentinel client is lazily initialized.""" + mock_client = MagicMock() + mock_init.return_value = mock_client + + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + + # Client should not be initialized yet + mock_init.assert_not_called() + + # Access client property + client = self.plotter.sentinel_client + + # Now client should be initialized + mock_init.assert_called_once() + 
self.assertEqual(client, mock_client) + + # Accessing again should not reinitialize + client2 = self.plotter.sentinel_client + mock_init.assert_called_once() + self.assertEqual(client2, mock_client) + + @patch('graphistry.plugins.sentinel.LogsQueryClient') + def test_sentinel_from_client(self, mock_client_class): + """Test configuration from existing client.""" + existing_client = MagicMock() + + result = self.plotter.sentinel_from_client( + client=existing_client, + workspace_id=self.workspace_id + ) + + self.assertEqual(result, self.plotter) + self.assertEqual(self.plotter.session.sentinel.workspace_id, self.workspace_id) + self.assertEqual(self.plotter.session.sentinel._client, existing_client) + + def test_sentinel_close(self): + """Test closing Sentinel connection.""" + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + self.plotter.session.sentinel._client = MagicMock() + + self.plotter.sentinel_close() + + self.assertIsNone(self.plotter.session.sentinel._client) + + @patch.object(SentinelMixin, '_sentinel_query') + def test_kql_single_table(self, mock_query): + """Test KQL query with single table result.""" + # Mock query result + mock_result = SentinelQueryResult( + data=[['value1', 'value2'], ['value3', 'value4']], + column_names=['col1', 'col2'], + column_types=['string', 'string'] + ) + mock_query.return_value = [mock_result] + + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + + query = "SecurityEvent | take 10" + df = self.plotter.kql(query) + + self.assertIsInstance(df, pd.DataFrame) + self.assertEqual(len(df), 2) + self.assertEqual(list(df.columns), ['col1', 'col2']) + mock_query.assert_called_once() + + @patch.object(SentinelMixin, '_sentinel_query') + def test_kql_multiple_tables(self, mock_query): + """Test KQL query with multiple table results.""" + # Mock query results + mock_results = [ + SentinelQueryResult( + data=[['data1']], + column_names=['col1'], + column_types=['string'] + ), + SentinelQueryResult( + 
data=[['data2']], + column_names=['col2'], + column_types=['string'] + ) + ] + mock_query.return_value = mock_results + + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + + query = "SecurityEvent | take 5; SecurityAlert | take 5" + dfs = self.plotter.kql(query, single_table=False) + + self.assertIsInstance(dfs, list) + self.assertEqual(len(dfs), 2) + self.assertIsInstance(dfs[0], pd.DataFrame) + self.assertIsInstance(dfs[1], pd.DataFrame) + + @patch.object(SentinelMixin, '_sentinel_query') + def test_kql_with_timespan(self, mock_query): + """Test KQL query with custom timespan.""" + mock_query.return_value = [] + + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + + custom_timespan = timedelta(days=30) + with self.assertRaises(ValueError): # No results + self.plotter.kql("test query", timespan=custom_timespan) + + mock_query.assert_called_with("test query", timespan=custom_timespan) + + @patch.object(SentinelMixin, 'kql') + def test_kql_last(self, mock_kql): + """Test kql_last convenience method.""" + mock_df = pd.DataFrame({'col': [1, 2, 3]}) + mock_kql.return_value = mock_df + + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + + result = self.plotter.kql_last("test query", hours=48) + + self.assertEqual(result, mock_df) + mock_kql.assert_called_with("test query", timespan=timedelta(hours=48)) + + @patch.object(SentinelMixin, 'kql') + def test_sentinel_tables(self, mock_kql): + """Test sentinel_tables method.""" + mock_df = pd.DataFrame({'TableName': ['Table1', 'Table2']}) + mock_kql.return_value = mock_df + + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + + result = self.plotter.sentinel_tables() + + self.assertEqual(result, mock_df) + mock_kql.assert_called_with( + "union withsource=TableName * | distinct TableName | sort by TableName asc", + timespan=timedelta(minutes=5) + ) + + @patch.object(SentinelMixin, 'kql') + def test_sentinel_schema(self, mock_kql): + """Test sentinel_schema method.""" 
+ mock_df = pd.DataFrame({ + 'ColumnName': ['Col1', 'Col2'], + 'DataType': ['string', 'datetime'] + }) + mock_kql.return_value = mock_df + + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + + result = self.plotter.sentinel_schema("SecurityEvent") + + self.assertEqual(result, mock_df) + mock_kql.assert_called_with( + "SecurityEvent | getschema", + timespan=timedelta(minutes=5) + ) + + @patch.object(SentinelMixin, '_sentinel_query') + def test_sentinel_health_check_success(self, mock_query): + """Test successful health check.""" + mock_query.return_value = [MagicMock()] + + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + + # Should not raise + self.plotter.sentinel_health_check() + + mock_query.assert_called_with("Heartbeat | take 1", timespan=timedelta(hours=1)) + + @patch.object(SentinelMixin, '_sentinel_query') + def test_sentinel_health_check_failure(self, mock_query): + """Test health check failure.""" + mock_query.side_effect = Exception("Connection failed") + + self.plotter.configure_sentinel(workspace_id=self.workspace_id) + + with self.assertRaises(SentinelConnectionError) as ctx: + self.plotter.sentinel_health_check() + + self.assertIn("Health check failed", str(ctx.exception)) + + +class TestSentinelUtils(unittest.TestCase): + """Test cases for Sentinel utility functions.""" + + def test_unwrap_nested_simple(self): + """Test unwrapping simple nested data.""" + from graphistry.plugins.sentinel import _unwrap_nested + + result = SentinelQueryResult( + data=[ + [{'key': 'value1', 'nested': {'inner': 'data1'}}], + [{'key': 'value2', 'nested': {'inner': 'data2'}}] + ], + column_names=['data'], + column_types=['object'] + ) + + df = _unwrap_nested(result) + + self.assertIn('data.key', df.columns) + self.assertIn('data.nested.inner', df.columns) + self.assertEqual(len(df), 2) + + def test_unwrap_nested_json_string(self): + """Test unwrapping JSON strings.""" + from graphistry.plugins.sentinel import _unwrap_nested + + result = 
SentinelQueryResult( + data=[ + ['{"key": "value1", "number": 42}'], + ['{"key": "value2", "number": 84}'] + ], + column_names=['json_data'], + column_types=['string'] + ) + + df = _unwrap_nested(result) + + self.assertIn('json_data.key', df.columns) + self.assertIn('json_data.number', df.columns) + self.assertEqual(df['json_data.key'].iloc[0], 'value1') + self.assertEqual(df['json_data.number'].iloc[0], 42) + + def test_should_unwrap_detection(self): + """Test detection of nested data.""" + from graphistry.plugins.sentinel import _should_unwrap + + # Should unwrap - has object type + result1 = SentinelQueryResult( + data=[[{'nested': 'data'}]], + column_names=['col'], + column_types=['object'] + ) + self.assertTrue(_should_unwrap(result1)) + + # Should unwrap - has dict data + result2 = SentinelQueryResult( + data=[[{'key': 'value'}]], + column_names=['col'], + column_types=['string'] + ) + self.assertTrue(_should_unwrap(result2)) + + # Should not unwrap - simple data + result3 = SentinelQueryResult( + data=[['simple', 'text']], + column_names=['col1', 'col2'], + column_types=['string', 'string'] + ) + self.assertFalse(_should_unwrap(result3)) + + +class TestSentinelAuthentication(unittest.TestCase): + """Test cases for Sentinel authentication.""" + + @patch('graphistry.plugins.sentinel.LogsQueryClient') + @patch('graphistry.plugins.sentinel.DefaultAzureCredential') + def test_init_default_credential(self, mock_credential_class, mock_client_class): + """Test initialization with DefaultAzureCredential.""" + from graphistry.plugins.sentinel import init_sentinel_client + + mock_credential = MagicMock() + mock_credential_class.return_value = mock_credential + + config = SentinelConfig(workspace_id="test-workspace") + init_sentinel_client(config) + + mock_credential_class.assert_called_once() + mock_client_class.assert_called_once_with(mock_credential) + + @patch('graphistry.plugins.sentinel.LogsQueryClient') + 
@patch('graphistry.plugins.sentinel.ClientSecretCredential') + def test_init_service_principal(self, mock_credential_class, mock_client_class): + """Test initialization with service principal.""" + from graphistry.plugins.sentinel import init_sentinel_client + + mock_credential = MagicMock() + mock_credential_class.return_value = mock_credential + + config = SentinelConfig( + workspace_id="test-workspace", + tenant_id="tenant", + client_id="client", + client_secret="secret" + ) + init_sentinel_client(config) + + mock_credential_class.assert_called_once_with( + tenant_id="tenant", + client_id="client", + client_secret="secret" + ) + mock_client_class.assert_called_once_with(mock_credential) + + @patch('graphistry.plugins.sentinel.LogsQueryClient') + def test_init_custom_credential(self, mock_client_class): + """Test initialization with custom credential.""" + from graphistry.plugins.sentinel import init_sentinel_client + + custom_credential = MagicMock() + config = SentinelConfig( + workspace_id="test-workspace", + credential=custom_credential + ) + + init_sentinel_client(config) + + mock_client_class.assert_called_once_with(custom_credential) + + +if __name__ == '__main__': + unittest.main() diff --git a/mypy.ini b/mypy.ini index 48d2a4279d..835bbc23cc 100644 --- a/mypy.ini +++ b/mypy.ini @@ -110,5 +110,14 @@ ignore_missing_imports = True [mypy-azure.kusto.*] ignore_missing_imports = True +[mypy-azure.monitor.*] +ignore_missing_imports = True + +[mypy-azure.identity.*] +ignore_missing_imports = True + +[mypy-azure.core.*] +ignore_missing_imports = True + [mypy-requests.*] ignore_missing_imports = True diff --git a/setup.py b/setup.py index f2f3527717..dfcaf26197 100755 --- a/setup.py +++ b/setup.py @@ -55,6 +55,7 @@ def unique_flatten_dict(d): 'nodexl': ['openpyxl==3.1.0', 'xlrd'], 'jupyter': ['ipython'], 'spanner': ['google-cloud-spanner'], + 'sentinel': ['azure-monitor-query>=1.2.0', 'azure-identity>=1.12.0'], 'kusto': ['azure-kusto-data', 'azure-identity'] } @@ 
-150,5 +151,5 @@ def unique_flatten_dict(d): "Code of Conduct": "https://github.com/graphistry/pygraphistry/blob/main/CODE_OF_CONDUCT.md", "Support": "https://www.graphistry.com/support", }, - keywords=['cugraph', 'cudf', 'cuml', 'dask', 'Databricks', 'GFQL', 'GPU', 'Graph', 'graphviz', 'GraphX', 'Gremlin', 'igraph', 'Jupyter', 'Neo4j', 'Neptune', 'Network', 'NetworkX', 'Notebook', 'OpenSearch', 'Pandas', 'Plot', 'RAPIDS', 'RDF', 'Splunk', 'Spark', 'SQL', 'Tinkerpop', 'UMAP', 'Visualization', 'Torch', 'DGL', 'GNN'] + keywords=['cugraph', 'cudf', 'cuml', 'dask', 'Databricks', 'GFQL', 'GPU', 'Graph', 'graphviz', 'GraphX', 'Gremlin', 'igraph', 'Jupyter', 'Neo4j', 'Neptune', 'Network', 'NetworkX', 'Notebook', 'OpenSearch', 'Pandas', 'Plot', 'RAPIDS', 'RDF', 'Sentinel', 'Splunk', 'Spark', 'SQL', 'Tinkerpop', 'UMAP', 'Visualization', 'Torch', 'DGL', 'GNN'] )