diff --git a/Guided Hunting - Covid-19 Themed Threats.ipynb b/Guided Hunting - Covid-19 Themed Threats.ipynb new file mode 100644 index 00000000..037d653e --- /dev/null +++ b/Guided Hunting - Covid-19 Themed Threats.ipynb @@ -0,0 +1,641 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Guided Hunting - Covid-19 Themed Threats\n", + "**Notebook Version:** 1.0
\n", + "**Python Version:** Python 3.6 (including Python 3.6 - AzureML)
\n", + "**Data Sources Required:** CommonSecurityLog, OfficeActivity, SecurityEvent
\n", + " \n", + "This Notebook assists defenders in hunting for Covid-19 themed attacks by identifying anomalous Covid-19 related events within your Azure Sentinel Workspace. This is designed to be a hunting notebook and has a high probability of returning false positives and returned data points should not be seen as detections without further investigation.\n", + "\n", + "**How to use:**
\n", + "Run the cells in this Notebook in order, at various points in the Notebook flow you will be prompted to enter or select options relevant to the scope of your triage.
\n", + "This Notebook presumes you have Azure Sentinel Workspace settings and Threat Intelligence providers configured in a config file. If you do not have this in place please refer https://msticpy.readthedocs.io/en/latest/getting_started/msticpyconfig.html# to https://github.com/Azure/Azure-Sentinel-Notebooks/blob/master/ConfiguringNotebookEnvironment.ipynb and to set this up." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Check that the Notebook kernel is Pytyhon 3.6\n", + "import sys\n", + "MIN_REQ_PYTHON = (3,6)\n", + "if sys.version_info < MIN_REQ_PYTHON:\n", + " print('Check the Kernel->Change Kernel menu and ensure that Python 3.6')\n", + " print('or later is selected as the active kernel.')\n", + " sys.exit(\"Python %s.%s or later is required.\\n\" % MIN_REQ_PYTHON)\n", + "\n", + "# Install required packages for this Notebook\n", + "!pip install msticpy --upgrade --user\n", + "!pip install python-whois --user --upgrade\n", + "!pip install git+\"https://github.com/tqdm/tqdm\" --upgrade --user\n", + "!pip install IPWhois --upgrade --user\n", + "!pip install tldextract --upgrade --user" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Import required packages\n", + "import warnings\n", + "warnings.simplefilter(action='ignore', category=FutureWarning)\n", + "print('Importing python packages....')\n", + "import whois\n", + "import dns\n", + "import tldextract\n", + "import datetime\n", + "import ipywidgets as widgets\n", + "import pandas as pd\n", + "from tqdm.notebook import tqdm\n", + "tqdm.pandas(desc=\"Lookup progress\")\n", + "print('Importing msticpy packages...')\n", + "from msticpy.sectools import *\n", + "from msticpy.nbtools import *\n", + "pd.set_option('display.max_rows', 100)\n", + "pd.set_option('display.max_columns', 50)\n", + "pd.set_option('display.max_colwidth', 100)\n", + "WIDGET_DEFAULTS = {'layout': widgets.Layout(width=\"900px\"),\n", + " 'style': {'description_width': 'initial'}}\n", + "from msticpy.nbtools.foliummap import FoliumMap, get_center_ip_entities, get_map_center\n", + "from msticpy.data.data_providers import QueryProvider\n", + "from msticpy.nbtools.utility import md, md_warn\n", + "pd.options.mode.chained_assignment = None\n", + "from msticpy.nbtools.wsconfig import WorkspaceConfig\n", + "from msticpy.sectools.ip_utils import convert_to_ip_entities\n", + "%env KQLMAGIC_LOAD_MODE=silent\n", + "print('Imports complete')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Collect Azure Sentinel Workspace Details from our config file and use them to connect\n", + "try:\n", + " #Update to WorkspaceConfig(workspace=\"WORKSPACE_NAME\") to get alerts from a Workspace other than your default one.\n", + " ws_config = WorkspaceConfig(workspace=\"Centrica\")\n", + " ws_id = ws_config['workspace_id']\n", + " ten_id = ws_config['tenant_id']\n", + " md(\"Workspace details collected from config file\")\n", + " qry_prov = QueryProvider('LogAnalytics')\n", + " qry_prov.connect(connection_str=ws_config.code_connect_str)\n", + "except RuntimeError:\n", + " md(\"\"\"You do not have any Workspaces configured in your config files.\n", + " Please run the https://github.com/Azure/Azure-Sentinel-Notebooks/blob/master/ConfiguringNotebookEnvironment.ipynb\n", + " to setup these files before proceeding\"\"\" ,'bold')\n", + " \n", + "ti = TILookup()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Network event investigation\n", + "\n", + "Select the time window in which to review network logs in (default is last 24 hours). In large environments you may need to make this time windows smaller in order to avoid query timeout." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "query_times = nbwidgets.QueryTime(units='hours',\n", + " max_before=72, max_after=0, before=24)\n", + "query_times.display()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get Covid-19 related URLs from Network Logs\n", + "start = query_times.start\n", + "end = query_times.end\n", + "url_q = f\"\"\"\n", + "CommonSecurityLog\n", + "| where TimeGenerated between (datetime({start})..datetime({end}))\n", + "| extend Url = iif(isnotempty(RequestURL), RequestURL , iif(isnotempty(DestinationHostName), DestinationHostName, \"None\"))\n", + "| where Url != \"None\" \n", + "| distinct Url\n", + "| where tolower(Url) matches regex(\"(?i)(covid|corona.*virus)\")\n", + "\"\"\"\n", + "print(\"Collecting data...\")\n", + "url_data = qry_prov.exec_query(url_q)\n", + "print(\"Done\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from functools import lru_cache\n", + "\n", + "@lru_cache(maxsize=5000)\n", + "def get_domain(url):\n", + " try:\n", + " _, domain,tld = tldextract.extract(url)\n", + " return f\"{domain}.{tld}\"\n", + " except:\n", + " return None\n", + "\n", + "@lru_cache(maxsize=5000)\n", + "def whois_url(dom):\n", + " try:\n", + " wis = whois.whois(dom)\n", + " return wis['creation_date']\n", + " except:\n", + " return None\n", + "\n", + "tqdm.pandas(desc=\"Lookup progress\")\n", + "\n", + "if isinstance(url_data, pd.DataFrame) and not url_data.empty:\n", + " md(\"Extracting domains\")\n", + " url_data['domain'] = url_data['Url'].progress_apply(get_domain)\n", + " url_data = url_data['domain'].drop_duplicates().reset_index().drop(['index'], axis=1)\n", + " md(\"Getting domain registration dates for {len(url_data)} unique domains\")\n", + " md(\"This can take a while for large numbers of domains ~ 100 domains/min\")\n", + " url_data['creation_date'] = url_data['domain'].progress_apply(whois_url)\n", + "else:\n", + " md(\"No matches found.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pick a date to filter out domains registered before:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "#Date Picker for excluding dates\n", + "date_pick = widgets.DatePicker(\n", + " description='Pick a Date',\n", + " disabled=False,\n", + " value= datetime.date(2020, 3, 1)\n", + ")\n", + "display(date_pick)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "start_time = datetime.datetime.combine(date_pick.value, datetime.datetime.min.time())\n", + "\n", + "\n", + "def clean_dates(row):\n", + " if type(row['creation_date']) == datetime.datetime:\n", + " return row['creation_date']\n", + " elif type(row['creation_date']) == list: \n", + " return row['creation_date'][0]\n", + " elif row['creation_date'] == \"before Aug-1996\":\n", + " return datetime.datetime(1996, 8, 1, 0, 0 ,0)\n", + " else:\n", + " return None\n", + " \n", + "recent_urls_df = None\n", + "if isinstance(url_data, pd.DataFrame) and not url_data.empty: \n", + " url_data = url_data.mask(url_data.eq('None')).dropna() \n", + " md(\"Formatting and filtering dates\")\n", + " url_data['creation_date'] = url_data.progress_apply(clean_dates, axis=1)\n", + " filter_mask = url_data['creation_date'] > start_time\n", + " recent_urls_df = url_data[filter_mask]\n", + " md(\"Recently registered domains relating to Covid-19\",\"bold\")\n", + " display(recent_urls_df)\n", + "else:\n", + " md(\"No URL data to process.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Lookup domain in threat intel\n", + "def lookup_dom(row):\n", + " sev = []\n", + " resp = ti.lookup_ioc(observable=row[\"domain\"], providers=['OTX', 'XForce', 'VirusTotal'])\n", + " for response in resp[1]:\n", + " sev.append(response[1].severity) \n", + " if 'high' in sev:\n", + " severity = \"High\"\n", + " elif 'warning' in sev:\n", + " severity = \"Warning\"\n", + " elif 'information' in sev:\n", + " severity = \"Information\"\n", + " else:\n", + " severity = \"None\"\n", + " return severity\n", + "\n", + "# Lookup primary IP address of domain\n", + "def get_ip(row):\n", + " try:\n", + " ip = dns.resolver.query(row['domain'], \"A\")\n", + " return ip[0]\n", + " except:\n", + " return None\n", + "\n", + "# Lookup IP address in threat intel\n", + "def lookup_ip(row):\n", + " sev = []\n", + " resp = ti.lookup_ioc(observable=str(row[\"ip\"]), providers=['OTX', 'XForce', 'VirusTotal'])\n", + " for response in resp[1]:\n", + " sev.append(response[1].severity) \n", + " if 'high' in sev:\n", + " severity = \"High\"\n", + " elif 'warning' in sev:\n", + " severity = \"Warning\"\n", + " elif 'information' in sev:\n", + " severity = \"Information\"\n", + " else:\n", + " severity = \"None\"\n", + " return severity\n", + "\n", + "\n", + "# Highlight cells based on Threat Intelligence results. \n", + "def color_cells(val):\n", + " if isinstance(val, str):\n", + " if val.lower() == \"high\":\n", + " color = 'Red'\n", + " elif val.lower() == 'warning':\n", + " color = 'Orange'\n", + " elif val.lower() == 'information':\n", + " color = 'Green'\n", + " else:\n", + " color = 'none'\n", + " else:\n", + " color = 'none'\n", + " return 'background-color: %s' % color \n", + "\n", + "if isinstance(recent_urls_df, pd.DataFrame) and not recent_urls_df.empty:\n", + " md(\"Getting IP addresses for domain\")\n", + " recent_urls_df['ip'] = recent_urls_df.progress_apply(get_ip, axis=1)\n", + " md(\"Looking up IP addresses in threat intelligence\")\n", + " recent_urls_df['IP TI Risk'] = recent_urls_df.progress_apply(lookup_ip, axis=1)\n", + " md(\"Looking up domains in threat intelligence\")\n", + " recent_urls_df['Domain TI Risk'] = recent_urls_df.progress_apply(lookup_dom, axis=1)\n", + " md(\"Threat Intellignce results for domains and assocaited IP addresses:\", \"bold\")\n", + " display(recent_urls_df.style.applymap(color_cells).hide_index())\n", + "else:\n", + " md(f\"No domains registered since {start_time} were found\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Select a domain to get more details on:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if isinstance(recent_urls_df, pd.DataFrame) and not recent_urls_df.empty:\n", + " doms = list(recent_urls_df['domain'])\n", + " dom_picker = widgets.Dropdown(\n", + " options=doms,\n", + " description='Domain:',\n", + " disabled=False,\n", + " )\n", + " display(dom_picker)\n", + "else:\n", + " md(f\"No domains registered since {start_time} were found\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if isinstance(recent_urls_df, pd.DataFrame) and not recent_urls_df.empty:\n", + " dom_details = recent_urls_df[recent_urls_df['domain'] == dom_picker.value]\n", + " md(\"Domain details:\",\"bold\")\n", + " display(dom_details)\n", + " resp = ti.lookup_ioc(observable=dom_details.iloc[0]['domain'], providers=['OTX', 'XForce', 'VirusTotal'])\n", + " resp = ti.result_to_df(resp)\n", + " md(\"Domain TI details:\",\"bold\")\n", + " display(resp)\n", + " ip_resp = ti.lookup_ioc(observable=str(dom_details.iloc[0]['ip']), providers=['OTX', 'XForce', 'VirusTotal'])\n", + " ip_resp = ti.result_to_df(ip_resp)\n", + " md(\"IP address TI details:\",\"bold\")\n", + " display(ip_resp)\n", + " dom_q = f\"\"\"\n", + " CommonSecurityLog\n", + " | where TimeGenerated between (datetime({start})..datetime({end}))\n", + " //| extend Url = iif(isnotempty(RequestURL), RequestURL , iif(isnotempty(DestinationHostName), DestinationHostName, \"None\"))\n", + " //| where Url != \"None\" and tolower(Url) contains ('{dom_picker.value}')\n", + " | where RequestURL has \"{dom_picker.value.strip()}\" or DestinationHostName has \"{dom_picker.value.strip()}\"\n", + " \"\"\"\n", + " print(\"Getting raw events (this can take a few minutes)...\")\n", + " dom_data = qry_prov.exec_query(dom_q)\n", + " md(\"Raw log events:\",\"bold\")\n", + " display(dom_data)\n", + "else:\n", + " md(f\"No domains registered since {start_time} were found\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if isinstance(recent_urls_df, pd.DataFrame) and not recent_urls_df.empty:\n", + " ips = dom_data['DestinationIP'].unique()\n", + " folium_map = FoliumMap()\n", + " ip_entities = [] \n", + " for ip in ips:\n", + " ip_entities.append(convert_to_ip_entities(ip)[0])\n", + " md(f'Map of destination IP addresses associated with {dom_picker.value}', 'bold')\n", + " icon_props = {'color': 'orange'}\n", + " location = get_map_center(ip_entities)\n", + " folium_map.add_ip_cluster(ip_entities=ip_entities, location=location,\n", + " **icon_props)\n", + " display(folium_map.folium_map)\n", + "else:\n", + " md(f\"No domains registered since {start_time} were found\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Office Activity Investigation\n", + "\n", + "Review Covid-19 related files that have been accessed by large part of your organization. There is a good chance many of these are legitimate organizational documents but some may be widely shared malicious or mis-leading documents.\n", + "Enter the approximate number of users in your organization and the query will identify documents accessed by more than 10% of your user base.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "users = widgets.IntText(\n", + " value=1000,\n", + " description='No. of users:',\n", + " disabled=False\n", + ")\n", + "display(users)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Widely accessed file query\n", + "percentage_users = users.value * 0.1\n", + "files_q = f\"\"\"\n", + "OfficeActivity\n", + "| where SourceFileName matches regex(\"(?i)(covid|corona.*virus)\")\n", + "| where Operation == \"FileAccessed\"\n", + "| summarize dcount(UserId) by SourceFileName, OfficeObjectId\n", + "| where dcount_UserId > {percentage_users}\n", + "| sort by dcount_UserId\n", + "\"\"\"\n", + "\n", + "files = qry_prov.exec_query(files_q)\n", + "if isinstance(files, pd.DataFrame) and not files.empty:\n", + " display(files)\n", + "else:\n", + " md(\"No Covid-19 related files found.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Look for Covid-19 related documents that have been uploaded by User Agents that have not been widely seen in the environment over the last 30 days. This may indicate malicious users uploading documents." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rare_uas_q = \"\"\"\n", + "let rare_uas = (\n", + "OfficeActivity\n", + "| where TimeGenerated > ago(30d)\n", + "| where Operation == \"FileUploaded\"\n", + "| where UserAgent !startswith \"Microsoft Office\" and UserAgent !startswith \"OneNote\" and UserAgent !startswith \"OneDrive\" and UserAgent !startswith \"Outlook\" and UserAgent !startswith \"Exchange\" \n", + "| summarize count() by UserAgent\n", + "| project UserAgent);\n", + "OfficeActivity\n", + "| where Operation == \"FileUploaded\"\n", + "| where SourceFileName matches regex(\"(?i)(covid|corona.*virus)\")\n", + "| where UserAgent in~ (rare_uas)\n", + "\"\"\"\n", + "print(\"Collecting data...\")\n", + "rare_uas = None\n", + "rare_uas = qry_prov.exec_query(rare_uas_q)\n", + "\n", + "if isinstance(rare_uas, pd.DataFrame) and not rare_uas.empty:\n", + " md(\"Done\")\n", + "else:\n", + " md(\"No Covid-19 related activity found.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "uas_picker = None\n", + "if isinstance(rare_uas, pd.DataFrame) and not rare_uas.empty:\n", + " uas = rare_uas['UserAgent'].unique()\n", + " uas_picker = widgets.Dropdown(\n", + " options=uas,\n", + " description='User Agent:',\n", + " disabled=False,\n", + " )\n", + " display(uas_picker)\n", + "else:\n", + " md(f\"No rare User Agents Found\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if uas_picker:\n", + " filtered_rare_uas = rare_uas[rare_uas['UserAgent']==uas_picker.value]\n", + " display(filtered_rare_uas)\n", + "else:\n", + " md(f\"No rare User Agents Found\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rare_uas_ips = filtered_rare_uas['ClientIP'].unique()\n", + "folium_map = FoliumMap()\n", + "ip_entities = [] \n", + "for ip in rare_uas_ips:\n", + " ip_entities.append(convert_to_ip_entities(ip)[0])\n", + "md('Map of the source IP of file uploads from rare user agents', 'bold')\n", + "icon_props = {'color': 'orange'}\n", + "location = get_map_center(ip_entities)\n", + "folium_map.add_ip_cluster(ip_entities=ip_entities, location=location,\n", + " **icon_props)\n", + "display(folium_map.folium_map)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Host Activity Investigation\n", + "Look for new processes spawned from a command line containing COVID-19 related names that may be phishing lures. We start by looking at the number of hosts observed with the command line spawning the particular process and from there can drill down into a specific command line." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "process_q = f\"\"\"\n", + "SecurityEvent\n", + "| where TimeGenerated between (datetime({start})..datetime({end}))\n", + "| where EventID == 4688\n", + "| where CommandLine matches regex(\"(?i)(covid|corona.*virus)\")\n", + "| summarize dcount(Computer) by CommandLine, NewProcessName\n", + "| sort by dcount_Computer\"\"\"\n", + "\n", + "process_data = qry_prov.exec_query(process_q)\n", + "\n", + "cmd_lines = None\n", + "if isinstance(process_data, pd.DataFrame) and not process_data.empty:\n", + " display(process_data)\n", + " cmd_lines = process_data['CommandLine'].unique()\n", + "else:\n", + " md(\"No Covid related process data found\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Select command line to look at in more detail:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if cmd_lines:\n", + " cmd_line = widgets.Dropdown(\n", + " options=cmd_lines,\n", + " description='Command Lines:',\n", + " disabled=False,\n", + " )\n", + " display(cmd_line)\n", + "else:\n", + " md(\"No Covid related process data found\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if cmd_lines:\n", + " cmd_line_clean = cmd_line.value.replace('\\\\', \"\\\\\\\\\")\n", + " cmd_line_q = f\"\"\"\n", + " SecurityEvent\n", + " | where TimeGenerated between (datetime({start})..datetime({end}))\n", + " | where EventID == 4688\n", + " | where CommandLine == '{cmd_line_clean}'\n", + " \"\"\"\n", + "\n", + " cmd_line_events = qry_prov.exec_query(cmd_line_q)\n", + "\n", + " if isinstance(cmd_line_events, pd.DataFrame) and not cmd_line_events.empty:\n", + " display(cmd_line_events)\n", + " else:\n", + " md(\"No events found\")\n", + "else:\n", + " md(\"No Covid related process data found\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}