Skip to content

Commit

Permalink
Merge pull request #2133 from blacklanternsecurity/misc-bugfixes
Browse files Browse the repository at this point in the history
Lift 10K target limit, misc bugfixes
  • Loading branch information
TheTechromancer authored Jan 6, 2025
2 parents f6dd3de + 1ab0f9a commit 59ffee2
Show file tree
Hide file tree
Showing 9 changed files with 91 additions and 19 deletions.
8 changes: 7 additions & 1 deletion bbot/core/helpers/async_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,18 @@ async def lock(self, name):
class TaskCounter:
def __init__(self):
self.tasks = {}
self.lock = asyncio.Lock() # create a new lock
self._lock = None

@property
def value(self):
    """Total of the ``n`` attribute over every task currently tracked in ``self.tasks``."""
    tracked = self.tasks.values()
    return sum(task.n for task in tracked)

@property
def lock(self):
    """Return this counter's ``asyncio.Lock``, creating it on first access.

    The lock is cached in ``self._lock``; later accesses return the same object.
    """
    existing = self._lock
    if existing is not None:
        return existing
    self._lock = asyncio.Lock()
    return self._lock

def count(self, task_name, n=1, _log=True):
if callable(task_name):
task_name = f"{task_name.__qualname__}()"
Expand Down
7 changes: 5 additions & 2 deletions bbot/core/helpers/regexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,12 @@
)
ip_range_regexes = [re.compile(r, re.I) for r in _ip_range_regexes]

# dns names with periods
# all dns names including IP addresses and bare hostnames (e.g. "localhost")
_dns_name_regex = r"(?:\w(?:[\w-]{0,100}\w)?\.?)+(?:[xX][nN]--)?[^\W_]{1,63}\.?"
dns_name_extraction_regex = re.compile(_dns_name_regex, re.I)
# dns names with periods (e.g. "www.example.com")
_dns_name_regex_with_period = r"(?:\w(?:[\w-]{0,100}\w)?\.)+(?:[xX][nN]--)?[^\W_]{1,63}\.?"

dns_name_extraction_regex = re.compile(_dns_name_regex_with_period, re.I)
dns_name_validation_regex = re.compile(r"^" + _dns_name_regex + r"$", re.I)

_email_regex = r"(?:[^\W_][\w\-\.\+']{,100})@" + _dns_name_regex
Expand Down
8 changes: 5 additions & 3 deletions bbot/modules/internal/excavate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import yara
import json
import html
import time
import inspect
import regex as re
from pathlib import Path
Expand Down Expand Up @@ -776,8 +777,7 @@ class HostnameExtractor(ExcavateRule):

def __init__(self, excavate):
super().__init__(excavate)
if excavate.scan.dns_yara_rules_uncompiled:
self.yara_rules["hostname_extraction"] = excavate.scan.dns_yara_rules_uncompiled
self.yara_rules.update(excavate.scan.dns_yara_rules_uncompiled)

async def process(self, yara_results, event, yara_rule_settings, discovery_context):
for identifier in yara_results.keys():
Expand Down Expand Up @@ -882,10 +882,12 @@ async def setup(self):
yara.set_config(max_match_data=yara_max_match_data)
yara_rules_combined = "\n".join(self.yara_rules_dict.values())
try:
self.info(f"Compiling {len(self.yara_rules_dict):,} YARA rules")
start = time.time()
self.verbose(f"Compiling {len(self.yara_rules_dict):,} YARA rules")
for rule_name, rule_content in self.yara_rules_dict.items():
self.debug(f" - {rule_name}")
self.yara_rules = yara.compile(source=yara_rules_combined)
self.verbose(f"{len(self.yara_rules_dict):,} YARA rules compiled in {time.time() - start:.2f} seconds")
except yara.SyntaxError as e:
self.debug(yara_rules_combined)
return False, f"Yara Rules failed to compile with error: [{e}]"
Expand Down
2 changes: 1 addition & 1 deletion bbot/modules/wappalyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class wappalyzer(BaseModule):
"created_date": "2022-04-15",
"author": "@liquidsec",
}
deps_pip = ["python-Wappalyzer~=0.3.1", "aiohttp~=3.9.0b0"]
deps_pip = ["python-Wappalyzer~=0.3.1", "aiohttp~=3.9.0b0", "setuptools"]
# accept all events regardless of scope distance
scope_distance_modifier = None
_module_threads = 5
Expand Down
18 changes: 14 additions & 4 deletions bbot/scanner/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1089,9 +1089,19 @@ def dns_yara_rules_uncompiled(self):
regexes_component_list = []
for i, r in enumerate(self.dns_regexes_yara):
regexes_component_list.append(rf"$dns_name_{i} = /\b{r.pattern}/ nocase")
if regexes_component_list:
regexes_component = " ".join(regexes_component_list)
self._dns_yara_rules_uncompiled = f'rule hostname_extraction {{meta: description = "matches DNS hostname pattern derived from target(s)" strings: {regexes_component} condition: any of them}}'

# Chunk the regexes into groups of 10,000
chunk_size = 10000
rules = {}
for chunk_index in range(0, len(regexes_component_list), chunk_size):
chunk = regexes_component_list[chunk_index : chunk_index + chunk_size]
if chunk:
regexes_component = " ".join(chunk)
rule_name = f"hostname_extraction_{chunk_index // chunk_size}"
rule = f'rule {rule_name} {{meta: description = "matches DNS hostname pattern derived from target(s)" strings: {regexes_component} condition: any of them}}'
rules[rule_name] = rule

self._dns_yara_rules_uncompiled = rules
return self._dns_yara_rules_uncompiled

async def dns_yara_rules(self):
Expand All @@ -1100,7 +1110,7 @@ async def dns_yara_rules(self):
import yara

self._dns_yara_rules = await self.helpers.run_in_executor(
yara.compile, source=self.dns_yara_rules_uncompiled
yara.compile, source="\n".join(self.dns_yara_rules_uncompiled.values())
)
return self._dns_yara_rules

Expand Down
14 changes: 7 additions & 7 deletions bbot/test/bbot_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,12 +224,12 @@ class bbot_events:
return bbot_events


@pytest.fixture(scope="session", autouse=True)
def install_all_python_deps():
deps_pip = set()
for module in DEFAULT_PRESET.module_loader.preloaded().values():
deps_pip.update(set(module.get("deps", {}).get("pip", [])))
# @pytest.fixture(scope="session", autouse=True)
# def install_all_python_deps():
# deps_pip = set()
# for module in DEFAULT_PRESET.module_loader.preloaded().values():
# deps_pip.update(set(module.get("deps", {}).get("pip", [])))

constraint_file = tempwordlist(get_python_constraints())
# constraint_file = tempwordlist(get_python_constraints())

subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip))
# subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip))
33 changes: 33 additions & 0 deletions bbot/test/test_step_1/test_dns.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,39 @@ async def test_dns_graph_structure(bbot_scanner):
assert str(events_by_data["evilcorp.com"].module) == "host"


@pytest.mark.asyncio
async def test_hostname_extraction(bbot_scanner):
    """Check that hostnames and IPs inside a mocked SPF TXT record become DNS children.

    The scan target's TXT answer lists several ``include:`` hosts plus a bare IP;
    every one of them is expected in ``dns_children["TXT"]`` of the target's event.
    """
    spf_record = "v=spf1 include:spf-a.evilcorp.com include:spf-b.evilcorp.com include:icpbounce.com include:shops.shopify.com include:_spf.qemailserver.com include:spf.mandrillapp.com include:spf.protection.office365.us include:spf-003ea501.gpphosted.com 127.0.0.1 -all"
    mocked_answers = {
        "evilcorp.com": {
            "A": ["127.0.0.1"],
            "TXT": [spf_record],
        }
    }
    expected_txt_children = {
        "spf-a.evilcorp.com",
        "spf-b.evilcorp.com",
        "icpbounce.com",
        "shops.shopify.com",
        "_spf.qemailserver.com",
        "spf.mandrillapp.com",
        "spf.protection.office365.us",
        "spf-003ea501.gpphosted.com",
        "127.0.0.1",
    }

    # NOTE(review): "minimal": False presumably enables full DNS resolution — confirm against the dns config docs
    scan = bbot_scanner("evilcorp.com", config={"dns": {"minimal": False}})
    await scan.helpers.dns._mock_dns(mocked_answers)

    emitted = [event async for event in scan.async_start()]

    # exactly one DNS_NAME event should exist for the scan target itself
    target_events = [event for event in emitted if event.type == "DNS_NAME" and event.data == "evilcorp.com"]
    assert len(target_events) == 1

    children = target_events[0].dns_children
    assert children["A"] == {"127.0.0.1"}
    assert children["TXT"] == expected_txt_children


@pytest.mark.asyncio
async def test_dns_helpers(bbot_scanner):
assert service_record("") is False
Expand Down
18 changes: 18 additions & 0 deletions bbot/test/test_step_1/test_scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,21 @@ async def test_python_output_matches_json(bbot_scanner):
assert len([e for e in events if e["type"] == "ORG_STUB"]) == 1
assert len([e for e in events if e["type"] == "IP_ADDRESS"]) == 1
assert events == json_events


@pytest.mark.asyncio
async def test_huge_target_list(bbot_scanner):
    """Check how many ``hostname_extraction_<n>`` YARA rules excavate holds for small and large target lists."""

    def excavate_rules(prepared_scan):
        # looked up fresh on every call, exactly as each assertion did originally
        return prepared_scan.modules["excavate"].yara_rules_dict

    # a single target should produce only one rule
    small_scan = bbot_scanner("evilcorp.com", config={"excavate": True})
    await small_scan._prep()
    assert "hostname_extraction_0" in excavate_rules(small_scan)
    assert "hostname_extraction_1" not in excavate_rules(small_scan)

    # more than 10000 targets should be split across two rules
    num_targets = 10005
    targets = []
    for i in range(num_targets):
        targets.append(f"evil{i}.com")
    big_scan = bbot_scanner(*targets, config={"excavate": True})
    await big_scan._prep()
    assert "hostname_extraction_0" in excavate_rules(big_scan)
    assert "hostname_extraction_1" in excavate_rules(big_scan)
    assert "hostname_extraction_2" not in excavate_rules(big_scan)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ env = [
"BBOT_TESTING = True",
]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"
asyncio_default_fixture_loop_scope = "session"

[build-system]
requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning"]
Expand Down

0 comments on commit 59ffee2

Please sign in to comment.