Skip to content

Commit 73b2f75

Browse files
authored
Merge pull request #2340 from aboutcode-org/fix_enhancement_pipelines
Fix exploit enhancement pipelines
2 parents 8a8e5d5 + 4f114d6 commit 73b2f75

11 files changed

Lines changed: 299 additions & 186 deletions
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Generated by Django 5.2.11 on 2026-06-11 13:01
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """
    Add the nullable ``record_id`` column to ``advisoryexploit`` and a unique
    constraint over the ("advisory", "data_source", "record_id") fields.
    """

    dependencies = [("vulnerabilities", "0134_advisoryv2__all_impacts_unfurled_at_and_more")]

    operations = [
        # Step 1: the new column, declared with the same options as the model field.
        migrations.AddField(
            model_name="advisoryexploit",
            name="record_id",
            field=models.CharField(
                null=True,
                blank=True,
                max_length=255,
                help_text=(
                    "The unique identifier for the exploit record in the original "
                    "upstream data source, such as the CISA KEV ID or the exploitdb ID."
                ),
            ),
        ),
        # Step 2: the constraint that references the column added in step 1.
        migrations.AddConstraint(
            model_name="advisoryexploit",
            constraint=models.UniqueConstraint(
                name="unique_advisory_exploit_source",
                fields=("advisory", "data_source", "record_id"),
            ),
        ),
    ]

vulnerabilities/models.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3959,10 +3959,25 @@ class AdvisoryExploit(models.Model):
39593959
help_text="The URL to the exploit as provided in the original upstream data source.",
39603960
)
39613961

3962+
record_id = models.CharField(
3963+
null=True,
3964+
blank=True,
3965+
max_length=255,
3966+
help_text="The unique identifier for the exploit record in the original upstream data source, such as the CISA KEV ID or the exploitdb ID.",
3967+
)
3968+
39623969
@property
39633970
def get_known_ransomware_campaign_use_type(self):
39643971
return "Known" if self.known_ransomware_campaign_use else "Unknown"
39653972

3973+
class Meta:
3974+
constraints = [
3975+
models.UniqueConstraint(
3976+
fields=["advisory", "data_source", "record_id"],
3977+
name="unique_advisory_exploit_source",
3978+
)
3979+
]
3980+
39663981

39673982
class SSVC(models.Model):
39683983
vector = models.CharField(max_length=255, help_text="The vector string representing the SSVC.")

vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py

Lines changed: 68 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,10 @@
1515
import requests
1616
from aboutcode.pipeline import LoopProgress
1717
from dateutil import parser as dateparser
18-
from django.db import DataError
1918

20-
from vulnerabilities.models import AdvisoryAlias
2119
from vulnerabilities.models import AdvisoryExploit
22-
from vulnerabilities.models import AdvisoryReference
23-
from vulnerabilities.models import AdvisoryV2
2420
from vulnerabilities.pipelines import VulnerableCodePipeline
25-
from vulnerabilities.utils import relate_aliases_with_advisories
21+
from vulnerabilities.utils import build_alias_to_advisory_map
2622

2723

2824
class ExploitDBImproverPipeline(VulnerableCodePipeline):
@@ -66,86 +62,80 @@ def add_exploit(self):
6662
raw_data = list(csvreader)
6763
fetched_exploit_count = len(raw_data)
6864

69-
vulnerability_exploit_count = 0
7065
self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records")
7166
progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log)
7267

73-
for row in progress.iter(raw_data):
74-
vulnerability_exploit_count += add_vulnerability_exploit(row, self.log)
75-
76-
self.log(f"Successfully added {vulnerability_exploit_count:,d} exploit-db advisory exploit")
77-
78-
79-
def add_vulnerability_exploit(row, logger):
80-
advisories = set()
81-
82-
aliases = row["codes"].split(";") if row["codes"] else []
83-
84-
if not aliases:
85-
return 0
68+
all_aliases = set()
8669

87-
advisories = relate_aliases_with_advisories(aliases)
70+
for row in raw_data:
71+
if row["codes"]:
72+
all_aliases.update(row["codes"].split(";"))
8873

89-
if not advisories:
90-
logger(f"No advisory found for aliases {aliases}")
91-
return 0
74+
alias_to_advisories = build_alias_to_advisory_map(all_aliases)
9275

93-
date_added = parse_date(row["date_added"])
94-
source_date_published = parse_date(row["date_published"])
95-
source_date_updated = parse_date(row["date_updated"])
76+
exploits = []
77+
seen = set()
9678

97-
for advisory in advisories:
98-
add_exploit_references(row["codes"], row["source_url"], row["file"], advisory, logger)
99-
try:
100-
AdvisoryExploit.objects.update_or_create(
101-
advisory=advisory,
102-
data_source="Exploit-DB",
103-
defaults={
104-
"date_added": date_added,
105-
"description": row["description"],
106-
"known_ransomware_campaign_use": row["verified"],
107-
"source_date_published": source_date_published,
108-
"exploit_type": row["type"],
109-
"platform": row["platform"],
110-
"source_date_updated": source_date_updated,
111-
"source_url": row["source_url"],
112-
},
113-
)
114-
except DataError as e:
115-
logger(
116-
f"Failed to Create the Vulnerability Exploit-DB with error {e!r}:\n{traceback_format_exc()}",
117-
level=logging.ERROR,
118-
)
119-
return 1
120-
121-
122-
def add_exploit_references(ref_id, direct_url, path, adv, logger):
123-
url_map = {
124-
"file_url": f"https://gitlab.com/exploit-database/exploitdb/-/blob/main/{path}",
125-
"direct_url": direct_url,
126-
}
127-
128-
for key, url in url_map.items():
129-
if url:
130-
try:
131-
ref, created = AdvisoryReference.objects.update_or_create(
132-
url=url,
133-
defaults={
134-
"reference_id": ref_id,
135-
"reference_type": AdvisoryReference.EXPLOIT,
136-
},
137-
)
138-
139-
if created:
140-
ref.advisories.add(adv)
141-
ref.save()
142-
logger(f"Created {ref} for {adv} with {key}={url}")
143-
144-
except DataError as e:
145-
logger(
146-
f"Failed to Create the Vulnerability Reference For Exploit-DB with error {e!r}:\n{traceback_format_exc()}",
147-
level=logging.ERROR,
148-
)
79+
for row in progress.iter(raw_data):
80+
aliases = row["codes"].split(";") if row["codes"] else []
81+
82+
if not aliases:
83+
continue
84+
85+
date_added = parse_date(row["date_added"])
86+
source_date_published = parse_date(row["date_published"])
87+
source_date_updated = parse_date(row["date_updated"])
88+
89+
for alias in aliases:
90+
for advisory in alias_to_advisories.get(alias, ()):
91+
92+
key = (
93+
advisory.id,
94+
"Exploit-DB",
95+
alias,
96+
)
97+
98+
if key in seen:
99+
continue
100+
101+
seen.add(key)
102+
103+
exploits.append(
104+
AdvisoryExploit(
105+
advisory=advisory,
106+
record_id=alias,
107+
data_source="Exploit-DB",
108+
date_added=date_added,
109+
description=row["description"],
110+
known_ransomware_campaign_use=row["verified"],
111+
source_date_published=source_date_published,
112+
exploit_type=row["type"],
113+
platform=row["platform"],
114+
source_date_updated=source_date_updated,
115+
source_url=row["source_url"],
116+
)
117+
)
118+
119+
AdvisoryExploit.objects.bulk_create(
120+
exploits,
121+
update_conflicts=True,
122+
unique_fields=[
123+
"advisory",
124+
"data_source",
125+
"record_id",
126+
],
127+
update_fields=[
128+
"date_added",
129+
"description",
130+
"known_ransomware_campaign_use",
131+
"source_date_published",
132+
"exploit_type",
133+
"platform",
134+
"source_date_updated",
135+
"source_url",
136+
],
137+
batch_size=1000,
138+
)
149139

150140

151141
def parse_date(date_string):

vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py

Lines changed: 52 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,9 @@
1313
import requests
1414
from aboutcode.pipeline import LoopProgress
1515

16-
from vulnerabilities.models import AdvisoryAlias
1716
from vulnerabilities.models import AdvisoryExploit
18-
from vulnerabilities.models import AdvisoryV2
1917
from vulnerabilities.pipelines import VulnerableCodePipeline
20-
from vulnerabilities.utils import relate_aliases_with_advisories
18+
from vulnerabilities.utils import build_alias_to_advisory_map
2119

2220

2321
class VulnerabilityKevPipeline(VulnerableCodePipeline):
@@ -54,40 +52,56 @@ def fetch_exploits(self):
5452
def add_exploits(self):
    """
    Create or update the KEV ``AdvisoryExploit`` rows for the fetched KEV records.

    Read the records from the "vulnerabilities" key of ``self.kev_data``, look up
    the advisories for all their ``cveID`` values with a single
    ``build_alias_to_advisory_map`` call, and write one exploit per
    (advisory, CVE) pair with ``bulk_create(update_conflicts=True)`` keyed on
    the ("advisory", "data_source", "record_id") fields.

    Records without a ``cveID`` are skipped. Return None.
    """
    # Read both keys defensively: a payload without "vulnerabilities" yields no
    # work instead of a KeyError, and a payload without "count" must not reach
    # the ``:,d`` format below as None.
    records = self.kev_data.get("vulnerabilities") or []
    fetched_exploit_count = self.kev_data.get("count")
    if fetched_exploit_count is None:
        fetched_exploit_count = len(records)

    self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records")
    progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log)

    cve_ids = {record["cveID"] for record in records if record.get("cveID")}

    # NOTE(review): presumably maps each alias to an iterable of advisories;
    # confirm against the helper in vulnerabilities.utils.
    cve_to_advisories = build_alias_to_advisory_map(cve_ids)

    exploits = []

    # (avid, CVE) pairs already queued, so each pair is sent to bulk_create once.
    # NOTE(review): the key uses ``advisory.avid`` while the conflict fields use
    # the advisory itself; confirm that an avid identifies a single advisory row.
    seen = set()

    for record in progress.iter(records):
        cve_id = record.get("cveID")

        if not cve_id:
            continue

        for advisory in cve_to_advisories.get(cve_id, []):
            key = (advisory.avid, cve_id)
            if key in seen:
                continue
            seen.add(key)

            exploits.append(
                AdvisoryExploit(
                    advisory=advisory,
                    record_id=cve_id,
                    data_source="KEV",
                    description=record["shortDescription"],
                    date_added=record["dateAdded"],
                    required_action=record["requiredAction"],
                    due_date=record["dueDate"],
                    notes=record["notes"],
                    known_ransomware_campaign_use=(
                        record["knownRansomwareCampaignUse"] == "Known"
                    ),
                )
            )

    if exploits:
        AdvisoryExploit.objects.bulk_create(
            exploits,
            update_conflicts=True,
            unique_fields=["advisory", "data_source", "record_id"],
            update_fields=[
                "description",
                "date_added",
                "required_action",
                "due_date",
                "notes",
                "known_ransomware_campaign_use",
            ],
            batch_size=1000,
        )

    # Number of exploit rows handed to the upsert (created or updated).
    self.log(f"Successfully added {len(exploits):,d} kev exploit")

0 commit comments

Comments
 (0)