Skip to content

Commit 466a4db

Browse files
authored
Merge pull request #546 from intelowlproject/develop
1.6.5
2 parents 663459c + 2825dfe commit 466a4db

23 files changed

+396
-231
lines changed

.env_template

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@ COMPOSE_FILE=docker/default.yml:docker/local.override.yml
1313
#COMPOSE_FILE=docker/default.yml:docker/local.override.yml:docker/elasticsearch.yml
1414

1515
# If you want to run a specific version, populate this
16-
# REACT_APP_INTELOWL_VERSION="1.6.4"
16+
# REACT_APP_INTELOWL_VERSION="1.6.5"

api/views/feeds.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def feeds(request, feed_type, attack_type, prioritize, format_):
2525
attack_type (str): Type of attack (e.g., all, specific attack types).
2626
prioritize (str): Prioritization mechanism to use (e.g., recent, persistent).
2727
format_ (str): Desired format of the response (e.g., json, csv, txt).
28-
exclude_mass_scanners (bool): query parameter flag to exclude IOCs that are known mass scanners.
28+
include_mass_scanners (bool): query parameter flag to include IOCs that are known mass scanners.
2929
3030
Returns:
3131
Response: The HTTP response with formatted IOC data.
@@ -34,8 +34,8 @@ def feeds(request, feed_type, attack_type, prioritize, format_):
3434

3535
feed_params = FeedRequestParams({"feed_type": feed_type, "attack_type": attack_type, "format_": format_})
3636
feed_params.set_prioritization(prioritize)
37-
if request.query_params and "exclude_mass_scanners" in request.query_params:
38-
feed_params.exclude_mass_scanners()
37+
if request.query_params and "include_mass_scanners" in request.query_params:
38+
feed_params.include_mass_scanners()
3939

4040
valid_feed_types = get_valid_feed_types()
4141
iocs_queryset = get_queryset(request, feed_params, valid_feed_types)
@@ -59,8 +59,8 @@ def feeds_pagination(request):
5959
feed_params = FeedRequestParams(request.query_params)
6060
feed_params.format = "json"
6161
feed_params.set_prioritization(request.query_params.get("prioritize"))
62-
if request.query_params and "exclude_mass_scanners" in request.query_params:
63-
feed_params.exclude_mass_scanners()
62+
if request.query_params and "include_mass_scanners" in request.query_params:
63+
feed_params.include_mass_scanners()
6464

6565
valid_feed_types = get_valid_feed_types()
6666
iocs_queryset = get_queryset(request, feed_params, valid_feed_types)
@@ -83,8 +83,8 @@ def feeds_advanced(request):
8383
attack_type (str): Type of attack to filter. (supported: `scanner`, `payload_request`, `all`; default: `all`)
8484
max_age (int): Maximum number of days since last occurrence. E.g. an IOC that was last seen 4 days ago is excluded by default. (default: 3)
8585
min_days_seen (int): Minimum number of days on which an IOC must have been seen. (default: 1)
86-
include_reputation (str): `;`-separated list of reputation values to include, e.g. `known attacker` or `known attacker;` to include IOCs without reputation. (default: include all)
87-
exclude_reputation (str): `;`-separated list of reputation values to exclude, e.g. `mass scanner` or `mass scanner;bot, crawler`. (default: exclude none)
86+
include_reputation (str): `;`-separated list of reputation values to include, e.g. `known attacker` or `known attacker;` to include IOCs without reputation. (default: include all) This takes precedence over exclusion.
87+
exclude_reputation (str): `;`-separated list of reputation values to exclude, e.g. `mass scanner` or `mass scanner;bot, crawler`. (default: exclude mass scanners)
8888
feed_size (int): Number of IOC items to return. (default: 5000)
8989
ordering (str): Field to order results by, with optional `-` prefix for descending. (default: `-last_seen`)
9090
verbose (bool): `true` to include IOC properties that contain a lot of data, e.g. the list of days it was seen. (default: `false`)

api/views/utils.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from django.contrib.postgres.aggregates import ArrayAgg
1212
from django.db.models import F, Q
1313
from django.http import HttpResponse, HttpResponseBadRequest, StreamingHttpResponse
14-
from greedybear.consts import FEEDS_LICENSE, PAYLOAD_REQUEST, SCANNER
14+
from greedybear.consts import FEEDS_LICENSE
1515
from greedybear.models import IOC, GeneralHoneypot, Statistics
1616
from greedybear.settings import EXTRACTION_INTERVAL
1717
from rest_framework import status
@@ -75,10 +75,11 @@ def __init__(self, query_params: dict):
7575
self.paginate = query_params.get("paginate", "false").lower()
7676
self.format = query_params.get("format_", "json").lower()
7777
self.feed_type_sorting = None
78-
79-
def exclude_mass_scanners(self):
8078
self.exclude_reputation.append("mass scanner")
8179

80+
def include_mass_scanners(self):
81+
self.exclude_reputation.remove("mass scanner")
82+
8283
def set_prioritization(self, prioritize: str):
8384
match prioritize:
8485
case "recent":
@@ -154,11 +155,14 @@ def get_queryset(request, feed_params, valid_feed_types):
154155
query_dict["number_of_days_seen__gte"] = int(feed_params.min_days_seen)
155156
if feed_params.include_reputation:
156157
query_dict["ip_reputation__in"] = feed_params.include_reputation
158+
for reputation_type in feed_params.include_reputation:
159+
if reputation_type in feed_params.exclude_reputation:
160+
feed_params.exclude_reputation.remove(reputation_type)
157161

158162
iocs = (
159163
IOC.objects.filter(**query_dict)
160164
.filter(Q(cowrie=True) | Q(log4j=True) | Q(general_honeypot__active=True))
161-
.exclude(ip_reputation__in=feed_params.exclude_reputation)
165+
.exclude(Q() if "nothing" in feed_params.exclude_reputation else Q(ip_reputation__in=feed_params.exclude_reputation))
162166
.annotate(value=F("name"))
163167
.annotate(honeypots=ArrayAgg("general_honeypot__name"))
164168
.order_by(feed_params.ordering)[: int(feed_params.feed_size)]

docker/.version

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
REACT_APP_GREEDYBEAR_VERSION="1.6.4"
1+
REACT_APP_GREEDYBEAR_VERSION="1.6.5"

docker/Dockerfile_nginx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM library/nginx:1.27.5-alpine
1+
FROM library/nginx:1.29.0-alpine
22
RUN mkdir -p /var/cache/nginx /var/cache/nginx/feeds
33
RUN apk update && apk upgrade && apk add bash
44
ENV NGINX_LOG_DIR=/var/log/nginx

greedybear/admin.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,30 @@
55
from django.contrib import admin, messages
66
from django.db.models import Q
77
from django.utils.translation import ngettext
8-
from greedybear.models import IOC, CommandSequence, CowrieSession, GeneralHoneypot
8+
from greedybear.models import IOC, CommandSequence, CowrieSession, GeneralHoneypot, MassScanners, Sensors, Statistics
99

1010
logger = logging.getLogger(__name__)
1111

12-
# there is no need to view the sensors in the admin page.
13-
# @admin.register(Sensors)
14-
# class SensorsModelAdmin(admin.ModelAdmin):
15-
# list_display = [field.name for field in Sensors._meta.get_fields()]
12+
13+
@admin.register(Sensors)
14+
class SensorsModelAdmin(admin.ModelAdmin):
15+
list_display = [field.name for field in Sensors._meta.get_fields()]
16+
17+
18+
@admin.register(Statistics)
19+
class StatisticsModelAdmin(admin.ModelAdmin):
20+
list_display = ["source", "view", "request_date"]
21+
list_filter = ["source"]
22+
search_fields = ["source"]
23+
search_help_text = ["search for the IP address source"]
24+
25+
26+
@admin.register(MassScanners)
27+
class MassScannersModelAdmin(admin.ModelAdmin):
28+
list_display = ["ip_address", "added", "reason"]
29+
list_filter = ["reason"]
30+
search_fields = ["ip_address"]
31+
search_help_text = ["search for the IP address source"]
1632

1733

1834
class SessionInline(admin.TabularInline):
@@ -28,13 +44,17 @@ class SessionInline(admin.TabularInline):
2844
class CowrieSessionModelAdmin(admin.ModelAdmin):
2945
list_display = ["session_id", "start_time", "duration", "login_attempt", "credentials", "command_execution", "interaction_count", "source"]
3046
search_fields = ["source__name"]
47+
search_help_text = ["search for the IP address source"]
3148
raw_id_fields = ["source", "commands"]
49+
list_filter = ["login_attempt", "command_execution"]
3250

3351

3452
@admin.register(CommandSequence)
3553
class CommandSequenceModelAdmin(admin.ModelAdmin):
36-
list_display = ["first_seen", "last_seen", "cluster", "commands"]
54+
list_display = ["first_seen", "last_seen", "cluster", "commands", "commands_hash"]
3755
inlines = [SessionInline]
56+
search_fields = ["source__name", "commands_hash"]
57+
list_filter = ["cluster", "commands_hash"]
3858

3959

4060
@admin.register(IOC)
@@ -59,7 +79,9 @@ class IOCModelAdmin(admin.ModelAdmin):
5979
"destination_ports",
6080
"login_attempts",
6181
]
82+
list_filter = ["type", "log4j", "cowrie", "scanner", "payload_request", "ip_reputation", "asn"]
6283
search_fields = ["name", "related_ioc__name"]
84+
search_help_text = ["search for the IP address source"]
6385
raw_id_fields = ["related_ioc"]
6486
filter_horizontal = ["general_honeypot"]
6587
inlines = [SessionInline]

greedybear/celery.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,9 @@ def setup_loggers(*args, **kwargs):
103103
# This way models learn from complete rather than partial day patterns, which is crucial for their performance.
104104
"train_and_update": {
105105
"task": "greedybear.tasks.chain_train_and_update",
106-
"schedule": crontab(hour=0, minute=hp_extraction_interval // 2),
106+
# Sometimes this could start before the midnight extraction has finished.
107+
# Let's increase the offset a little.
108+
"schedule": crontab(hour=0, minute=int(hp_extraction_interval / 3 * 2)),
107109
"options": {"queue": "default"},
108110
},
109111
# COMMANDS
@@ -119,4 +121,9 @@ def setup_loggers(*args, **kwargs):
119121
"schedule": crontab(hour=2, minute=3),
120122
"options": {"queue": "default"},
121123
},
124+
"get_mass_scanners": {
125+
"task": "greedybear.tasks.get_mass_scanners",
126+
"schedule": crontab(hour=4, minute=3, day_of_week=0),
127+
"options": {"queue": "default"},
128+
},
122129
}

greedybear/cronjobs/attacks.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from greedybear.cronjobs.base import ElasticJob
1010
from greedybear.cronjobs.scoring.scoring_jobs import UpdateScores
1111
from greedybear.cronjobs.sensors import ExtractSensors
12-
from greedybear.models import IOC, GeneralHoneypot, Sensors
12+
from greedybear.models import IOC, GeneralHoneypot, MassScanners, Sensors
1313
from greedybear.settings import EXTRACTION_INTERVAL, LEGACY_EXTRACTION
1414

1515

@@ -82,11 +82,12 @@ def _get_attacker_data(self, honeypot, fields: list) -> list:
8282
if not ip.strip():
8383
continue
8484
dest_ports = [hit["dest_port"] for hit in hits if "dest_port" in hit]
85+
8586
ioc = IOC(
8687
name=ip,
8788
type=self._get_ioc_type(ip),
8889
interaction_count=len(hits),
89-
ip_reputation=hits[0].get("ip_rep", ""),
90+
ip_reputation=self._get_ip_reputation(ip, hits[0]),
9091
asn=hits[0].get("geoip", {}).get("asn"),
9192
destination_ports=sorted(set(dest_ports)),
9293
login_attempts=len(hits) if honeypot.name == "Heralding" else 0,
@@ -98,6 +99,18 @@ def _get_attacker_data(self, honeypot, fields: list) -> list:
9899
iocs.append(ioc)
99100
return iocs
100101

102+
def _get_ip_reputation(self, ip, hit):
103+
ip_reputation = hit.get("ip_rep", "")
104+
if not ip_reputation:
105+
try:
106+
MassScanners.objects.get(ip_address=ip)
107+
except MassScanners.DoesNotExist:
108+
pass
109+
else:
110+
self.log.info(f"IP {ip} is a mass scanner")
111+
ip_reputation = "mass scanner"
112+
return ip_reputation
113+
101114
def _update_scores(self):
102115
if not self.ioc_records:
103116
return

greedybear/cronjobs/commands/cluster.py

Lines changed: 2 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
import numpy as np
21
from greedybear.cronjobs.base import Cronjob
2+
from greedybear.cronjobs.commands.lsh import LSHConnectedComponents
33
from greedybear.models import CommandSequence
4-
from sklearn.cluster import DBSCAN
54

65

76
def tokenize(sequence: list[str]) -> list[str]:
@@ -25,80 +24,6 @@ def tokenize(sequence: list[str]) -> list[str]:
2524
return result
2625

2726

28-
def jaccard_similarity(seq1: list[str], seq2: list[str]) -> float:
29-
"""
30-
Calculate the Jaccard similarity coefficient between two sequences.
31-
32-
The Jaccard similarity coefficient is defined as the size of the intersection
33-
divided by the size of the union of two sets. It ranges from 0 (completely dissimilar)
34-
to 1 (identical).
35-
36-
Args:
37-
seq1: First sequence of strings to compare
38-
seq2: Second sequence of strings to compare
39-
40-
Returns:
41-
float: Jaccard similarity coefficient between the two sequences.
42-
Returns 0 if both sequences are empty.
43-
"""
44-
set1 = set(seq1)
45-
set2 = set(seq2)
46-
intersection = len(set1.intersection(set2))
47-
union = len(set1.union(set2))
48-
return intersection / union if union != 0 else 0
49-
50-
51-
def compute_similarity_matrix(sequences: list[list[str]]) -> np.ndarray:
52-
"""
53-
Compute a pairwise Jaccard similarity matrix for a list of sequences.
54-
55-
Creates a symmetric matrix where each element [i,j] contains the Jaccard
56-
similarity between sequences[i] and sequences[j]. The diagonal elements
57-
are set to 1.0 (self-similarity).
58-
59-
Time and space complexity: O(n²) where n is the number of sequences
60-
61-
Args:
62-
sequences: List of token sequences to compare.
63-
64-
Returns:
65-
np.ndarray: A symmetric n×n matrix of floats where n=len(sequences).
66-
"""
67-
n = len(sequences)
68-
matrix = np.zeros((n, n))
69-
for i in range(n):
70-
for j in range(i + 1, n):
71-
similarity = jaccard_similarity(sequences[i], sequences[j])
72-
matrix[i, j] = similarity
73-
matrix[j, i] = similarity
74-
matrix[i, i] = 1.0
75-
return matrix
76-
77-
78-
def dbscan_clustering(sequences: list[list[str]], eps: float = 0.5) -> np.ndarray:
79-
"""
80-
Cluster sequences using DBSCAN based on Jaccard similarity.
81-
82-
Performs density-based clustering on sequences using their pairwise Jaccard
83-
similarities. The similarity is converted to distance by subtracting from 1.
84-
Sequences with distance less than eps are considered neighbors.
85-
86-
Args:
87-
sequences: List of token sequences to cluster.
88-
eps: Maximum distance between two samples for them to be
89-
considered as in the same neighborhood. Since we use Jaccard distance,
90-
eps=0.5 means sequences must share at least 50% of their tokens to be
91-
considered similar. Defaults to 0.5.
92-
93-
Returns:
94-
np.ndarray: Array of cluster labels. Shape (n_samples,).
95-
"""
96-
similarity_matrix = compute_similarity_matrix(sequences)
97-
distance_matrix = 1 - similarity_matrix
98-
dbscan = DBSCAN(eps=eps, min_samples=1, metric="precomputed")
99-
return dbscan.fit_predict(distance_matrix)
100-
101-
10227
class ClusterCommandSequences(Cronjob):
10328
"""
10429
A cronjob that clusters command sequences based on their similarity.
@@ -124,7 +49,7 @@ def run(self) -> None:
12449
return
12550
self.log.info(f"clustering {len(sequences)} command sequences")
12651
tokenized_seqs = [tokenize(s.commands) for s in sequences]
127-
cluster_labels = dbscan_clustering(tokenized_seqs)
52+
cluster_labels = LSHConnectedComponents().get_components(tokenized_seqs)
12853
seqs_to_update = []
12954
for seq, label in zip(sequences, cluster_labels):
13055
if seq.cluster != label:

0 commit comments

Comments
 (0)