Skip to content

Commit 447e4df

Browse files
committed
feat: flag bot user agents
Resolves #11 by identifying bot user agents and flagging them so they can be filtered out in later reports. This allows us to retain bot data for analysis rather than dropping it outright.
Changes:
- Marked bot-related events with a `robot` flag for downstream filtering
- Implemented a test to validate the bot detection and flagging functionality
1 parent 8bc123f commit 447e4df

File tree

2 files changed

+54
-6
lines changed

2 files changed

+54
-6
lines changed

src/main/kotlin/ch/srgssr/pillarbox/monitoring/event/model/EventRequest.kt

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,15 @@ private class DataDeserializer : JsonDeserializer<Any?>() {
6464
.build()
6565
}
6666

67+
/**
 * Tells whether the given parsed user agent should be flagged as a robot or a hacker.
 *
 * The device class, layout engine class, agent class and agent security fields are
 * inspected in that order; the agent is flagged as soon as one of their values starts
 * with "Hacker" or "Robot" (case-insensitive). A field without a value is ignored.
 *
 * @param userAgent the parsed user agent to inspect.
 * @return `true` if at least one inspected field starts with "Hacker" or "Robot",
 *   `false` otherwise.
 */
fun isHackerOrRobot(userAgent: UserAgent): Boolean {
    val inspectedFields =
        listOf(
            UserAgent.DEVICE_CLASS,
            UserAgent.LAYOUT_ENGINE_CLASS,
            UserAgent.AGENT_CLASS,
            UserAgent.AGENT_SECURITY,
        )

    for (fieldName in inspectedFields) {
        // A field that yields no value cannot match; move on to the next one.
        val fieldValue = userAgent.getValue(fieldName) ?: continue

        val looksLikeHacker = fieldValue.startsWith("Hacker", ignoreCase = true)
        if (looksLikeHacker || fieldValue.startsWith("Robot", ignoreCase = true)) {
            return true
        }
    }

    return false
}
75+
6776
override fun deserialize(
6877
parser: JsonParser,
6978
ctxt: DeserializationContext,
@@ -79,27 +88,29 @@ private class DataDeserializer : JsonDeserializer<Any?>() {
7988
node.set<ObjectNode>(
8089
"browser",
8190
browserNode.apply {
82-
put("name", userAgent.getValueOrNull("AgentName"))
83-
put("version", userAgent.getValueOrNull("AgentVersion"))
91+
put("name", userAgent.getValueOrNull(UserAgent.AGENT_NAME))
92+
put("version", userAgent.getValueOrNull(UserAgent.AGENT_VERSION))
8493
},
8594
)
8695

8796
node.set<ObjectNode>(
8897
"device",
8998
ObjectNode(ctxt.nodeFactory).apply {
90-
put("name", userAgent.getValueOrNull("DeviceName"))
91-
put("version", userAgent.getValueOrNull("DeviceVersion"))
99+
put("name", userAgent.getValueOrNull(UserAgent.DEVICE_NAME))
100+
put("version", userAgent.getValueOrNull(UserAgent.DEVICE_VERSION))
92101
},
93102
)
94103

95104
node.set<ObjectNode>(
96105
"os",
97106
ObjectNode(ctxt.nodeFactory).apply {
98-
put("name", userAgent.getValueOrNull("OperatingSystemName"))
99-
put("version", userAgent.getValueOrNull("OperatingSystemVersion"))
107+
put("name", userAgent.getValueOrNull(UserAgent.OPERATING_SYSTEM_NAME))
108+
put("version", userAgent.getValueOrNull(UserAgent.OPERATING_SYSTEM_VERSION))
100109
},
101110
)
102111

112+
node.put("robot", isHackerOrRobot(userAgent))
113+
103114
return parser.codec.treeToValue(node, Any::class.java)
104115
}
105116
}

src/test/kotlin/ch/srgssr/pillarbox/monitoring/event/model/EventRequestTest.kt

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class EventRequestTest(
3232

3333
// Then: The user agent data should have been resolved
3434
val dataNode = eventRequest.data as Map<*, *>
35+
dataNode["robot"] shouldBe false
3536

3637
val browserNode = dataNode["browser"] as Map<*, *>
3738
browserNode["name"] shouldBe "Chrome"
@@ -45,6 +46,42 @@ class EventRequestTest(
4546
osNode["version"] shouldBe ">=10.15.7"
4647
}
4748

49+
should("deserialize an event and flag robot agents") {
50+
// Given: an input with a bot user agent
51+
val jsonInput =
52+
"""
53+
{
54+
"session_id": "12345",
55+
"event_name": "START",
56+
"timestamp": 1630000000000,
57+
"version": 1,
58+
"data": {
59+
"browser": {
60+
"user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15 (Applebot/0.1; +http://www.apple.com/go/applebot)"
61+
}
62+
}
63+
}
64+
""".trimIndent()
65+
66+
// When: the event is deserialized
67+
val eventRequest = objectMapper.readValue<EventRequest>(jsonInput)
68+
69+
// Then: The user agent data should have been resolved and the event flagged as a robot
70+
val dataNode = eventRequest.data as Map<*, *>
71+
dataNode["robot"] shouldBe true
72+
73+
val browserNode = dataNode["browser"] as Map<*, *>
74+
browserNode["name"] shouldBe "Applebot"
75+
browserNode["version"] shouldBe "0.1"
76+
77+
val deviceNode = dataNode["device"] as Map<*, *>
78+
deviceNode["name"] shouldBe "Apple BOT"
79+
80+
val osNode = dataNode["os"] as Map<*, *>
81+
osNode["name"] shouldBe "Cloud"
82+
osNode["version"] shouldBe null
83+
}
84+
4885
should("retain existing data when deserializing an event without user agent") {
4986
// Given: an input without an agent
5087
val jsonInput =

0 commit comments

Comments
 (0)