Skip to content

Commit

Permalink
Adds detection for various libraries, bots and operating systems (#7498)
Browse files Browse the repository at this point in the history
* Adds detection for HTML Parser
* Improves detection for Python urllib
* Adds detection for msray
* Adds detection for Slim
* Adds detection for Fuzz Faster U Fool
* Adds detection for Matomo
* Improves detection for generic bots
* Adds detection for Prometheus
* Improves detection for generic bots
* Adds detection for ArchiveBot
* Adds detection for MADBbot
* Adds detection for Kali
* Adds detection for Oracle Linux
* Improves version detection for TencentOS
* Improves version detection for CentOS
* Moves links from comments to the url field and updates some links
* Improves detection for generic bots
* Improves version detection for iOS and macOS
  • Loading branch information
liviuconcioiu authored Nov 14, 2023
1 parent dd1d060 commit 2357f77
Show file tree
Hide file tree
Showing 7 changed files with 203 additions and 23 deletions.
4 changes: 3 additions & 1 deletion Parser/OperatingSystem.php
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class OperatingSystem extends AbstractParser
'INF' => 'Inferno',
'JME' => 'Java ME',
'KOS' => 'KaiOS',
'KAL' => 'Kali',
'KAN' => 'Kanotix',
'KNO' => 'Knoppix',
'KTV' => 'KreaTV',
Expand Down Expand Up @@ -119,6 +120,7 @@ class OperatingSystem extends AbstractParser
'OBS' => 'OpenBSD',
'OWR' => 'OpenWrt',
'OTV' => 'Opera TV',
'ORA' => 'Oracle Linux',
'ORD' => 'Ordissimo',
'PAR' => 'Pardus',
'PCL' => 'PCLinuxOS',
Expand Down Expand Up @@ -205,7 +207,7 @@ class OperatingSystem extends AbstractParser
'ORD', 'TOS', 'RSO', 'DEE', 'FRE', 'MAG', 'FEN', 'CAI', 'PCL', 'HAS',
'LOS', 'DVK', 'ROK', 'OWR', 'OTV', 'KTV', 'PUR', 'PLA', 'FUC', 'PAR',
'FOR', 'MON', 'KAN', 'ZEN', 'LND', 'LNS', 'CHN', 'AMZ', 'TEN', 'CST',
'NOV', 'ROU', 'ZOR', 'RED', 'VID', 'TIV',
'NOV', 'ROU', 'ZOR', 'RED', 'KAL', 'ORA', 'VID', 'TIV',
],
'Mac' => ['MAC'],
'Mobile Gaming Console' => ['PSP', 'NDS', 'XBX'],
Expand Down
31 changes: 30 additions & 1 deletion Tests/Parser/Client/fixtures/library.yml
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,6 @@
type: library
name: cri-o
version: 1.16.1

-
user_agent: go-containerregistry/v0.11.0
client:
Expand Down Expand Up @@ -528,3 +527,33 @@
type: library
name: Axios
version: "1.2.0"
-
user_agent: HTMLParser/1.6
client:
type: library
name: HTML Parser
version: "1.6"
-
user_agent: python-urllib3/1.26.9
client:
type: library
name: Python urllib
version: 1.26.9
-
user_agent: msray-plus
client:
type: library
name: Msray-Plus
version: ""
-
user_agent: Slim Framework
client:
type: library
name: Slim Framework
version: ""
-
user_agent: Fuzz Faster U Fool v1.5.0-dev
client:
type: library
name: FFUF
version: 1.5.0
26 changes: 25 additions & 1 deletion Tests/Parser/fixtures/oss.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3356,7 +3356,7 @@
os:
name: TencentOS
short_name: TEN
version: 4.14.105
version: "3"
platform:
family: GNU/Linux
-
Expand Down Expand Up @@ -3926,6 +3926,22 @@
version: "14.1"
platform:
family: Mac
-
user_agent: python-requests/2.7.0 CPython/2.7.15 Linux/4.16.0-kali2-amd64
os:
name: Kali
short_name: KAL
version: "2"
platform:
family: GNU/Linux
-
user_agent: python-requests/2.6.0 CPython/2.7.5 Linux/4.1.12-124.15.4.el7uek.x86_64
os:
name: Oracle Linux
short_name: ORA
version: "7"
platform: x64
family: GNU/Linux
-
user_agent: Mozilla/5.0 (X11; Linux armv7l) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Odin/88.4324.2.10 Safari/537.36 Model/Hisense-NT72671D VIDAA/6.0(Hisense;SmartTV;85A66GAVT;NT72671/V0000.06.12N.N0622;UHD;86A6GA;)
os:
Expand All @@ -3950,3 +3966,11 @@
version: 1.0.0
platform: ""
family: GNU/Linux
-
user_agent: Aloha/1 CFNetwork/1490.0.1 Darwin/23.2.0
os:
name: iOS
short_name: IOS
version: "17.2"
platform:
family: iOS
45 changes: 45 additions & 0 deletions Tests/fixtures/bots.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5828,6 +5828,51 @@
name: phpMyAdmin
category: Service Agent
url: https://www.phpmyadmin.net/
-
user_agent: Matomo/4.15.1
bot:
name: Matomo
category: Service Agent
url: https://github.com/matomo-org/matomo
producer:
name: InnoCraft Ltd
url: https://matomo.org/
-
user_agent: CustomUserAgent/1.0
bot:
name: Generic Bot
-
user_agent: Prometheus/2.40.5
bot:
name: Prometheus
category: Service Agent
url: https://github.com/prometheus/prometheus
producer:
name: The Linux Foundation
url: https://www.cncf.io/
-
user_agent: firefox
bot:
name: Generic Bot
-
user_agent: Chrome
bot:
name: Generic Bot
-
user_agent: ArchiveTeam ArchiveBot/20220523.4a672db (wpull 2.0.3) and not Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36
bot:
name: ArchiveBot
category: Crawler
url: https://wiki.archiveteam.org/index.php?title=ArchiveBot
producer:
name: ArchiveTeam
url: https://wiki.archiveteam.org/
-
user_agent: MADBbot/0.1 (Gathering webpages for data analytics; https://madb.zapto.org/bot.html; [email protected])
bot:
name: MADBbot
category: Crawler
url: https://madb.zapto.org/bot.html
-
user_agent: MeltwaterNews www.meltwater.com
bot:
Expand Down
33 changes: 31 additions & 2 deletions regexes/bots.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2035,7 +2035,7 @@
- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
name: 'RSSRadio Bot'

- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel|Magellan)'
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel|Magellan|CustomUserAgent)'
name: 'Generic Bot'

- regex: '^sentry'
Expand Down Expand Up @@ -3472,6 +3472,35 @@
category: 'Service Agent'
url: 'https://www.phpmyadmin.net/'

- regex: 'Matomo/([\d+.]+)'
name: 'Matomo'
category: 'Service Agent'
url: 'https://github.com/matomo-org/matomo'
producer:
name: 'InnoCraft Ltd'
url: 'https://matomo.org/'

- regex: 'Prometheus/([\d+.]+)'
name: 'Prometheus'
category: 'Service Agent'
url: 'https://github.com/prometheus/prometheus'
producer:
name: 'The Linux Foundation'
url: 'https://www.cncf.io/'

- regex: 'ArchiveTeam ArchiveBot'
name: 'ArchiveBot'
category: 'Crawler'
url: 'https://wiki.archiveteam.org/index.php?title=ArchiveBot'
producer:
name: 'ArchiveTeam'
url: 'https://wiki.archiveteam.org/'

- regex: 'MADBbot/([\d+.]+)'
name: 'MADBbot'
category: 'Crawler'
url: 'https://madb.zapto.org/bot.html'

- regex: 'MeltwaterNews'
name: 'MeltwaterNews'
category: 'Crawler'
Expand All @@ -3480,5 +3509,5 @@
url: 'https://www.meltwater.com/'

# Generic detections
- regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
- regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider|^firefox$|^chrome$)([^a-z]|$)'
name: 'Generic Bot'
68 changes: 51 additions & 17 deletions regexes/client/libraries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,75 +5,109 @@
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
###############

- regex: 'Fuzz Faster U Fool v(\d+[\.\d]+)'
name: 'FFUF'
version: '$1'
url: 'https://github.com/ffuf/ffuf'

- regex: 'Slim Framework'
name: 'Slim Framework'
version: ''
url: 'https://www.slimframework.com/'

- regex: 'msray-plus'
name: 'Msray-Plus'
version: ''
url: 'https://github.com/super-l/msray'

- regex: 'HTMLParser(?:/(\d+[\.\d]+))?'
name: 'HTML Parser'
version: '$1'
url: 'https://htmlparser.sourceforge.net/'

# got - a nodejs library
- regex: '^got(?:/(\d+\.[.\d]+))? \('
name: 'got'
version: '$1'
url: 'https://github.com/sindresorhus/got'

# Typhoeus (https://github.com/typhoeus/typhoeus)
# Typhoeus
- regex: 'Typhoeus'
name: 'Typhoeus'
version: ''
url: 'https://github.com/typhoeus/typhoeus'

# req (https://github.com/imroc/req)
# req
- regex: 'req/v([\.\d]+)'
name: 'req'
version: '$1'
url: 'https://github.com/imroc/req'

# quic-go (https://github.com/lucas-clemente/quic-go)
# quic-go
- regex: 'quic-go-HTTP/3'
name: 'quic-go'
version: ''
url: 'https://github.com/lucas-clemente/quic-go'

# Azure Data Factory (https://azure.microsoft.com/en-us/products/data-factory/)
# Azure Data Factory
- regex: 'azure-data-factory(?:/(\d+[\.\d]+))?'
name: 'Azure Data Factory'
version: '$1'
url: 'https://azure.microsoft.com/en-us/products/data-factory/'

# Dart (https://dart.dev/)
# Dart
- regex: 'Dart(?:/(\d+[\.\d]+))?'
name: 'Dart'
version: '$1'
url: 'https://dart.dev/'

# r-curl (https://github.com/jeroen/curl)
# r-curl
- regex: 'r-curl(?:/(\d+[\.\d]+))?'
name: 'r-curl'
version: '$1'
url: 'https://github.com/jeroen/curl'

# HTTPX (https://www.python-httpx.org/)
# HTTPX
- regex: 'python-httpx(?:/(\d+[\.\d]+))?'
name: 'HTTPX'
version: '$1'
url: 'https://www.python-httpx.org/'

# fasthttp (https://github.com/valyala/fasthttp)
# fasthttp
- regex: 'fasthttp(?:/(\d+[\.\d]+))?'
name: 'fasthttp'
version: '$1'
url: 'https://github.com/valyala/fasthttp'

# GeoIP Update (https://github.com/maxmind/geoipupdate)
# GeoIP Update
- regex: 'geoipupdate(?:/(\d+[\.\d]+))?'
name: 'GeoIP Update'
version: '$1'
url: 'https://github.com/maxmind/geoipupdate'

# PHP cURL Class (https://github.com/php-curl-class/php-curl-class)
# PHP cURL Class
- regex: 'PHP-Curl-Class(?:/(\d+[\.\d]+))?'
name: 'PHP cURL Class'
version: '$1'
url: 'https://github.com/php-curl-class/php-curl-class'

# cPanel HTTP Client (https://www.cpanel.net/)
# cPanel HTTP Client
- regex: 'Cpanel-HTTP-Client(?:/(\d+[\.\d]+))?'
name: 'cPanel HTTP Client'
version: '$1'
url: 'https://www.cpanel.net/'

# AnyEvent HTTP (http://software.schmorp.de/pkg/AnyEvent)
# AnyEvent HTTP
- regex: 'AnyEvent-HTTP(?:/(\d+[\.\d]+))?'
name: 'AnyEvent HTTP'
version: '$1'
url: 'http://software.schmorp.de/pkg/AnyEvent'

# SlimerJS (https://www.slimerjs.org/)
# SlimerJS
- regex: 'SlimerJS/(\d+[\.\d]+)'
name: 'SlimerJS'
version: '$1'
url: 'https://www.slimerjs.org/'

- regex: 'Wget(?:/(\d+[\.\d]+))?'
name: 'Wget'
Expand Down Expand Up @@ -101,7 +135,7 @@
version: '$1'
url: 'https://pypi.org/project/httplib2/'

- regex: 'Python-urllib(?:/?(\d+[\.\d]+))?'
- regex: 'Python-urllib3?(?:/?(\d+[\.\d]+))?'
name: 'Python urllib'
version: '$1'

Expand Down Expand Up @@ -142,12 +176,12 @@
- regex: 'HTTP_Request2(?:/(\d+[\.\d]+))?'
name: 'HTTP_Request2'
version: '$1'
url: 'http://pear.php.net/package/http_request2'
url: 'https://pear.php.net/package/http_request2'

- regex: 'Mechanize(?:/(\d+[\.\d]+))?'
name: 'Mechanize'
version: '$1'
url: 'http://github.com/sparklemotion/mechanize/'
url: 'https://github.com/sparklemotion/mechanize'

- regex: 'aiohttp(?:/(\d+[\.\d]+))?'
name: 'aiohttp'
Expand Down Expand Up @@ -188,7 +222,7 @@
- regex: 'RestSharp/(\d+[\.\d]+)'
name: 'RestSharp'
version: '$1'
url: 'http://restsharp.org/'
url: 'https://github.com/restsharp/RestSharp'

- regex: 'scalaj-http/(\d+[\.\d]+)'
name: 'ScalaJ HTTP'
Expand Down
Loading

0 comments on commit 2357f77

Please sign in to comment.