Skip to content

Commit

Permalink
Adds detection for various bots (#7954)
Browse files Browse the repository at this point in the history
* Adds detection for CMS Experiment
* Adds detection for SiteCheckerBotCrawler
* Adds detection for trafilatura
* Move sqlmap to libraries
* Adds detection for SBIder
  • Loading branch information
liviuconcioiu authored Dec 18, 2024
1 parent e9d8a6f commit 0aec030
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 17 deletions.
12 changes: 12 additions & 0 deletions Tests/Parser/Client/fixtures/library.yml
Original file line number Diff line number Diff line change
Expand Up @@ -731,3 +731,15 @@
type: library
name: Azure Blob Storage
version: 12.23.0
# Fixture: a trafilatura user agent, expected to be reported as a library
# client with the version taken from the "/1.5.0" suffix.
-
  user_agent: 'trafilatura/1.5.0 (+https://github.com/adbar/trafilatura)'
  client:
    type: library
    name: trafilatura
    version: '1.5.0'
# Fixture: a sqlmap user agent carrying a "#dev" suffix after the version;
# the expected version is the numeric part only.
-
  user_agent: 'sqlmap/1.8.10.1#dev (https://sqlmap.org)'
  client:
    type: library
    name: sqlmap
    version: '1.8.10.1'
33 changes: 24 additions & 9 deletions Tests/fixtures/bots.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4000,15 +4000,6 @@
producer:
name: 'IBM Germany Research & Development GmbH'
url: https://exchange.xforce.ibmcloud.com/
# Fixture: a sqlmap user agent expected to be reported as a "Security Checker"
# bot.
# NOTE(review): the surrounding hunk shrinks from 15 to 6 lines, so this entry
# is presumably the one the commit removes from the bot fixtures - confirm.
-
  user_agent: 'sqlmap/1.1.8.2#dev (http://sqlmap.org)'
  bot:
    name: sqlmap
    category: 'Security Checker'
    url: 'http://sqlmap.org/'
    producer:
      name: sqlmap
      url: 'http://sqlmap.org/'
-
user_agent: Mozilla/5.0 (compatible; theoldreader.com; 1 subscribers; feed-id=aaa)
bot:
Expand Down Expand Up @@ -8433,3 +8424,27 @@
name: SuggestBot
category: Crawler
url: https://github.com/nettrom/suggestbot
# Fixture: a Chrome-style user agent whose only distinguishing token is the
# trailing experiment URL; expected to be reported as the CMS Experiment bot.
-
  user_agent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36 (https://securitee.org/cms-experiment-fall2024/)'
  bot:
    name: 'CMS Experiment'
    category: 'Security Checker'
    url: 'https://securitee.org/cms-experiment-fall2024/'
# Fixture: a SiteCheckerBotCrawler user agent, expected to be reported as a
# crawler with its producer details.
-
  user_agent: 'SiteCheckerBotCrawler/1.0 (+http://sitechecker.pro)'
  bot:
    name: SiteCheckerBotCrawler
    category: Crawler
    url: 'https://sitechecker.pro/'
    producer:
      name: 'Cyber Circus Limited'
      url: 'https://sitechecker.pro/'
# Fixture: an SBIder user agent, expected to be reported as a crawler with its
# producer details.
-
  user_agent: 'SBIder/0.8-dev (SBIder; http://www.sitesell.com/sbider.html; http://support.sitesell.com/contact-support.html)'
  bot:
    name: SBIder
    category: Crawler
    url: 'https://www.sitesell.com/sbider.html'
    producer:
      name: 'SiteSell Inc.'
      url: 'https://www.sitesell.com/'
29 changes: 21 additions & 8 deletions regexes/bots.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1624,14 +1624,6 @@
name: 'Sprinklr, Inc.'
url: 'https://www.sprinklr.com/'

# sqlmap as a bot: matched on the "sqlmap/" product token.
# NOTE(review): the surrounding hunk shrinks from 14 to 6 lines, so this entry
# is presumably the one the commit removes from the bot regexes - confirm.
- regex: 'sqlmap/'
  name: 'sqlmap'
  category: 'Security Checker'
  url: 'http://sqlmap.org/'
  producer:
    name: 'sqlmap'
    url: 'http://sqlmap.org/'

- regex: 'SSL Labs'
name: 'SSL Labs'
category: 'Validator'
Expand Down Expand Up @@ -4912,6 +4904,27 @@
category: 'Crawler'
url: 'https://github.com/nettrom/suggestbot'

# CMS Experiment: matched on the "cms-experiment" substring, which also
# appears in the project URL below. No producer is recorded for this entry.
- regex: 'cms-experiment'
  name: 'CMS Experiment'
  category: 'Security Checker'
  url: 'https://securitee.org/cms-experiment-fall2024/'

# SiteCheckerBotCrawler: matched on the bot's own product name.
- regex: 'SiteCheckerBotCrawler'
  name: 'SiteCheckerBotCrawler'
  category: 'Crawler'
  url: 'https://sitechecker.pro/'
  producer:
    name: 'Cyber Circus Limited'
    url: 'https://sitechecker.pro/'

# SBIder: matched on the bot's own product name.
- regex: 'SBIder'
  name: 'SBIder'
  category: 'Crawler'
  url: 'https://www.sitesell.com/sbider.html'
  producer:
    name: 'SiteSell Inc.'
    url: 'https://www.sitesell.com/'

# Generic bots
- regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus| CM62| HD65))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherx?web|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|Time/|r00ts3c-owned-you|nvdorz|Root Slut|NiggaBalls|BotPoke|GlobalWebSearch|xx032_bo9vs83_2a|sslshed|geckotrail|Wordup|Keydrop|^xenu|^(?:chrome|firefox|Abcd|Dark|KvshClient|Node.js|Report Runner|url|Zeus|ZmEu)$'
name: 'Generic Bot'
Expand Down
10 changes: 10 additions & 0 deletions regexes/client/libraries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -649,3 +649,13 @@
name: 'Azure Blob Storage'
version: '$1'
url: 'https://learn.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python'

# trafilatura: the "/<version>" suffix is optional; when it is present the
# digits-and-dots run is captured as $1 and used as the version.
- regex: 'trafilatura(?:/(\d+[.\d]+))?'
  name: 'trafilatura'
  version: '$1'
  url: 'https://github.com/adbar/trafilatura'

# sqlmap as a library: the "/<version>" suffix is optional; when it is present
# the digits-and-dots run is captured as $1 and used as the version.
# NOTE(review): "\d+[.\d]+" needs at least two characters, so a one-character
# version such as "/2" is not captured - confirm this is intended.
- regex: 'sqlmap(?:/(\d+[.\d]+))?'
  name: 'sqlmap'
  version: '$1'
  url: 'https://sqlmap.org/'

0 comments on commit 0aec030

Please sign in to comment.