|
552 | 552 | url: 'http://moz.com/'
|
553 | 553 |
|
554 | 554 | - regex: 'facebookexternalhit|facebookplatform|facebookexternalua|facebookcatalog'
|
555 |
| - name: 'Facebook External Hit' |
| 555 | + name: 'Facebook Crawler' |
556 | 556 | category: 'Social Media Agent'
|
557 |
| - url: 'https://www.facebook.com/externalhit_uatext.php' |
| 557 | + url: 'https://developers.facebook.com/docs/sharing/webmasters/crawler/' |
558 | 558 | producer:
|
559 | 559 | name: 'Meta Platforms, Inc.'
|
560 | 560 | url: 'https://www.meta.com/'
|
|
4374 | 4374 | name: 'Library and Archives Canada'
|
4375 | 4375 | url: 'https://library-archives.canada.ca/'
|
4376 | 4376 |
|
| 4377 | +- regex: 'InsytfulBot/[\d.]+' |
| 4378 | + name: 'InsytfulBot' |
| 4379 | + category: 'Crawler' |
| 4380 | + url: 'https://www.insytful.com/' |
| 4381 | + producer: |
| 4382 | + name: 'Zengenti Limited' |
| 4383 | + url: 'https://www.zengenti.com/' |
| 4384 | + |
| 4385 | +- regex: 'statista\.com' |
| 4386 | + name: 'Statista' |
| 4387 | + category: 'Crawler' |
| 4388 | + url: 'https://www.statista.com/' |
| 4389 | + producer: |
| 4390 | + name: 'Statista, Inc.' |
| 4391 | + url: 'https://www.statista.com/' |
| 4392 | + |
| 4393 | +- regex: 'SubstackContentFetch/[\d.]+' |
| 4394 | + name: 'Substack Content Fetch' |
| 4395 | + category: 'Crawler' |
| 4396 | + url: 'https://substack.com/' |
| 4397 | + producer: |
| 4398 | + name: 'Substack, Inc.' |
| 4399 | + url: 'https://substack.com/' |
| 4400 | + |
| 4401 | +- regex: '^ds9' |
| 4402 | + name: 'Deep SEARCH 9' |
| 4403 | + category: 'Crawler' |
| 4404 | + url: 'https://www.copyright.com/blog/ccc-expands-corporate-solutions-offering-with-new-technology/' |
| 4405 | + producer: |
| 4406 | + name: 'Copyright Clearance Center, Inc.' |
| 4407 | + url: 'https://www.copyright.com/' |
| 4408 | + |
| 4409 | +- regex: 'LiveJournal\.com' |
| 4410 | + name: 'LiveJournal' |
| 4411 | + url: 'https://www.livejournal.com/' |
| 4412 | + category: 'Feed Fetcher' |
| 4413 | + producer: |
| 4414 | + name: 'ООО "СИМ"' |
| 4415 | + url: 'https://www.livejournal.com/' |
| 4416 | + |
| 4417 | +- regex: 'bitdiscovery' |
| 4418 | + name: 'Tenable.asm' |
| 4419 | + category: 'Security Checker' |
| 4420 | + url: 'https://bitdiscovery.com/' |
| 4421 | + producer: |
| 4422 | + name: 'Tenable, Inc.' |
| 4423 | + url: 'https://www.tenable.com/' |
| 4424 | + |
| 4425 | +- regex: 'Castopod/[\d.]+' |
| 4426 | + name: 'Castopod' |
| 4427 | + category: 'Crawler' |
| 4428 | + url: 'https://www.castopod.org/' |
| 4429 | + |
4377 | 4430 | # Generic bots
|
4378 |
| -- regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|LinkChain|survey-security-dot-txt|^xenu|^ZmEu|^(?:chrome|firefox|Zeus)$' |
| 4431 | +- regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|^xenu|^(?:chrome|firefox|KvshClient|Zeus|ZmEu)$' |
4379 | 4432 | name: 'Generic Bot'
|
4380 | 4433 |
|
4381 | 4434 | # Generic detections
|
4382 |
| -- regex: '[a-z0-9_-]*(?:(?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|analyzer|appengine|archiver?|checker|collector|crawl|crawler|fetcher|indexer|inspector|monitor|project(?!or)|(?<!Google Wap )proxy|research|resolver|robots|scanner|scraper|script|searcher|(?<!-)security|spider|study|transcoder|uptime|user[ _]?agent|validator)(?:[^a-z]|$)' |
| 4435 | +- regex: '[a-z0-9_-]*(?:(?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|analyzer|appengine|archiver?|checker|collector|crawl|crawler|(?<!node-|uclient-|Mikrotik/\d\.[x\d] |electron-)fetch(?:er)?|indexer|inspector|monitor|(?<!Microsoft |banshee-)project(?!or)|(?<!Google Wap |Blue )proxy|research|resolver|robots|(?<!Cam)scanner|scraper|script|searcher|(?<!-)security|spider|study|transcoder|uptime|user[ _]?agent|validator)(?:[^a-z]|$)' |
4383 | 4436 | name: 'Generic Bot'
|
0 commit comments