From a9f29e537996cc2ccb1640286ed3862d04368e85 Mon Sep 17 00:00:00 2001 From: Liviu-Mihail Concioiu Date: Mon, 7 Aug 2023 12:45:37 +0200 Subject: [PATCH] Adds detection for Wear OS, Odd Browser, Mobvoi brand and various bots (#7447) * Adds detection for Research Scan bot * Adds detection for Wear OS * Adds detection for Odd Browser * Fix fixture for Odd Browser * Adds detection for Mobvoi brand and TicWatch Pro S, TicWatch S2 * Adds detection for Mobvoi TicWatch C2, TicWatch C2+ * Adds detection for Mobvoi TicWatch S * Adds detection for Dogecoin Core * Adds detection for Scraping Robot * Improves detection for Googlebot * Adds detection for Yahoo! Japan WSC * Fix identations * Improve regex for Googlebot * Improve regex for Wear OS --- Parser/Client/Browser.php | 4 +- Parser/Device/AbstractDeviceParser.php | 1 + Parser/OperatingSystem.php | 3 +- Tests/Parser/Client/fixtures/browser.yml | 9 ++ Tests/Parser/Client/fixtures/mobile_app.yml | 6 + Tests/Parser/fixtures/oss.yml | 8 + Tests/fixtures/bots.yml | 38 ++++- Tests/fixtures/wearable.yml | 162 +++++++++++++++----- regexes/bots.yml | 34 +++- regexes/client/browsers.yml | 7 + regexes/client/mobile_apps.yml | 5 + regexes/device/mobiles.yml | 24 +-- regexes/oss.yml | 9 +- 13 files changed, 259 insertions(+), 51 deletions(-) diff --git a/Parser/Client/Browser.php b/Parser/Client/Browser.php index 79ad124e3d..916750f081 100644 --- a/Parser/Client/Browser.php +++ b/Parser/Client/Browser.php @@ -356,6 +356,7 @@ class Browser extends AbstractClientParser 'WR' => 'NextWord Browser', 'NT' => 'NTENT Browser', 'OC' => 'Oculus Browser', + 'O6' => 'Odd Browser', 'O1' => 'Opera Mini iOS', 'OB' => 'Obigo', 'O2' => 'Odin', @@ -588,6 +589,7 @@ class Browser extends AbstractClientParser 'HO', 'A5', 'X1', '18', 'B5', 'B6', 'TC', 'A6', '2X', 'F4', 'YG', 'WR', 'NA', 'DM', '1M', 'A7', 'XN', 'XT', 'XB', 'W1', 'HT', 'B8', 'F5', 'B9', 'WA', 'T0', 'HC', + 'O6', ], 'Firefox' => [ 'AX', 'BI', 'BF', 'BH', 'BN', 'C0', 'CU', 'EI', 'F1', @@ -625,7 +627,7 @@ class Browser extends AbstractClientParser 'O4', 'XO', 'U0', 'B0', 'VA', 'X0', 'A5', 'X1', '18', 'B5', 'B6', 'TC', 'A6', '2X', 'F4', 'YG', 'WR', 'NA', 'DM', '1M', 'A7', 'XN', 'XT', 'XB', 'W1', 'HT', 'B7', - 'B9', 'T0', 'I8', + 'B9', 'T0', 'I8', 'O6', ]; /** diff --git a/Parser/Device/AbstractDeviceParser.php b/Parser/Device/AbstractDeviceParser.php index c74fdb91c0..082e61bd6f 100644 --- a/Parser/Device/AbstractDeviceParser.php +++ b/Parser/Device/AbstractDeviceParser.php @@ -985,6 +985,7 @@ abstract class AbstractDeviceParser extends AbstractParser '6W' => 'MobiWire', '9M' => 'Mobo', 'MOB' => 'Mobell', + 'MVO' => 'Mobvoi', 'M4' => 'Modecom', 'MF' => 'Mofut', 'MR' => 'Motorola', diff --git a/Parser/OperatingSystem.php b/Parser/OperatingSystem.php index 39be56d184..fb4884825a 100644 --- a/Parser/OperatingSystem.php +++ b/Parser/OperatingSystem.php @@ -156,6 +156,7 @@ class OperatingSystem extends AbstractParser 'TOS' => 'TmaxOS', 'UBT' => 'Ubuntu', 'WAS' => 'watchOS', + 'WER' => 'Wear OS', 'WTV' => 'WebTV', 'WHS' => 'Whale OS', 'WIN' => 'Windows', @@ -182,7 +183,7 @@ class OperatingSystem extends AbstractParser protected static $osFamilies = [ 'Android' => [ 'AND', 'CYN', 'FIR', 'REM', 'RZD', 'MLD', 'MCD', 'YNS', 'GRI', 'HAR', - 'ADR', 'CLR', 'BOS', 'REV', 'LEN', 'SIR', 'RRS', + 'ADR', 'CLR', 'BOS', 'REV', 'LEN', 'SIR', 'RRS', 'WER', ], 'AmigaOS' => ['AMG', 'MOR'], 'BlackBerry' => ['BLB', 'QNX'], diff --git a/Tests/Parser/Client/fixtures/browser.yml b/Tests/Parser/Client/fixtures/browser.yml index 55667bf13b..1ea3ad4955 100644 --- a/Tests/Parser/Client/fixtures/browser.yml +++ b/Tests/Parser/Client/fixtures/browser.yml @@ -7353,3 +7353,12 @@ engine: Blink engine_version: "" family: Chrome +- + user_agent: Mozilla/5.0 (Linux; Android 9; TicWatch Pro S) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Odd/47.2.1.1 Mobile Safari/537.36 + client: + type: browser + name: Odd Browser + version: 47.2.1.1 + engine: Blink + engine_version: 77.0.3865.92 + family: Chrome diff --git a/Tests/Parser/Client/fixtures/mobile_app.yml b/Tests/Parser/Client/fixtures/mobile_app.yml index 0b28227081..b5dfb31bc9 100644 --- a/Tests/Parser/Client/fixtures/mobile_app.yml +++ b/Tests/Parser/Client/fixtures/mobile_app.yml @@ -1325,6 +1325,12 @@ type: mobile app name: Bitcoin Core version: 22.0.0 +- + user_agent: /Shibetoshi:1.14.6/ + client: + type: mobile app + name: Dogecoin Core + version: 1.14.6 - user_agent: Mozilla/5.0 (Linux; Android 11; Build/RKQ1.201217.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/75.0.3770.156 Mobile Safari/537.36 WpsMoffice/13.23.1/armeabi-v7a/1310 client: diff --git a/Tests/Parser/fixtures/oss.yml b/Tests/Parser/fixtures/oss.yml index dcd487467e..12f1c59820 100644 --- a/Tests/Parser/fixtures/oss.yml +++ b/Tests/Parser/fixtures/oss.yml @@ -3878,3 +3878,11 @@ version: "7.7" platform: "" family: BeOS +- + user_agent: Mozilla/5.0 (Linux; Android 9; TicWatch Pro S) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Odd/47.2.1.1 Mobile Safari/537.36 + os: + name: Wear OS + short_name: WER + version: + platform: + family: Android diff --git a/Tests/fixtures/bots.yml b/Tests/fixtures/bots.yml index 96c728019e..dfd244c51f 100644 --- a/Tests/fixtures/bots.yml +++ b/Tests/fixtures/bots.yml @@ -3338,7 +3338,16 @@ bot: name: Yahoo! Japan BRW category: Crawler - url: https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/ウェブページにアクセスするシステムのユーザーエージェントについて + url: https://support.yahoo-net.jp/PccSearch/s/article/H000007955 + producer: + name: Yahoo! Japan Corp. + url: https://www.yahoo.co.jp/ +- + user_agent: Mozilla/5.0 (compatible; Y!J-WSC/1.0; +https://yahoo.jp/3BSZgF) + bot: + name: Yahoo! Japan WSC + category: Crawler + url: https://support.yahoo-net.jp/PccSearch/s/article/H000007955 producer: name: Yahoo! Japan Corp. url: https://www.yahoo.co.jp/ @@ -5692,3 +5701,30 @@ producer: name: Crissy Field GmbH url: https://www.crissyfield.de/ +- + user_agent: Mozilla/5.0 researchscan.comsys.rwth-aachen.de + bot: + name: Research Scan + category: Crawler + url: http://researchscan.comsys.rwth-aachen.de/ + producer: + name: RWTH Aachen University + url: https://www.comsys.rwth-aachen.de/ +- + user_agent: newspaper/0.2.8 + bot: + name: Scraping Robot + category: Crawler + url: https://scrapingrobot.com/ + producer: + name: Sprious LLC + url: https://sprious.com/ +- + user_agent: Google + bot: + name: Googlebot + category: Search bot + url: http://www.google.com/bot.html + producer: + name: Google Inc. + url: http://www.google.com diff --git a/Tests/fixtures/wearable.yml b/Tests/fixtures/wearable.yml index 81e21f4135..40f8928569 100644 --- a/Tests/fixtures/wearable.yml +++ b/Tests/fixtures/wearable.yml @@ -1593,8 +1593,8 @@ - user_agent: Mozilla/5.0 (Linux; Android 11; SAMSUNG SM-R875F) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/1.2 Chrome/90.0.4430.210 Mobile Safari/537.36 os: - name: Android - version: "11" + name: Wear OS + version: "" platform: "" client: type: browser @@ -1611,8 +1611,8 @@ - user_agent: Mozilla/5.0 (Linux; Android 11; SM-R870) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile Safari/537.36 os: - name: Android - version: "11" + name: Wear OS + version: "" platform: "" client: type: browser @@ -1629,8 +1629,8 @@ - user_agent: Mozilla/5.0 (Linux; Android 11; SM-R860) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Mobile Safari/537.36 os: - name: Android - version: "11" + name: Wear OS + version: "" platform: "" client: type: browser @@ -1647,8 +1647,8 @@ - user_agent: Mozilla/5.0 (Linux; Android 11; SAMSUNG SM-R905U) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/2.2. Chrome/102.0.5005.125 Mobile Safari/537.36 29 os: - name: Android - version: "11" + name: Wear OS + version: "" platform: "" client: type: browser @@ -1683,8 +1683,8 @@ - user_agent: Mozilla/5.0 (Linux; Android 11; SM-R925F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Mobile Safari/537.36 os: - name: Android - version: "11" + name: Wear OS + version: "" platform: "" client: type: browser @@ -1701,8 +1701,8 @@ - user_agent: Mozilla/5.0 (Linux; Android 11; SAMSUNG SM-R925N) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/1.2. Chrome/90.0.4430.210 Mobile Safari/537.36 os: - name: Android - version: "11" + name: Wear OS + version: "" platform: "" client: type: browser @@ -1719,8 +1719,8 @@ - user_agent: Mozilla/5.0 (Linux; Android 11; SAMSUNG SM-R925U) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/2.2. Chrome/102.0.5005.125 Mobile Safari/537.36 29 os: - name: Android - version: "11" + name: Wear OS + version: "" platform: "" client: type: browser @@ -1737,8 +1737,8 @@ - user_agent: Mozilla/5.0 (Linux; Android 11; SAMSUNG SM-R920) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/2.2. Chrome/102.0.5005.125 Mobile Safari/537.36 os: - name: Android - version: "11" + name: Wear OS + version: "" platform: "" client: type: browser @@ -1773,8 +1773,8 @@ - user_agent: Mozilla/5.0 (Linux; Android 11; SM-R915U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Mobile Safari/537.36 os: - name: Android - version: "11" + name: Wear OS + version: "" platform: "" client: type: browser @@ -1791,8 +1791,8 @@ - user_agent: ozilla/5.0 (Linux; Android 11; SAMSUNG SM-R915F) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/1.2. Chrome/90.0.4430.210 Mobile Safari/537.36 os: - name: Android - version: "11" + name: Wear OS + version: "" platform: "" client: type: browser @@ -1809,8 +1809,8 @@ - user_agent: Mozilla/5.0 (Linux; Android 11; SM-R910) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Mobile Safari/537.36 os: - name: Android - version: "11" + name: Wear OS + version: "" platform: "" client: type: browser @@ -1827,8 +1827,8 @@ - user_agent: Mozilla/5.0 (Linux; Android 11; SAMSUNG SM-R905N) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/2.2. Chrome/102.0.5005.125 Mobile Safari/537.36 os: - name: Android - version: "11" + name: Wear OS + version: "" platform: "" client: type: browser @@ -1845,8 +1845,8 @@ - user_agent: Mozilla/5.0 (Linux; Android 11; SM-R905F) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/2.2. Chrome/102.0.5005.125 Mobile Safari/537.36 os: - name: Android - version: "11" + name: Wear OS + version: "" platform: "" client: type: browser @@ -1863,8 +1863,8 @@ - user_agent: Mozilla/5.0 (Linux; Android 11; SM-R895U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Mobile Safari/537.36 os: - name: Android - version: "11" + name: Wear OS + version: "" platform: "" client: type: browser @@ -1881,8 +1881,8 @@ - user_agent: Mozilla/5.0 (Linux; Android 9; SM-R895N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.7305.55 Mobile Safari/537.36 os: - name: Android - version: "9" + name: Wear OS + version: "" platform: "" client: type: browser @@ -1899,8 +1899,8 @@ - user_agent: Mozilla/5.0 (Linux; Android 11; SM-R895F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Mobile Safari/537.36 os: - name: Android - version: "11" + name: Wear OS + version: "" platform: "" client: type: browser @@ -1917,13 +1917,13 @@ - user_agent: Mozilla/5.0 (Linux; Android 11; SM-R900) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Odd/47.2.1.1 Mobile Safari/537.36 os: - name: Android - version: "11" + name: Wear OS + version: "" platform: "" client: type: browser - name: Chrome - version: 77.0.3865.92 + name: Odd Browser + version: 47.2.1.1 engine: Blink engine_version: 77.0.3865.92 device: @@ -1932,3 +1932,93 @@ model: Galaxy Watch 5 40mm os_family: Android browser_family: Chrome +- + user_agent: Mozilla/5.0 (Linux; Android 9; TicWatch Pro S) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Odd/47.2.1.1 Mobile Safari/537.36 + os: + name: Wear OS + version: "" + platform: "" + client: + type: browser + name: Odd Browser + version: 47.2.1.1 + engine: Blink + engine_version: 77.0.3865.92 + device: + type: wearable + brand: Mobvoi + model: TicWatch Pro S + os_family: Android + browser_family: Chrome +- + user_agent: Mozilla/5.0 (Linux; Android 9; TicWatch S2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Odd/47.2.1.1 Mobile Safari/537.36 + os: + name: Wear OS + version: "" + platform: "" + client: + type: browser + name: Odd Browser + version: 47.2.1.1 + engine: Blink + engine_version: 77.0.3865.92 + device: + type: wearable + brand: Mobvoi + model: TicWatch S2 + os_family: Android + browser_family: Chrome +- + user_agent: Mozilla/5.0 (Linux; Android 9; TicWatch C2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Odd/47.2.1.1 Mobile Safari/537.36 + os: + name: Wear OS + version: "" + platform: "" + client: + type: browser + name: Odd Browser + version: 47.2.1.1 + engine: Blink + engine_version: 77.0.3865.92 + device: + type: wearable + brand: Mobvoi + model: TicWatch C2 + os_family: Android + browser_family: Chrome +- + user_agent: Mozilla/5.0 (Linux; Android 9; TicWatch C2+) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Odd/47.2.1.1 Mobile Safari/537.36 + os: + name: Wear OS + version: "" + platform: "" + client: + type: browser + name: Odd Browser + version: 47.2.1.1 + engine: Blink + engine_version: 77.0.3865.92 + device: + type: wearable + brand: Mobvoi + model: TicWatch C2+ + os_family: Android + browser_family: Chrome +- + user_agent: Mozilla/5.0 (Linux; Android 8.0.0; Ticwatch S) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.92 Odd/47.2.1.1 Mobile Safari/537.36 + os: + name: Wear OS + version: "" + platform: "" + client: + type: browser + name: Odd Browser + version: 47.2.1.1 + engine: Blink + engine_version: 77.0.3865.92 + device: + type: wearable + brand: Mobvoi + model: TicWatch S + os_family: Android + browser_family: Chrome diff --git a/regexes/bots.yml b/regexes/bots.yml index f0be1683b6..a1f5e2816b 100644 --- a/regexes/bots.yml +++ b/regexes/bots.yml @@ -720,6 +720,14 @@ name: 'Google Inc.' url: 'http://www.google.com' +- regex: '^Google$' + name: 'Googlebot' + category: 'Search bot' + url: 'http://www.google.com/bot.html' + producer: + name: 'Google Inc.' + url: 'http://www.google.com' + - regex: 'heritrix' name: 'Heritrix' category: 'Crawler' @@ -1762,7 +1770,15 @@ - regex: 'Y!J-BRW' name: 'Yahoo! Japan BRW' category: 'Crawler' - url: 'https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/ウェブページにアクセスするシステムのユーザーエージェントについて' + url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955' + producer: + name: 'Yahoo! Japan Corp.' + url: 'https://www.yahoo.co.jp/' + +- regex: 'Y!J-WSC' + name: 'Yahoo! Japan WSC' + category: 'Crawler' + url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955' producer: name: 'Yahoo! Japan Corp.' url: 'https://www.yahoo.co.jp/' @@ -3379,6 +3395,22 @@ name: 'Crissy Field GmbH' url: 'https://www.crissyfield.de/' +- regex: 'researchscan.comsys.rwth-aachen.de' + name: 'Research Scan' + category: 'Crawler' + url: 'http://researchscan.comsys.rwth-aachen.de/' + producer: + name: 'RWTH Aachen University' + url: 'https://www.comsys.rwth-aachen.de/' + +- regex: 'newspaper/([\d+.]+)' + name: 'Scraping Robot' + category: 'Crawler' + url: 'https://scrapingrobot.com/' + producer: + name: 'Sprious LLC' + url: 'https://sprious.com/' + # Generic detections - regex: '[a-z0-9\-_]*((?