From b8591d94167a20ec7b37dbce43db99f2c2de41d4 Mon Sep 17 00:00:00 2001 From: boocmp Date: Sat, 11 Jan 2025 23:12:43 +0700 Subject: [PATCH] u16 --- benches/bench_matching.rs | 30 ++-- examples/example.rs | 2 +- src/blocker.rs | 78 +++++---- src/data_format/v0.rs | 62 +++++-- src/filters/fb_network.rs | 183 ++++++++++++++++----- src/filters/network.rs | 30 +++- src/filters/network_matchers.rs | 69 ++++---- src/flat/fb_network_filter.fbs | 9 +- src/flat/fb_network_filter_generated.rs | 84 ++++++++-- src/utils.rs | 35 +++- tests/unit/filters/cosmetic.rs | 210 ++++++++++++++++++------ tests/unit/filters/network_matchers.rs | 14 +- tests/unit/utils.rs | 19 ++- 13 files changed, 586 insertions(+), 239 deletions(-) diff --git a/benches/bench_matching.rs b/benches/bench_matching.rs index 155b5e4e..e3abb96a 100644 --- a/benches/bench_matching.rs +++ b/benches/bench_matching.rs @@ -2,11 +2,11 @@ use criterion::*; use serde::{Deserialize, Serialize}; -use adblock::Engine; use adblock::blocker::{Blocker, BlockerOptions}; use adblock::request::Request; use adblock::resources::ResourceStorage; use adblock::url_parser::parse_url; +use adblock::Engine; #[path = "../tests/test_utils.rs"] mod test_utils; @@ -36,11 +36,11 @@ fn load_requests() -> Vec { reqs } -fn get_blocker(rules: impl IntoIterator>) -> Blocker { +fn get_blocker(rules: impl IntoIterator>) -> Blocker { let (network_filters, _) = adblock::lists::parse_filters(rules, false, Default::default()); let blocker_options = BlockerOptions { - enable_optimizations: true, + enable_optimizations: false, }; Blocker::new(network_filters, &blocker_options) @@ -57,11 +57,15 @@ fn bench_rule_matching(engine: &Engine, requests: &Vec) -> (u32, u3 passes += 1; } }); - // println!("Got {} matches, {} passes, {} errors", matches, passes, errors); + println!("Got {} matches, {} passes", matches, passes); (matches, passes) } -fn bench_matching_only(blocker: &Blocker, resources: &ResourceStorage, requests: &Vec) -> (u32, u32) { +fn bench_matching_only( + blocker: &Blocker, + resources: &ResourceStorage, + requests: &Vec, +) -> (u32, u32) { let mut matches = 0; let mut passes = 0; requests.iter().for_each(|parsed| { @@ -139,9 +143,7 @@ fn rule_match(c: &mut Criterion) { fn rule_match_parsed_el(c: &mut Criterion) { let mut group = c.benchmark_group("rule-match-parsed"); - let rules = rules_from_lists(&[ - "data/easylist.to/easylist/easylist.txt", - ]); + let rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); let requests = load_requests(); let requests_parsed: Vec<_> = requests .into_iter() @@ -219,9 +221,7 @@ fn serialization(c: &mut Criterion) { b.iter(|| assert!(engine.serialize_raw().unwrap().len() > 0)) }); group.bench_function("el", move |b| { - let full_rules = rules_from_lists(&[ - "data/easylist.to/easylist/easylist.txt", - ]); + let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); let engine = Engine::from_rules(full_rules, Default::default()); b.iter(|| assert!(engine.serialize_raw().unwrap().len() > 0)) @@ -256,9 +256,7 @@ fn deserialization(c: &mut Criterion) { }) }); group.bench_function("el", move |b| { - let full_rules = rules_from_lists(&[ - "data/easylist.to/easylist/easylist.txt", - ]); + let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); let engine = Engine::from_rules(full_rules, Default::default()); let serialized = engine.serialize_raw().unwrap(); @@ -292,9 +290,7 @@ fn rule_match_browserlike_comparable(c: &mut Criterion) { group.throughput(Throughput::Elements(requests_len)); group.sample_size(20); - fn requests_parsed( - requests: &[TestRequest], - ) -> Vec<(String, String, String, String, bool)> { + fn requests_parsed(requests: &[TestRequest]) -> Vec<(String, String, String, String, bool)> { requests .iter() .map(|r| { diff --git a/examples/example.rs b/examples/example.rs index fa6d94d9..8e2b0c15 100644 --- a/examples/example.rs +++ b/examples/example.rs @@ -6,7 +6,7 @@ use adblock::{ fn main() { let rules = vec![ - String::from("-advertisement-icon."), + String::from("-advertisement-icon.$domain=example.com|hui.ru|pizda.com"), String::from("-advertisement-management/"), String::from("-advertisement."), String::from("-advertisement/script."), diff --git a/src/blocker.rs b/src/blocker.rs index df5e8ca0..12e0c7f0 100644 --- a/src/blocker.rs +++ b/src/blocker.rs @@ -749,6 +749,8 @@ pub(crate) struct NetworkFilterList { pub(crate) filter_map: HashMap>>, pub(crate) flat_filters_buffer: Vec, pub(crate) flat_filter_map: HashMap>, + pub(crate) include_domains_map: HashMap, + pub(crate) exclude_domains_map: HashMap, } impl NetworkFilterList { @@ -766,9 +768,6 @@ impl NetworkFilterList { // Build a HashMap of tokens to Network Filters (held through Arc, Atomic Reference Counter) let mut filter_map = HashMap::with_capacity(filter_tokens.len()); - - let mut flat_filter_map = HashMap::with_capacity(filter_tokens.len()); - let mut flat_builder = crate::filters::fb_network::FlatNetworkFiltersListBuilder::new(); { for (filter_pointer, multi_tokens) in filter_tokens { for tokens in multi_tokens { @@ -787,11 +786,6 @@ impl NetworkFilterList { _ => {} } } - insert_dup( - &mut flat_filter_map, - best_token, - flat_builder.add((*filter_pointer).clone()), - ); insert_dup(&mut filter_map, best_token, Arc::clone(&filter_pointer)); } } @@ -799,8 +793,10 @@ impl NetworkFilterList { let mut self_ = NetworkFilterList { filter_map, - flat_filters_buffer: flat_builder.finish(), - flat_filter_map: flat_filter_map, + flat_filters_buffer: vec![], + flat_filter_map: HashMap::new(), + include_domains_map: HashMap::new(), + exclude_domains_map: HashMap::new(), }; if optimize { @@ -809,6 +805,32 @@ impl NetworkFilterList { self_.filter_map.shrink_to_fit(); } + let mut flat_builder = crate::filters::fb_network::FlatNetworkFiltersListBuilder::new(); + + for (key, value) in &self_.filter_map { + for v in value { + let nf = (*(*v)).clone(); + let index = flat_builder.add(nf); + insert_dup(&mut self_.flat_filter_map, *key, index); + } + } + self_.flat_filters_buffer = flat_builder.finish(); + + let root = unsafe { fb::root_as_network_filter_list_unchecked(&self_.flat_filters_buffer) }; + + for (index, item) in root.unique_include_domains().iter().enumerate() { + self_ + .include_domains_map + .insert(item, u16::try_from(index).expect("ok")); + } + for (index, item) in root.unique_exclude_domains().iter().enumerate() { + self_ + .exclude_domains_map + .insert(item, u16::try_from(index).expect("ok")); + } + self_.include_domains_map.shrink_to_fit(); + self_.exclude_domains_map.shrink_to_fit(); + self_ } @@ -916,32 +938,20 @@ impl NetworkFilterList { unsafe { fb::root_as_network_filter_list_unchecked(&self.flat_filters_buffer) }; let filters = storage.global_list(); - if let Some(source_hostname_hashes) = request.source_hostname_hashes.as_ref() { - for token in source_hostname_hashes { - if let Some(filter_bucket) = self.flat_filter_map.get(token) { - for filter_index in filter_bucket { - let flat_filter = filters.get(*filter_index as usize); - let mut filter = FlatNetworkFilterView::from(&flat_filter); - filter.key = *filter_index as u64; - - if filter.matches(request, regex_manager) - && filter.tag.map_or(true, |t| active_tags.contains(t)) - { - return Some(filter.mask); - } - } - } - } - } - - for token in request_tokens { + for token in request + .source_hostname_hashes + .as_ref() + .into_iter() + .flatten() + .chain(request_tokens.into_iter()) + { if let Some(filter_bucket) = self.flat_filter_map.get(token) { for filter_index in filter_bucket { let flat_filter = filters.get(*filter_index as usize); let mut filter = FlatNetworkFilterView::from(&flat_filter); filter.key = *filter_index as u64; - if filter.matches(request, regex_manager) + if filter.matches(request, self, regex_manager) && filter.tag.map_or(true, |t| active_tags.contains(t)) { return Some(filter.mask); @@ -973,7 +983,7 @@ impl NetworkFilterList { if let Some(filter_bucket) = self.filter_map.get(token) { for filter in filter_bucket { // if matched, also needs to be tagged with an active tag (or not tagged at all) - if filter.matches(request, regex_manager) + if filter.matches(request, self, regex_manager) && filter .tag .as_ref() @@ -991,7 +1001,7 @@ impl NetworkFilterList { if let Some(filter_bucket) = self.filter_map.get(token) { for filter in filter_bucket { // if matched, also needs to be tagged with an active tag (or not tagged at all) - if filter.matches(request, regex_manager) + if filter.matches(request, self, regex_manager) && filter .tag .as_ref() @@ -1048,7 +1058,7 @@ impl NetworkFilterList { if let Some(filter_bucket) = self.filter_map.get(token) { for filter in filter_bucket { // if matched, also needs to be tagged with an active tag (or not tagged at all) - if filter.matches(request, regex_manager) + if filter.matches(request, self, regex_manager) && filter .tag .as_ref() @@ -1066,7 +1076,7 @@ impl NetworkFilterList { if let Some(filter_bucket) = self.filter_map.get(token) { for filter in filter_bucket { // if matched, also needs to be tagged with an active tag (or not tagged at all) - if filter.matches(request, regex_manager) + if filter.matches(request, self, regex_manager) && filter .tag .as_ref() diff --git a/src/data_format/v0.rs b/src/data_format/v0.rs index 169745fc..858c482a 100644 --- a/src/data_format/v0.rs +++ b/src/data_format/v0.rs @@ -61,8 +61,12 @@ impl From<&HostnameRuleDb> for LegacyHostnameRuleDb { for (hash, bin) in v.uninject_script.0.iter() { for f in bin { db.entry(*hash) - .and_modify(|v| v.push(LegacySpecificFilterType::UnhideScriptInject(f.to_owned()))) - .or_insert_with(|| vec![LegacySpecificFilterType::UnhideScriptInject(f.to_owned())]); + .and_modify(|v| { + v.push(LegacySpecificFilterType::UnhideScriptInject(f.to_owned())) + }) + .or_insert_with(|| { + vec![LegacySpecificFilterType::UnhideScriptInject(f.to_owned())] + }); } } for (hash, bin) in v.procedural_action.0.iter() { @@ -71,8 +75,15 @@ impl From<&HostnameRuleDb> for LegacyHostnameRuleDb { Ok(f) => { if let Some((selector, style)) = f.as_css() { db.entry(*hash) - .and_modify(|v| v.push(LegacySpecificFilterType::Style(selector.clone(), style.clone()))) - .or_insert_with(|| vec![LegacySpecificFilterType::Style(selector, style)]); + .and_modify(|v| { + v.push(LegacySpecificFilterType::Style( + selector.clone(), + style.clone(), + )) + }) + .or_insert_with(|| { + vec![LegacySpecificFilterType::Style(selector, style)] + }); } } _ => (), @@ -85,17 +96,25 @@ impl From<&HostnameRuleDb> for LegacyHostnameRuleDb { Ok(f) => { if let Some((selector, style)) = f.as_css() { db.entry(*hash) - .and_modify(|v| v.push(LegacySpecificFilterType::UnhideStyle(selector.to_owned(), style.to_owned()))) - .or_insert_with(|| vec![LegacySpecificFilterType::UnhideStyle(selector.to_owned(), style.to_owned())]); + .and_modify(|v| { + v.push(LegacySpecificFilterType::UnhideStyle( + selector.to_owned(), + style.to_owned(), + )) + }) + .or_insert_with(|| { + vec![LegacySpecificFilterType::UnhideStyle( + selector.to_owned(), + style.to_owned(), + )] + }); } } _ => (), } } } - LegacyHostnameRuleDb { - db, - } + LegacyHostnameRuleDb { db } } } @@ -115,10 +134,22 @@ impl Into for LegacyHostnameRuleDb { match rule { LegacySpecificFilterType::Hide(s) => hide.insert(&hash, s), LegacySpecificFilterType::Unhide(s) => unhide.insert(&hash, s), - LegacySpecificFilterType::Style(s, st) => procedural_action.insert_procedural_action_filter(&hash, &ProceduralOrActionFilter::from_css(s, st)), - LegacySpecificFilterType::UnhideStyle(s, st) => procedural_action_exception.insert_procedural_action_filter(&hash, &ProceduralOrActionFilter::from_css(s, st)), - LegacySpecificFilterType::ScriptInject(s) => inject_script.insert(&hash, (s, Default::default())), - LegacySpecificFilterType::UnhideScriptInject(s) => uninject_script.insert(&hash, s), + LegacySpecificFilterType::Style(s, st) => procedural_action + .insert_procedural_action_filter( + &hash, + &ProceduralOrActionFilter::from_css(s, st), + ), + LegacySpecificFilterType::UnhideStyle(s, st) => procedural_action_exception + .insert_procedural_action_filter( + &hash, + &ProceduralOrActionFilter::from_css(s, st), + ), + LegacySpecificFilterType::ScriptInject(s) => { + inject_script.insert(&hash, (s, Default::default())) + } + LegacySpecificFilterType::UnhideScriptInject(s) => { + uninject_script.insert(&hash, s) + } } } } @@ -358,6 +389,8 @@ impl From for NetworkFilterList { // TODO(boocmp): ??? flat_filters_buffer: vec![], flat_filter_map: HashMap::new(), + include_domains_map: HashMap::new(), + exclude_domains_map: HashMap::new(), } } } @@ -448,7 +481,8 @@ impl From for (Blocker, CosmeticFilterCache) { let mut specific_rules: HostnameRuleDb = v.specific_rules.into(); specific_rules.procedural_action = HostnameFilterBin(v.procedural_action); - specific_rules.procedural_action_exception = HostnameFilterBin(v.procedural_action_exception); + specific_rules.procedural_action_exception = + HostnameFilterBin(v.procedural_action_exception); ( Blocker { diff --git a/src/filters/fb_network.rs b/src/filters/fb_network.rs index 3349407d..d10196ac 100644 --- a/src/filters/fb_network.rs +++ b/src/filters/fb_network.rs @@ -2,10 +2,11 @@ use std::vec; use flatbuffers::WIPOffset; +use crate::blocker::NetworkFilterList; use crate::filters::network::{NetworkFilter, NetworkFilterMask}; use crate::regex_manager::RegexManager; use crate::request::{self}; -use crate::utils::Hash; +use crate::utils::{self, Hash}; extern crate flatbuffers; #[allow(dead_code, unused_imports, unsafe_code)] @@ -18,6 +19,9 @@ use super::network::NetworkMatchable; pub struct FlatNetworkFiltersListBuilder<'a> { builder: flatbuffers::FlatBufferBuilder<'a>, filters: Vec>>, + + include_domains: Vec, + exclude_domains: Vec, } impl<'a> FlatNetworkFiltersListBuilder<'a> { @@ -25,25 +29,40 @@ impl<'a> FlatNetworkFiltersListBuilder<'a> { Self { builder: flatbuffers::FlatBufferBuilder::new(), filters: vec![], + include_domains: vec![], + exclude_domains: vec![], + } + } + + fn get_or_insert(arr: &mut Vec, h: Hash) -> u16 { + if let Some(index) = arr.iter().position(|&x| x == h) { + u16::try_from(index).expect("< u16 max") + } else { + arr.push(h); + u16::try_from(arr.len() - 1).expect("< u16 max") } } pub fn add(&mut self, network_filter: NetworkFilter) -> u32 { - let opt_domains = network_filter - .opt_domains - .map(|mut opt_domains| { - opt_domains.insert(0, network_filter.opt_domains_union.unwrap()); - opt_domains - }) - .map(|v| self.builder.create_vector(&v)); - - let opt_not_domains = network_filter - .opt_not_domains - .map(|mut opt_not_domains| { - opt_not_domains.insert(0, network_filter.opt_not_domains_union.unwrap()); - opt_not_domains - }) - .map(|v| self.builder.create_vector(&v)); + let opt_domains = network_filter.opt_domains.map(|v| { + let mut o: Vec = v + .into_iter() + .map(|x| Self::get_or_insert(&mut self.include_domains, x)) + .collect(); + o.sort_unstable(); + o.dedup(); + self.builder.create_vector(&o) + }); + + let opt_not_domains = network_filter.opt_not_domains.map(|v| { + let mut o: Vec = v + .into_iter() + .map(|x| Self::get_or_insert(&mut self.exclude_domains, x)) + .collect(); + o.sort_unstable(); + o.dedup(); + self.builder.create_vector(&o) + }); let modifier_option = network_filter .modifier_option @@ -51,7 +70,7 @@ impl<'a> FlatNetworkFiltersListBuilder<'a> { let hostname = network_filter .hostname - .map(|s| self.builder.create_string(&s)); + .map(|s| self.builder.create_shared_string(&s)); let tag = network_filter .tag @@ -61,7 +80,7 @@ impl<'a> FlatNetworkFiltersListBuilder<'a> { let offsets: Vec> = network_filter .filter .iter() - .map(|s| self.builder.create_string(s)) + .map(|s| self.builder.create_shared_string(s)) .collect(); Some(self.builder.create_vector(&offsets)) } else { @@ -88,15 +107,27 @@ impl<'a> FlatNetworkFiltersListBuilder<'a> { pub fn finish(&mut self) -> Vec { let filters = self.builder.create_vector(&self.filters); + let include_domains = self.builder.create_vector(&self.include_domains); + let exclude_domains = self.builder.create_vector(&self.exclude_domains); + let storage = fb::NetworkFilterList::create( &mut self.builder, &&fb::NetworkFilterListArgs { global_list: Some(filters), + unique_include_domains: Some(include_domains), + unique_exclude_domains: Some(exclude_domains), }, ); self.builder.finish(storage, None); - Vec::from(self.builder.finished_data()) + let r = Vec::from(self.builder.finished_data()); + println!( + "bytes {} i {} e {}", + r.len(), + self.include_domains.len(), + self.exclude_domains.len() + ); + r } } pub struct FlatPatterns<'a> { @@ -145,35 +176,31 @@ impl<'a> ExactSizeIterator for FlatPatternsIterator<'a> { } pub struct FlatNetworkFilterView<'a> { + fb_filter: &'a fb::NetworkFilter<'a>, pub key: u64, pub mask: NetworkFilterMask, - pub patterns: FlatPatterns<'a>, - pub modifier_option: Option<&'a str>, - pub hostname: Option<&'a str>, - pub opt_domains: Option<&'a [Hash]>, - pub opt_not_domains: Option<&'a [Hash]>, pub tag: Option<&'a str>, } impl<'a> From<&'a fb::NetworkFilter<'a>> for FlatNetworkFilterView<'a> { #[inline(always)] fn from(filter: &'a fb::NetworkFilter<'a>) -> Self { - let opt_domains = filter.opt_domains().map(|domains| unsafe { + /*let opt_domains = filter.opt_domains().map(|domains| unsafe { let bytes = domains.bytes(); std::slice::from_raw_parts( - bytes.as_ptr() as *const u64, - bytes.len() / std::mem::size_of::(), + bytes.as_ptr() as *const u32, + bytes.len() / std::mem::size_of::(), ) }); let opt_not_domains = filter.opt_not_domains().map(|domains| unsafe { let bytes = domains.bytes(); std::slice::from_raw_parts( - bytes.as_ptr() as *const u64, - bytes.len() / std::mem::size_of::(), + bytes.as_ptr() as *const u32, + bytes.len() / std::mem::size_of::(), ) - }); - Self { - key: (filter._tab.buf().as_ptr() as *const u64) as u64, + });*/ + /*Self { + key: 0, mask: unsafe { NetworkFilterMask::from_bits_unchecked(filter.mask()) }, patterns: FlatPatterns { data: filter.patterns(), @@ -183,28 +210,94 @@ impl<'a> From<&'a fb::NetworkFilter<'a>> for FlatNetworkFilterView<'a> { opt_domains: opt_domains, opt_not_domains: opt_not_domains, tag: filter.tag(), + }*/ + Self { + fb_filter: filter, + key: 0, + mask: unsafe { NetworkFilterMask::from_bits_unchecked(filter.mask()) }, + tag: filter.tag(), + } + } +} + +struct CheckOptionsParams { + pub mask: NetworkFilterMask, +} + +impl<'a> From<&'a FlatNetworkFilterView<'a>> for CheckOptionsParams { + #[inline(always)] + fn from(filter: &'a FlatNetworkFilterView<'a>) -> Self { + Self { mask: filter.mask } + } +} + +struct CheckPatternsParams<'a> { + pub patterns: FlatPatterns<'a>, + pub hostname: Option<&'a str>, +} + +impl<'a> From<&'a FlatNetworkFilterView<'a>> for CheckPatternsParams<'a> { + #[inline(always)] + fn from(filter: &'a FlatNetworkFilterView<'a>) -> Self { + Self { + patterns: FlatPatterns { + data: filter.fb_filter.patterns(), + }, + hostname: if filter.mask.is_hostname_anchor() { + filter.fb_filter.hostname() + } else { + None + }, } } } impl<'a> NetworkMatchable for FlatNetworkFilterView<'a> { - fn matches(&self, request: &request::Request, regex_manager: &mut RegexManager) -> bool { - use crate::filters::network_matchers::{check_options, check_pattern}; - check_options( - self.mask, - self.opt_domains.map(|d| d[1..].as_ref()), - self.opt_domains.map(|d| d[0]), - self.opt_not_domains.map(|d| d[1..].as_ref()), - self.opt_not_domains.map(|d| d[0]), - request, - ) && check_pattern( + fn matches( + &self, + request: &request::Request, + network_list: &NetworkFilterList, + regex_manager: &mut RegexManager, + ) -> bool { + use crate::filters::network_matchers::{ + check_excluded_domains, check_included_domains, check_options, check_pattern, + }; + let cop = CheckOptionsParams::from(self); + if !check_options(cop.mask, request) { + return false; + } + let opt_not_domains = self.fb_filter.opt_not_domains().map(|domains| unsafe { + let bytes = domains.bytes(); + std::slice::from_raw_parts( + bytes.as_ptr() as *const u16, + bytes.len() / std::mem::size_of::(), + ) + }); + if !check_excluded_domains(opt_not_domains, request, &network_list.exclude_domains_map) { + return false; + } + let opt_domains = self.fb_filter.opt_domains().map(|domains| unsafe { + let bytes = domains.bytes(); + std::slice::from_raw_parts( + bytes.as_ptr() as *const u16, + bytes.len() / std::mem::size_of::(), + ) + }); + if !check_included_domains(opt_domains, request, &network_list.include_domains_map) { + return false; + } + let cpp = CheckPatternsParams::from(self); + if !check_pattern( self.mask, - self.patterns.iter(), - self.hostname, + cpp.patterns.iter(), + cpp.hostname, self.key, request, regex_manager, - ) + ) { + return false; + } + true } #[cfg(test)] diff --git a/src/filters/network.rs b/src/filters/network.rs index cb5d0ee1..183e68a5 100644 --- a/src/filters/network.rs +++ b/src/filters/network.rs @@ -9,6 +9,7 @@ use thiserror::Error; use std::fmt; +use crate::blocker::NetworkFilterList; use crate::filters::abstract_network::{ AbstractNetworkFilter, NetworkFilterLeftAnchor, NetworkFilterOption, NetworkFilterRightAnchor, }; @@ -917,30 +918,41 @@ impl fmt::Display for NetworkFilter { } pub trait NetworkMatchable { - fn matches(&self, request: &request::Request, regex_manager: &mut RegexManager) -> bool; + fn matches( + &self, + request: &request::Request, + filter_list: &NetworkFilterList, + regex_manager: &mut RegexManager, + ) -> bool; #[cfg(test)] fn matches_test(&self, request: &request::Request) -> bool; } impl NetworkMatchable for NetworkFilter { - fn matches(&self, request: &request::Request, regex_manager: &mut RegexManager) -> bool { - use crate::filters::network_matchers::{check_options, check_pattern}; - check_options( - self.mask, + fn matches( + &self, + request: &request::Request, + network_list: &NetworkFilterList, + regex_manager: &mut RegexManager, + ) -> bool { + //use crate::filters::network_matchers::{check_domains, check_options, check_pattern}; + /*check_options(self.mask, request) + &&*/ + /*check_domains( self.opt_domains.as_deref(), - self.opt_domains_union, self.opt_not_domains.as_deref(), - self.opt_not_domains_union, request, - ) && check_pattern( + )*/ + /*&& check_pattern( self.mask, self.filter.iter(), self.hostname.as_deref(), self.key(), request, regex_manager, - ) + )*/ + false } #[cfg(test)] diff --git a/src/filters/network_matchers.rs b/src/filters/network_matchers.rs index 491c8525..718d202e 100644 --- a/src/filters/network_matchers.rs +++ b/src/filters/network_matchers.rs @@ -8,6 +8,7 @@ use crate::filters::network::NetworkFilterMask; use crate::regex_manager::RegexManager; use crate::request; use crate::utils::{self, Hash}; +use std::collections::HashMap; impl NetworkFilterMask { #[inline(always)] @@ -157,9 +158,7 @@ where return true; } let request_url = request.get_url(mask.match_case()); - filters.any(|f| { - memmem::find(request_url.as_bytes(), f.as_bytes()).is_some() - }) + filters.any(|f| memmem::find(request_url.as_bytes(), f.as_bytes()).is_some()) } // pattern| @@ -471,14 +470,8 @@ where } } -pub fn check_options<'a>( - mask: NetworkFilterMask, - opt_domains: Option<&'a [Hash]>, - opt_domains_union: Option, - opt_not_domains: Option<&'a [Hash]>, - opt_not_domains_union: Option, - request: &request::Request, -) -> bool { +#[inline(always)] +pub fn check_options<'a>(mask: NetworkFilterMask, request: &request::Request) -> bool { // Bad filter never matches if mask.is_badfilter() { return false; @@ -494,47 +487,51 @@ pub fn check_options<'a>( return false; } + true +} + +#[inline(always)] +pub fn check_included_domains<'a>( + opt_domains: Option<&[u16]>, + request: &request::Request, + map: &HashMap, +) -> bool { // Source URL must be among these domains to match if let Some(included_domains) = opt_domains.as_ref() { if let Some(source_hashes) = request.source_hostname_hashes.as_ref() { - // If the union of included domains is recorded - if let Some(included_domains_union) = opt_domains_union { - // If there isn't any source hash that matches the union, there's no match at all - if source_hashes - .iter() - .all(|h| h & included_domains_union != *h) - { - return false; + if source_hashes.iter().all(|h| { + if let Some(index) = map.get(h) { + !utils::bin_lookup(included_domains, *index) + } else { + true } - } - if source_hashes - .iter() - .all(|h| !utils::bin_lookup(included_domains, *h)) - { + }) { return false; } } } + true +} +#[inline(always)] +pub fn check_excluded_domains<'a>( + opt_not_domains: Option<&[u16]>, + request: &request::Request, + map: &HashMap, +) -> bool { if let Some(excluded_domains) = opt_not_domains.as_ref() { if let Some(source_hashes) = request.source_hostname_hashes.as_ref() { - // If the union of excluded domains is recorded - if let Some(excluded_domains_union) = opt_not_domains_union { - // If there's any source hash that matches the union, check the actual values - if source_hashes.iter().any(|h| { - (h & excluded_domains_union == *h) && utils::bin_lookup(excluded_domains, *h) - }) { - return false; + if source_hashes.iter().any(|h| { + if let Some(index) = map.get(h) { + utils::bin_lookup(excluded_domains, *index) + } else { + false } - } else if source_hashes - .iter() - .any(|h| utils::bin_lookup(excluded_domains, *h)) - { + }) { return false; } } } - true } diff --git a/src/flat/fb_network_filter.fbs b/src/flat/fb_network_filter.fbs index 3bab5cb2..94375e0d 100644 --- a/src/flat/fb_network_filter.fbs +++ b/src/flat/fb_network_filter.fbs @@ -5,10 +5,8 @@ namespace fb; table NetworkFilter { mask: uint32; // NetworkFilterMask (network.rs) - // opt_domains[0] and opt_not_domains[0] contains the united (by or operation) hash of all hashes of [1..] - // This type of storage saves 8 bytes per filter if these arrays are empty. - opt_domains: [uint64]; - opt_not_domains: [uint64]; + opt_domains: [uint16]; + opt_not_domains: [uint16]; patterns: [string]; modifier_option: string; @@ -19,6 +17,9 @@ table NetworkFilter { table NetworkFilterList { global_list: [NetworkFilter] (required); + + unique_include_domains: [uint64] (required); + unique_exclude_domains: [uint64] (required); } root_type NetworkFilterList; diff --git a/src/flat/fb_network_filter_generated.rs b/src/flat/fb_network_filter_generated.rs index eaa073f2..3b1ec50e 100644 --- a/src/flat/fb_network_filter_generated.rs +++ b/src/flat/fb_network_filter_generated.rs @@ -101,18 +101,18 @@ impl<'a> NetworkFilter<'a> { unsafe { self._tab.get::(NetworkFilter::VT_MASK, Some(0)).unwrap()} } #[inline] - pub fn opt_domains(&self) -> Option> { + pub fn opt_domains(&self) -> Option> { // Safety: // Created from valid Table for this object // which contains a valid value in this slot - unsafe { self._tab.get::>>(NetworkFilter::VT_OPT_DOMAINS, None)} + unsafe { self._tab.get::>>(NetworkFilter::VT_OPT_DOMAINS, None)} } #[inline] - pub fn opt_not_domains(&self) -> Option> { + pub fn opt_not_domains(&self) -> Option> { // Safety: // Created from valid Table for this object // which contains a valid value in this slot - unsafe { self._tab.get::>>(NetworkFilter::VT_OPT_NOT_DOMAINS, None)} + unsafe { self._tab.get::>>(NetworkFilter::VT_OPT_NOT_DOMAINS, None)} } #[inline] pub fn patterns(&self) -> Option>> { @@ -152,8 +152,8 @@ impl flatbuffers::Verifiable for NetworkFilter<'_> { use self::flatbuffers::Verifiable; v.visit_table(pos)? .visit_field::("mask", Self::VT_MASK, false)? - .visit_field::>>("opt_domains", Self::VT_OPT_DOMAINS, false)? - .visit_field::>>("opt_not_domains", Self::VT_OPT_NOT_DOMAINS, false)? + .visit_field::>>("opt_domains", Self::VT_OPT_DOMAINS, false)? + .visit_field::>>("opt_not_domains", Self::VT_OPT_NOT_DOMAINS, false)? .visit_field::>>>("patterns", Self::VT_PATTERNS, false)? .visit_field::>("modifier_option", Self::VT_MODIFIER_OPTION, false)? .visit_field::>("hostname", Self::VT_HOSTNAME, false)? @@ -164,8 +164,8 @@ impl flatbuffers::Verifiable for NetworkFilter<'_> { } pub struct NetworkFilterArgs<'a> { pub mask: u32, - pub opt_domains: Option>>, - pub opt_not_domains: Option>>, + pub opt_domains: Option>>, + pub opt_not_domains: Option>>, pub patterns: Option>>>, pub modifier_option: Option>, pub hostname: Option>, @@ -196,11 +196,11 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> NetworkFilterBuilder<'a, 'b, A> self.fbb_.push_slot::(NetworkFilter::VT_MASK, mask, 0); } #[inline] - pub fn add_opt_domains(&mut self, opt_domains: flatbuffers::WIPOffset>) { + pub fn add_opt_domains(&mut self, opt_domains: flatbuffers::WIPOffset>) { self.fbb_.push_slot_always::>(NetworkFilter::VT_OPT_DOMAINS, opt_domains); } #[inline] - pub fn add_opt_not_domains(&mut self, opt_not_domains: flatbuffers::WIPOffset>) { + pub fn add_opt_not_domains(&mut self, opt_not_domains: flatbuffers::WIPOffset>) { self.fbb_.push_slot_always::>(NetworkFilter::VT_OPT_NOT_DOMAINS, opt_not_domains); } #[inline] @@ -251,8 +251,8 @@ impl core::fmt::Debug for NetworkFilter<'_> { #[derive(Debug, Clone, PartialEq)] pub struct NetworkFilterT { pub mask: u32, - pub opt_domains: Option>, - pub opt_not_domains: Option>, + pub opt_domains: Option>, + pub opt_not_domains: Option>, pub patterns: Option>, pub modifier_option: Option, pub hostname: Option, @@ -323,6 +323,8 @@ impl<'a> flatbuffers::Follow<'a> for NetworkFilterList<'a> { impl<'a> NetworkFilterList<'a> { pub const VT_GLOBAL_LIST: flatbuffers::VOffsetT = 4; + pub const VT_UNIQUE_INCLUDE_DOMAINS: flatbuffers::VOffsetT = 6; + pub const VT_UNIQUE_EXCLUDE_DOMAINS: flatbuffers::VOffsetT = 8; #[inline] pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { @@ -334,6 +336,8 @@ impl<'a> NetworkFilterList<'a> { args: &'args NetworkFilterListArgs<'args> ) -> flatbuffers::WIPOffset> { let mut builder = NetworkFilterListBuilder::new(_fbb); + if let Some(x) = args.unique_exclude_domains { builder.add_unique_exclude_domains(x); } + if let Some(x) = args.unique_include_domains { builder.add_unique_include_domains(x); } if let Some(x) = args.global_list { builder.add_global_list(x); } builder.finish() } @@ -343,8 +347,18 @@ impl<'a> NetworkFilterList<'a> { let x = self.global_list(); x.iter().map(|t| t.unpack()).collect() }; + let unique_include_domains = { + let x = self.unique_include_domains(); + x.into_iter().collect() + }; + let unique_exclude_domains = { + let x = self.unique_exclude_domains(); + x.into_iter().collect() + }; NetworkFilterListT { global_list, + unique_include_domains, + unique_exclude_domains, } } @@ -355,6 +369,20 @@ impl<'a> NetworkFilterList<'a> { // which contains a valid value in this slot unsafe { self._tab.get::>>>(NetworkFilterList::VT_GLOBAL_LIST, None).unwrap()} } + #[inline] + pub fn unique_include_domains(&self) -> flatbuffers::Vector<'a, u64> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::>>(NetworkFilterList::VT_UNIQUE_INCLUDE_DOMAINS, None).unwrap()} + } + #[inline] + pub fn unique_exclude_domains(&self) -> flatbuffers::Vector<'a, u64> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::>>(NetworkFilterList::VT_UNIQUE_EXCLUDE_DOMAINS, None).unwrap()} + } } impl flatbuffers::Verifiable for NetworkFilterList<'_> { @@ -365,18 +393,24 @@ impl flatbuffers::Verifiable for NetworkFilterList<'_> { use self::flatbuffers::Verifiable; v.visit_table(pos)? .visit_field::>>>("global_list", Self::VT_GLOBAL_LIST, true)? + .visit_field::>>("unique_include_domains", Self::VT_UNIQUE_INCLUDE_DOMAINS, true)? + .visit_field::>>("unique_exclude_domains", Self::VT_UNIQUE_EXCLUDE_DOMAINS, true)? .finish(); Ok(()) } } pub struct NetworkFilterListArgs<'a> { pub global_list: Option>>>>, + pub unique_include_domains: Option>>, + pub unique_exclude_domains: Option>>, } impl<'a> Default for NetworkFilterListArgs<'a> { #[inline] fn default() -> Self { NetworkFilterListArgs { global_list: None, // required field + unique_include_domains: None, // required field + unique_exclude_domains: None, // required field } } } @@ -391,6 +425,14 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> NetworkFilterListBuilder<'a, 'b self.fbb_.push_slot_always::>(NetworkFilterList::VT_GLOBAL_LIST, global_list); } #[inline] + pub fn add_unique_include_domains(&mut self, unique_include_domains: flatbuffers::WIPOffset>) { + self.fbb_.push_slot_always::>(NetworkFilterList::VT_UNIQUE_INCLUDE_DOMAINS, unique_include_domains); + } + #[inline] + pub fn add_unique_exclude_domains(&mut self, unique_exclude_domains: flatbuffers::WIPOffset>) { + self.fbb_.push_slot_always::>(NetworkFilterList::VT_UNIQUE_EXCLUDE_DOMAINS, unique_exclude_domains); + } + #[inline] pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> NetworkFilterListBuilder<'a, 'b, A> { let start = _fbb.start_table(); NetworkFilterListBuilder { @@ -402,6 +444,8 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> NetworkFilterListBuilder<'a, 'b pub fn finish(self) -> flatbuffers::WIPOffset> { let o = self.fbb_.end_table(self.start_); self.fbb_.required(o, NetworkFilterList::VT_GLOBAL_LIST,"global_list"); + self.fbb_.required(o, NetworkFilterList::VT_UNIQUE_INCLUDE_DOMAINS,"unique_include_domains"); + self.fbb_.required(o, NetworkFilterList::VT_UNIQUE_EXCLUDE_DOMAINS,"unique_exclude_domains"); flatbuffers::WIPOffset::new(o.value()) } } @@ -410,6 +454,8 @@ impl core::fmt::Debug for NetworkFilterList<'_> { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let mut ds = f.debug_struct("NetworkFilterList"); ds.field("global_list", &self.global_list()); + ds.field("unique_include_domains", &self.unique_include_domains()); + ds.field("unique_exclude_domains", &self.unique_exclude_domains()); ds.finish() } } @@ -417,11 +463,15 @@ impl core::fmt::Debug for NetworkFilterList<'_> { #[derive(Debug, Clone, PartialEq)] pub struct NetworkFilterListT { pub global_list: Vec, + pub unique_include_domains: Vec, + pub unique_exclude_domains: Vec, } impl Default for NetworkFilterListT { fn default() -> Self { Self { global_list: Default::default(), + unique_include_domains: Default::default(), + unique_exclude_domains: Default::default(), } } } @@ -434,8 +484,18 @@ impl NetworkFilterListT { let x = &self.global_list; let w: Vec<_> = x.iter().map(|t| t.pack(_fbb)).collect();_fbb.create_vector(&w) }); + let unique_include_domains = Some({ + let x = &self.unique_include_domains; + _fbb.create_vector(x) + }); + let unique_exclude_domains = Some({ + let x = &self.unique_exclude_domains; + _fbb.create_vector(x) + }); NetworkFilterList::create(_fbb, &NetworkFilterListArgs{ global_list, + unique_include_domains, + unique_exclude_domains, }) } } diff --git a/src/utils.rs b/src/utils.rs index 244c1e61..4adda40e 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -98,8 +98,41 @@ pub(crate) fn tokenize_filter( tokens_buffer } +// input should be asc sorted. +#[inline] +pub fn eytzinger_layout(input: &[u32]) -> Vec { + #[inline(always)] + fn eytzinger(a: &[u32], b: &mut [u32], mut i: usize, k: usize) -> usize { + if k <= a.len() { + i = eytzinger(a, b, i, 2 * k); + b[k] = a[i]; + i = eytzinger(a, b, i + 1, 2 * k + 1); + } + i + } + let mut result = Vec::with_capacity(input.len() + 1); + result.resize(result.capacity(), 0); + eytzinger(&input, &mut result, 0, 1); + + result +} + +#[inline(always)] +pub fn eytzinger_search(input: &[u32], target: u32) -> bool { + // branchless impl + let mut index: usize = 1; + while index < input.len() { + let el = input[index]; + index = 2 * index + usize::from(el < target); + } + index >>= index.trailing_ones() + 1; + + usize::from(input[index] == target) * index != 0 +} + pub(crate) fn bin_lookup(arr: &[T], elt: T) -> bool { - arr.binary_search(&elt).is_ok() + //arr.binary_search(&elt).is_ok() + arr.contains(&elt) } #[cfg(test)] diff --git a/tests/unit/filters/cosmetic.rs b/tests/unit/filters/cosmetic.rs index b0d5d933..a9a6ba53 100644 --- a/tests/unit/filters/cosmetic.rs +++ b/tests/unit/filters/cosmetic.rs @@ -151,7 +151,9 @@ mod parse_tests { check_parse_result( r#"soundtrackcollector.com,the-numbers.com##a[href^="http://affiliates.allposters.com/"]"#, CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"a[href^="http://affiliates.allposters.com/"]"#.to_string()), + selector: SelectorType::PlainCss( + r#"a[href^="http://affiliates.allposters.com/"]"#.to_string(), + ), hostnames: sort_hash_domains(vec!["soundtrackcollector.com", "the-numbers.com"]), ..Default::default() }, @@ -199,7 +201,9 @@ mod parse_tests { check_parse_result( r#"adforum.com,alliednews.com,americustimesrecorder.com,andovertownsman.com,athensreview.com,batesvilleheraldtribune.com,bdtonline.com,channel24.pk,chickashanews.com,claremoreprogress.com,cleburnetimesreview.com,clintonherald.com,commercejournal.com,commercial-news.com,coopercrier.com,cordeledispatch.com,corsicanadailysun.com,crossville-chronicle.com,cullmantimes.com,dailyiowegian.com,dailyitem.com,daltondailycitizen.com,derrynews.com,duncanbanner.com,eagletribune.com,edmondsun.com,effinghamdailynews.com,enewscourier.com,enidnews.com,farmtalknewspaper.com,fayettetribune.com,flasharcade.com,flashgames247.com,flyergroup.com,foxsportsasia.com,gainesvilleregister.com,gloucestertimes.com,goshennews.com,greensburgdailynews.com,heraldbanner.com,heraldbulletin.com,hgazette.com,homemagonline.com,itemonline.com,jacksonvilleprogress.com,jerusalemonline.com,joplinglobe.com,journal-times.com,journalexpress.net,kexp.org,kokomotribune.com,lockportjournal.com,mankatofreepress.com,mcalesternews.com,mccrearyrecord.com,mcleansborotimesleader.com,meadvilletribune.com,meridianstar.com,mineralwellsindex.com,montgomery-herald.com,mooreamerican.com,moultrieobserver.com,muskogeephoenix.com,ncnewsonline.com,newburyportnews.com,newsaegis.com,newsandtribune.com,niagara-gazette.com,njeffersonnews.com,normantranscript.com,opposingviews.com,orangeleader.com,oskaloosa.com,ottumwacourier.com,outlookmoney.com,palestineherald.com,panews.com,paulsvalleydailydemocrat.com,pellachronicle.com,pharostribune.com,pressrepublican.com,pryordailytimes.com,randolphguide.com,record-eagle.com,register-herald.com,register-news.com,reporter.net,rockwallheraldbanner.com,roysecityheraldbanner.com,rushvillerepublican.com,salemnews.com,sentinel-echo.com,sharonherald.com,shelbyvilledailyunion.com,siteslike.com,standardmedia.co.ke,starbeacon.com,stwnewspress.com,suwanneedemocrat.com,tahlequahdailypress.com,theadanews.com,theawesomer.com,thedailystar.com,thelandonline.com,themoreheadnews.com,thesnaponline.com,tiftongazette.com,times-news.com,timesenterprise.com,timessentinel.com,timeswv.com,tonawanda-news.com,tribdem.com,tribstar.com,unionrecorder.com,valdostadailytimes.com,washtimesherald.com,waurikademocrat.com,wcoutlook.com,weatherforddemocrat.com,woodwardnews.net,wrestlinginc.com##div[style="width:300px; height:250px;"]"#, CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"div[style="width:300px; height:250px;"]"#.to_string()), + selector: SelectorType::PlainCss( + r#"div[style="width:300px; height:250px;"]"#.to_string(), + ), hostnames: sort_hash_domains(vec![ "adforum.com", "alliednews.com", @@ -362,7 +366,9 @@ mod parse_tests { check_parse_result( r#"tf2maps.net##a[href="http://forums.tf2maps.net/payments.php"]"#, CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"a[href="http://forums.tf2maps.net/payments.php"]"#.to_string()), + selector: SelectorType::PlainCss( + r#"a[href="http://forums.tf2maps.net/payments.php"]"#.to_string(), + ), hostnames: sort_hash_domains(vec!["tf2maps.net"]), ..Default::default() }, @@ -370,7 +376,9 @@ mod parse_tests { check_parse_result( r#"rarbg.to,rarbg.unblockall.org,rarbgaccess.org,rarbgmirror.com,rarbgmirror.org,rarbgmirror.xyz,rarbgproxy.com,rarbgproxy.org,rarbgunblock.com##a[href][target="_blank"] > button"#, CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"a[href][target="_blank"] > button"#.to_string()), + selector: SelectorType::PlainCss( + r#"a[href][target="_blank"] > button"#.to_string(), + ), hostnames: sort_hash_domains(vec![ "rarbg.to", "rarbg.unblockall.org", @@ -406,7 +414,9 @@ mod parse_tests { check_parse_result( r#"haus-garten-test.de,sozialversicherung-kompetent.de##+js(set-constant.js, Object.keys, trueFunc)"#, CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"set-constant.js, Object.keys, trueFunc"#.to_string()), + selector: SelectorType::PlainCss( + r#"set-constant.js, Object.keys, trueFunc"#.to_string(), + ), hostnames: sort_hash_domains(vec![ "haus-garten-test.de", "sozialversicherung-kompetent.de", @@ -418,7 +428,9 @@ mod parse_tests { check_parse_result( r#"airliners.de,auszeit.bio,autorevue.at,clever-tanken.de,fanfiktion.de,finya.de,frag-mutti.de,frustfrei-lernen.de,fussballdaten.de,gameswelt.*,liga3-online.de,lz.de,mt.de,psychic.de,rimondo.com,spielen.de,weltfussball.at,weristdeinfreund.de##+js(abort-current-inline-script.js, Number.isNaN)"#, CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"abort-current-inline-script.js, Number.isNaN"#.to_string()), + selector: SelectorType::PlainCss( + r#"abort-current-inline-script.js, Number.isNaN"#.to_string(), + ), hostnames: sort_hash_domains(vec![ "airliners.de", "auszeit.bio", @@ -446,7 +458,9 @@ mod parse_tests { check_parse_result( r#"prad.de##+js(abort-on-property-read.js, document.cookie)"#, CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"abort-on-property-read.js, document.cookie"#.to_string()), + selector: SelectorType::PlainCss( + r#"abort-on-property-read.js, document.cookie"#.to_string(), + ), hostnames: sort_hash_domains(vec!["prad.de"]), script_inject: true, ..Default::default() @@ -455,7 +469,9 @@ mod parse_tests { check_parse_result( r#"computerbild.de##+js(abort-on-property-read.js, Date.prototype.toUTCString)"#, CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"abort-on-property-read.js, Date.prototype.toUTCString"#.to_string()), + selector: SelectorType::PlainCss( + r#"abort-on-property-read.js, Date.prototype.toUTCString"#.to_string(), + ), hostnames: sort_hash_domains(vec!["computerbild.de"]), script_inject: true, ..Default::default() @@ -494,7 +510,9 @@ mod parse_tests { check_parse_result( r#"monova.*#@#script + [class] > [class]:first-child"#, CosmeticFilterBreakdown { - selector: SelectorType::PlainCss(r#"script + [class] > [class]:first-child"#.to_string()), + selector: SelectorType::PlainCss( + r#"script + [class] > [class]:first-child"#.to_string(), + ), entities: sort_hash_domains(vec!["monova"]), unhide: true, ..Default::default() @@ -515,7 +533,9 @@ mod parse_tests { CosmeticFilterBreakdown { selector: SelectorType::PlainCss(r#".date:not(dt)"#.to_string()), entities: sort_hash_domains(vec!["downloadsource"]), - action: Some(CosmeticFilterAction::Style("display: block !important;".into())), + action: Some(CosmeticFilterAction::Style( + "display: block !important;".into(), + )), ..Default::default() }, ); @@ -528,7 +548,9 @@ mod parse_tests { CosmeticFilterBreakdown { selector: SelectorType::PlainCss(r#".video-wrapper > video[style]"#.to_string()), hostnames: sort_hash_domains(vec!["chip.de"]), - action: Some(CosmeticFilterAction::Style("display:block!important;padding-top:0!important;".into())), + action: Some(CosmeticFilterAction::Style( + "display:block!important;padding-top:0!important;".into(), + )), ..Default::default() }, ); @@ -537,7 +559,9 @@ mod parse_tests { CosmeticFilterBreakdown { selector: SelectorType::PlainCss(r#".advertising.medium-rectangle"#.to_string()), hostnames: sort_hash_domains(vec!["allmusic.com"]), - action: Some(CosmeticFilterAction::Style("min-height: 1px !important;".into())), + action: Some(CosmeticFilterAction::Style( + "min-height: 1px !important;".into(), + )), ..Default::default() }, ); @@ -556,7 +580,10 @@ mod parse_tests { CosmeticFilterBreakdown { selector: SelectorType::PlainCss(r#"body#styleguide-v2"#.to_string()), hostnames: sort_hash_domains(vec!["imdb.com"]), - action: Some(CosmeticFilterAction::Style("background-color: #e3e2dd !important; background-image: none !important;".into())), + action: Some(CosmeticFilterAction::Style( + "background-color: #e3e2dd !important; background-image: none !important;" + .into(), + )), ..Default::default() }, ); @@ -565,7 +592,9 @@ mod parse_tests { CosmeticFilterBreakdown { selector: SelectorType::PlainCss(r#"#login > div[style^="width"]"#.to_string()), hostnames: sort_hash_domains(vec!["streamcloud.eu"]), - action: Some(CosmeticFilterAction::Style("display: block !important".into())), + action: Some(CosmeticFilterAction::Style( + "display: block !important".into(), + )), ..Default::default() }, ); @@ -578,7 +607,9 @@ mod parse_tests { "moondoge.co.in", "moonliteco.in", ]), - action: Some(CosmeticFilterAction::Style("visibility: collapse !important".into())), + action: Some(CosmeticFilterAction::Style( + "visibility: collapse !important".into(), + )), ..Default::default() }, ); @@ -600,7 +631,7 @@ mod parse_tests { hostnames: sort_hash_domains(vec!["xn--lloworl-5ggb3f.com"]), unhide: true, ..Default::default() - } + }, ); } @@ -624,11 +655,9 @@ mod parse_tests { &format!("example.com##{}", raw), CosmeticFilterBreakdown { selector: SelectorType::Procedural(expected_selectors), - hostnames: sort_hash_domains(vec![ - "example.com", - ]), + hostnames: sort_hash_domains(vec!["example.com"]), ..Default::default() - } + }, ); } check_procedural( @@ -685,13 +714,18 @@ mod parse_tests { #[cfg(feature = "css-validation")] fn unsupported() { assert!(parse_cf("yandex.*##.serp-item:if(:scope > div.organic div.organic__subtitle:matches-css-after(content: /[Рр]еклама/))").is_err()); - assert!(parse_cf(r#"facebook.com,facebookcorewwwi.onion##.ego_column:if(a[href^="/campaign/landing"])"#).is_err()); + assert!(parse_cf( + r#"facebook.com,facebookcorewwwi.onion##.ego_column:if(a[href^="/campaign/landing"])"# + ) + .is_err()); assert!(parse_cf(r#"readcomiconline.to##^script:has-text(this[atob)"#).is_err()); assert!(parse_cf("##").is_err()); assert!(parse_cf("").is_err()); // `:has` was previously limited to procedural filtering, but is now a native CSS feature. - assert!(parse_cf(r#"thedailywtf.com##.article-body > div:has(a[href*="utm_medium"])"#).is_ok()); + assert!( + parse_cf(r#"thedailywtf.com##.article-body > div:has(a[href*="utm_medium"])"#).is_ok() + ); // `:has-text` and `:xpath` are now supported procedural filters assert!(parse_cf("twitter.com##article:has-text(/Promoted|Gesponsert|Реклама|Promocionado/):xpath(../..)").is_ok()); @@ -780,10 +814,38 @@ mod util_tests { #[test] fn label_hashing() { - assert_eq!(get_hashes_from_labels("foo.bar.baz", 11, 11), vec![fast_hash("baz"), fast_hash("bar.baz"), fast_hash("foo.bar.baz")]); - assert_eq!(get_hashes_from_labels("foo.bar.baz.com", 15, 8), vec![fast_hash("baz.com"), fast_hash("bar.baz.com"), fast_hash("foo.bar.baz.com")]); - assert_eq!(get_hashes_from_labels("foo.bar.baz.com", 11, 11), vec![fast_hash("baz"), fast_hash("bar.baz"), fast_hash("foo.bar.baz")]); - assert_eq!(get_hashes_from_labels("foo.bar.baz.com", 11, 8), vec![fast_hash("baz"), fast_hash("bar.baz"), fast_hash("foo.bar.baz")]); + assert_eq!( + get_hashes_from_labels("foo.bar.baz", 11, 11), + vec![ + fast_hash("baz"), + fast_hash("bar.baz"), + fast_hash("foo.bar.baz") + ] + ); + assert_eq!( + get_hashes_from_labels("foo.bar.baz.com", 15, 8), + vec![ + fast_hash("baz.com"), + fast_hash("bar.baz.com"), + fast_hash("foo.bar.baz.com") + ] + ); + assert_eq!( + get_hashes_from_labels("foo.bar.baz.com", 11, 11), + vec![ + fast_hash("baz"), + fast_hash("bar.baz"), + fast_hash("foo.bar.baz") + ] + ); + assert_eq!( + get_hashes_from_labels("foo.bar.baz.com", 11, 8), + vec![ + fast_hash("baz"), + fast_hash("bar.baz"), + fast_hash("foo.bar.baz") + ] + ); } #[test] @@ -791,16 +853,24 @@ mod util_tests { assert_eq!(get_hostname_without_public_suffix("", ""), None); assert_eq!(get_hostname_without_public_suffix("com", ""), None); assert_eq!(get_hostname_without_public_suffix("com", "com"), None); - assert_eq!(get_hostname_without_public_suffix("foo.com", "foo.com"), Some(("foo", "com"))); - assert_eq!(get_hostname_without_public_suffix("foo.bar.com", "bar.com"), Some(("foo.bar", "com"))); - assert_eq!(get_hostname_without_public_suffix("test.github.io", "test.github.io"), Some(("test", "github.io"))); + assert_eq!( + get_hostname_without_public_suffix("foo.com", "foo.com"), + Some(("foo", "com")) + ); + assert_eq!( + get_hostname_without_public_suffix("foo.bar.com", "bar.com"), + Some(("foo.bar", "com")) + ); + assert_eq!( + get_hostname_without_public_suffix("test.github.io", "test.github.io"), + Some(("test", "github.io")) + ); } } #[cfg(test)] mod matching_tests { use super::super::*; - use crate::utils::bin_lookup; trait MatchByStr { fn matches(&self, request_entities: &[Hash], request_hostnames: &[Hash]) -> bool; @@ -841,7 +911,7 @@ mod matching_tests { if let Some(ref filter_not_hostnames) = self.not_hostnames { if request_hostnames .iter() - .any(|hash| bin_lookup(filter_not_hostnames, *hash)) + .any(|hash| utils::bin_lookup(filter_not_hostnames, *hash)) { return false; } @@ -850,7 +920,7 @@ mod matching_tests { if let Some(ref filter_not_entities) = self.not_entities { if request_entities .iter() - .any(|hash| bin_lookup(filter_not_entities, *hash)) + .any(|hash| utils::bin_lookup(filter_not_entities, *hash)) { return false; } @@ -860,7 +930,7 @@ mod matching_tests { if let Some(ref filter_hostnames) = self.hostnames { if request_hostnames .iter() - .any(|hash| bin_lookup(filter_hostnames, *hash)) + .any(|hash| utils::bin_lookup(filter_hostnames, *hash)) { return true; } @@ -869,7 +939,7 @@ mod matching_tests { if let Some(ref filter_entities) = self.entities { if request_entities .iter() - .any(|hash| bin_lookup(filter_entities, *hash)) + .any(|hash| utils::bin_lookup(filter_entities, *hash)) { return true; } @@ -1020,7 +1090,9 @@ mod matching_tests { #[test] fn multiple_selectors() { - assert!(parse_cf("youtube.com##.masthead-ad-control,.ad-div,.pyv-afc-ads-container").is_ok()); + assert!( + parse_cf("youtube.com##.masthead-ad-control,.ad-div,.pyv-afc-ads-container").is_ok() + ); assert!(parse_cf("m.economictimes.com###appBanner,#stickyBanner").is_ok()); assert!(parse_cf("googledrivelinks.com###wpsafe-generate, #wpsafe-link:style(display: block !important;)").is_ok()); } @@ -1048,16 +1120,35 @@ mod matching_tests { #[test] #[cfg(feature = "css-validation")] fn abp_has_conversion() { - let rule = parse_cf("imgur.com#?#div.Gallery-Sidebar-PostContainer:-abp-has(div.promoted-hover)").unwrap(); - assert_eq!(rule.plain_css_selector(), Some("div.Gallery-Sidebar-PostContainer:has(div.promoted-hover)")); - let rule = parse_cf(r##"webtools.fineaty.com#?#div[class*=" hidden-"]:-abp-has(.adsbygoogle)"##).unwrap(); - assert_eq!(rule.plain_css_selector(), Some(r#"div[class*=" hidden-"]:has(.adsbygoogle)"#)); + let rule = + parse_cf("imgur.com#?#div.Gallery-Sidebar-PostContainer:-abp-has(div.promoted-hover)") + .unwrap(); + assert_eq!( + rule.plain_css_selector(), + Some("div.Gallery-Sidebar-PostContainer:has(div.promoted-hover)") + ); + let rule = + parse_cf(r##"webtools.fineaty.com#?#div[class*=" hidden-"]:-abp-has(.adsbygoogle)"##) + .unwrap(); + assert_eq!( + rule.plain_css_selector(), + Some(r#"div[class*=" hidden-"]:has(.adsbygoogle)"#) + ); let rule = parse_cf(r##"facebook.com,facebookcorewwwi.onion#?#._6y8t:-abp-has(a[href="/ads/about/?entry_product=ad_preferences"])"##).unwrap(); - assert_eq!(rule.plain_css_selector(), Some(r#"._6y8t:has(a[href="/ads/about/?entry_product=ad_preferences"])"#)); - let rule = parse_cf(r##"mtgarena.pro#?##root > div > div:-abp-has(> .vm-placement)"##).unwrap(); - assert_eq!(rule.plain_css_selector(), Some(r#"#root > div > div:has(> .vm-placement)"#)); + assert_eq!( + rule.plain_css_selector(), + Some(r#"._6y8t:has(a[href="/ads/about/?entry_product=ad_preferences"])"#) + ); + let rule = + parse_cf(r##"mtgarena.pro#?##root > div > div:-abp-has(> .vm-placement)"##).unwrap(); + assert_eq!( + rule.plain_css_selector(), + Some(r#"#root > div > div:has(> .vm-placement)"#) + ); // Error without `#?#`: - assert!(parse_cf(r##"mtgarena.pro###root > div > div:-abp-has(> .vm-placement)"##).is_err()); + assert!( + parse_cf(r##"mtgarena.pro###root > div > div:-abp-has(> .vm-placement)"##).is_err() + ); } } @@ -1069,21 +1160,42 @@ mod css_validation_tests { #[test] fn bad_selector_inputs() { assert!(validate_css_selector(r#"rm -rf ./*"#, false).is_err()); - assert!(validate_css_selector(r#"javascript:alert("All pseudo-classes are valid")"#, false).is_ok()); - assert!(validate_css_selector(r#"javascript:alert("But opening comments are still forbidden" /*)"#, false).is_err()); + assert!(validate_css_selector( + r#"javascript:alert("All pseudo-classes are valid")"#, + false + ) + .is_ok()); + assert!(validate_css_selector( + r#"javascript:alert("But opening comments are still forbidden" /*)"#, + false + ) + .is_err()); assert!(validate_css_selector(r#"This is not a CSS selector."#, false).is_err()); assert!(validate_css_selector(r#"./malware.sh"#, false).is_err()); assert!(validate_css_selector(r#"https://safesite.ru"#, false).is_err()); - assert!(validate_css_selector(r#"(function(){var e=60;return String.fromCharCode(e.charCodeAt(0))})();"#, false).is_err()); + assert!(validate_css_selector( + r#"(function(){var e=60;return String.fromCharCode(e.charCodeAt(0))})();"#, + false + ) + .is_err()); assert!(validate_css_selector(r#"#!/usr/bin/sh"#, false).is_err()); assert!(validate_css_selector(r#"input,input/*"#, false).is_err()); // Accept a closing comment within a string. It should still be impossible to create an // opening comment to match it. - assert!(validate_css_selector(r#"input[x="*/{}*{background:url(https://hackvertor.co.uk/images/logo.gif)}"]"#, false).is_ok()); + assert!(validate_css_selector( + r#"input[x="*/{}*{background:url(https://hackvertor.co.uk/images/logo.gif)}"]"#, + false + ) + .is_ok()); } #[test] fn escaped_quote_in_tag_name() { - assert_eq!(validate_css_selector(r#"head\""#, false), Ok(vec![CosmeticFilterOperator::CssSelector(r#"head\""#.to_string())])); + assert_eq!( + validate_css_selector(r#"head\""#, false), + Ok(vec![CosmeticFilterOperator::CssSelector( + r#"head\""#.to_string() + )]) + ); } -} \ No newline at end of file +} diff --git a/tests/unit/filters/network_matchers.rs b/tests/unit/filters/network_matchers.rs index f3d746c1..6e25030d 100644 --- a/tests/unit/filters/network_matchers.rs +++ b/tests/unit/filters/network_matchers.rs @@ -386,14 +386,12 @@ mod match_tests { } fn check_options(filter: &NetworkFilter, request: &request::Request) -> bool { - super::super::check_options( - filter.mask, - filter.opt_domains.as_deref(), - filter.opt_domains_union, - filter.opt_not_domains.as_deref(), - filter.opt_not_domains_union, - request, - ) + super::super::check_options(filter.mask, request) + && super::super::check_domains( + filter.opt_domains.as_deref(), + filter.opt_not_domains.as_deref(), + request, + ) } #[test] diff --git a/tests/unit/utils.rs b/tests/unit/utils.rs index c8c938b5..c25fd04c 100644 --- a/tests/unit/utils.rs +++ b/tests/unit/utils.rs @@ -91,14 +91,15 @@ mod tests { } #[test] - fn bin_lookup_works() { - assert_eq!(bin_lookup(&[], 42), false); - assert_eq!(bin_lookup(&[42], 42), true); - assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 42), true); - assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 1), true); - assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 3), true); - assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 43), false); - assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 0), false); - assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 5), false); + fn eytzinger_search_works() { + let input = vec![1, 2, 3, 4, 5, 6, 7, 8, 10]; + let expected = vec![0, 6, 4, 8, 2, 5, 7, 10, 1, 3]; + let result = eytzinger_layout(&input[..]); + assert_eq!(expected, result.as_slice()); + + assert!(eytzinger_search(result.as_ref(), 2)); + assert!(eytzinger_search(result.as_ref(), 4)); + assert!(!eytzinger_search(result.as_ref(), 9)); + assert!(!eytzinger_search(result.as_ref(), 0)); } }