Skip to content

Commit

Permalink
u16
Browse files: browse the repository at this point in the history
  • Loading branch information
boocmp committed Jan 11, 2025
1 parent 44246e6 commit b8591d9
Show file tree
Hide file tree
Showing 13 changed files with 586 additions and 239 deletions.
30 changes: 13 additions & 17 deletions benches/bench_matching.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ use criterion::*;

use serde::{Deserialize, Serialize};

use adblock::Engine;
use adblock::blocker::{Blocker, BlockerOptions};
use adblock::request::Request;
use adblock::resources::ResourceStorage;
use adblock::url_parser::parse_url;
use adblock::Engine;

#[path = "../tests/test_utils.rs"]
mod test_utils;
Expand Down Expand Up @@ -36,11 +36,11 @@ fn load_requests() -> Vec<TestRequest> {
reqs
}

fn get_blocker(rules: impl IntoIterator<Item=impl AsRef<str>>) -> Blocker {
fn get_blocker(rules: impl IntoIterator<Item = impl AsRef<str>>) -> Blocker {
let (network_filters, _) = adblock::lists::parse_filters(rules, false, Default::default());

let blocker_options = BlockerOptions {
enable_optimizations: true,
enable_optimizations: false,
};

Blocker::new(network_filters, &blocker_options)
Expand All @@ -57,11 +57,15 @@ fn bench_rule_matching(engine: &Engine, requests: &Vec<TestRequest>) -> (u32, u3
passes += 1;
}
});
// println!("Got {} matches, {} passes, {} errors", matches, passes, errors);
println!("Got {} matches, {} passes", matches, passes);
(matches, passes)
}

fn bench_matching_only(blocker: &Blocker, resources: &ResourceStorage, requests: &Vec<Request>) -> (u32, u32) {
fn bench_matching_only(
blocker: &Blocker,
resources: &ResourceStorage,
requests: &Vec<Request>,
) -> (u32, u32) {
let mut matches = 0;
let mut passes = 0;
requests.iter().for_each(|parsed| {
Expand Down Expand Up @@ -139,9 +143,7 @@ fn rule_match(c: &mut Criterion) {
fn rule_match_parsed_el(c: &mut Criterion) {
let mut group = c.benchmark_group("rule-match-parsed");

let rules = rules_from_lists(&[
"data/easylist.to/easylist/easylist.txt",
]);
let rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]);
let requests = load_requests();
let requests_parsed: Vec<_> = requests
.into_iter()
Expand Down Expand Up @@ -219,9 +221,7 @@ fn serialization(c: &mut Criterion) {
b.iter(|| assert!(engine.serialize_raw().unwrap().len() > 0))
});
group.bench_function("el", move |b| {
let full_rules = rules_from_lists(&[
"data/easylist.to/easylist/easylist.txt",
]);
let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]);

let engine = Engine::from_rules(full_rules, Default::default());
b.iter(|| assert!(engine.serialize_raw().unwrap().len() > 0))
Expand Down Expand Up @@ -256,9 +256,7 @@ fn deserialization(c: &mut Criterion) {
})
});
group.bench_function("el", move |b| {
let full_rules = rules_from_lists(&[
"data/easylist.to/easylist/easylist.txt",
]);
let full_rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]);

let engine = Engine::from_rules(full_rules, Default::default());
let serialized = engine.serialize_raw().unwrap();
Expand Down Expand Up @@ -292,9 +290,7 @@ fn rule_match_browserlike_comparable(c: &mut Criterion) {
group.throughput(Throughput::Elements(requests_len));
group.sample_size(20);

fn requests_parsed(
requests: &[TestRequest],
) -> Vec<(String, String, String, String, bool)> {
fn requests_parsed(requests: &[TestRequest]) -> Vec<(String, String, String, String, bool)> {
requests
.iter()
.map(|r| {
Expand Down
2 changes: 1 addition & 1 deletion examples/example.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use adblock::{

fn main() {
let rules = vec![
String::from("-advertisement-icon."),
String::from("-advertisement-icon.$domain=example.com|hui.ru|pizda.com"),
String::from("-advertisement-management/"),
String::from("-advertisement."),
String::from("-advertisement/script."),
Expand Down
78 changes: 44 additions & 34 deletions src/blocker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -749,6 +749,8 @@ pub(crate) struct NetworkFilterList {
pub(crate) filter_map: HashMap<Hash, Vec<Arc<NetworkFilter>>>,
pub(crate) flat_filters_buffer: Vec<u8>,
pub(crate) flat_filter_map: HashMap<Hash, Vec<u32>>,
pub(crate) include_domains_map: HashMap<Hash, u16>,
pub(crate) exclude_domains_map: HashMap<Hash, u16>,
}

impl NetworkFilterList {
Expand All @@ -766,9 +768,6 @@ impl NetworkFilterList {

// Build a HashMap of tokens to Network Filters (held through Arc, Atomic Reference Counter)
let mut filter_map = HashMap::with_capacity(filter_tokens.len());

let mut flat_filter_map = HashMap::with_capacity(filter_tokens.len());
let mut flat_builder = crate::filters::fb_network::FlatNetworkFiltersListBuilder::new();
{
for (filter_pointer, multi_tokens) in filter_tokens {
for tokens in multi_tokens {
Expand All @@ -787,20 +786,17 @@ impl NetworkFilterList {
_ => {}
}
}
insert_dup(
&mut flat_filter_map,
best_token,
flat_builder.add((*filter_pointer).clone()),
);
insert_dup(&mut filter_map, best_token, Arc::clone(&filter_pointer));
}
}
}

let mut self_ = NetworkFilterList {
filter_map,
flat_filters_buffer: flat_builder.finish(),
flat_filter_map: flat_filter_map,
flat_filters_buffer: vec![],
flat_filter_map: HashMap::new(),
include_domains_map: HashMap::new(),
exclude_domains_map: HashMap::new(),
};

if optimize {
Expand All @@ -809,6 +805,32 @@ impl NetworkFilterList {
self_.filter_map.shrink_to_fit();
}

let mut flat_builder = crate::filters::fb_network::FlatNetworkFiltersListBuilder::new();

for (key, value) in &self_.filter_map {
for v in value {
let nf = (*(*v)).clone();
let index = flat_builder.add(nf);
insert_dup(&mut self_.flat_filter_map, *key, index);
}
}
self_.flat_filters_buffer = flat_builder.finish();

let root = unsafe { fb::root_as_network_filter_list_unchecked(&self_.flat_filters_buffer) };

for (index, item) in root.unique_include_domains().iter().enumerate() {
self_
.include_domains_map
.insert(item, u16::try_from(index).expect("ok"));
}
for (index, item) in root.unique_exclude_domains().iter().enumerate() {
self_
.exclude_domains_map
.insert(item, u16::try_from(index).expect("ok"));
}
self_.include_domains_map.shrink_to_fit();
self_.exclude_domains_map.shrink_to_fit();

self_
}

Expand Down Expand Up @@ -916,32 +938,20 @@ impl NetworkFilterList {
unsafe { fb::root_as_network_filter_list_unchecked(&self.flat_filters_buffer) };
let filters = storage.global_list();

if let Some(source_hostname_hashes) = request.source_hostname_hashes.as_ref() {
for token in source_hostname_hashes {
if let Some(filter_bucket) = self.flat_filter_map.get(token) {
for filter_index in filter_bucket {
let flat_filter = filters.get(*filter_index as usize);
let mut filter = FlatNetworkFilterView::from(&flat_filter);
filter.key = *filter_index as u64;

if filter.matches(request, regex_manager)
&& filter.tag.map_or(true, |t| active_tags.contains(t))
{
return Some(filter.mask);
}
}
}
}
}

for token in request_tokens {
for token in request
.source_hostname_hashes
.as_ref()
.into_iter()
.flatten()
.chain(request_tokens.into_iter())
{
if let Some(filter_bucket) = self.flat_filter_map.get(token) {
for filter_index in filter_bucket {
let flat_filter = filters.get(*filter_index as usize);
let mut filter = FlatNetworkFilterView::from(&flat_filter);
filter.key = *filter_index as u64;

if filter.matches(request, regex_manager)
if filter.matches(request, self, regex_manager)
&& filter.tag.map_or(true, |t| active_tags.contains(t))
{
return Some(filter.mask);
Expand Down Expand Up @@ -973,7 +983,7 @@ impl NetworkFilterList {
if let Some(filter_bucket) = self.filter_map.get(token) {
for filter in filter_bucket {
// if matched, also needs to be tagged with an active tag (or not tagged at all)
if filter.matches(request, regex_manager)
if filter.matches(request, self, regex_manager)
&& filter
.tag
.as_ref()
Expand All @@ -991,7 +1001,7 @@ impl NetworkFilterList {
if let Some(filter_bucket) = self.filter_map.get(token) {
for filter in filter_bucket {
// if matched, also needs to be tagged with an active tag (or not tagged at all)
if filter.matches(request, regex_manager)
if filter.matches(request, self, regex_manager)
&& filter
.tag
.as_ref()
Expand Down Expand Up @@ -1048,7 +1058,7 @@ impl NetworkFilterList {
if let Some(filter_bucket) = self.filter_map.get(token) {
for filter in filter_bucket {
// if matched, also needs to be tagged with an active tag (or not tagged at all)
if filter.matches(request, regex_manager)
if filter.matches(request, self, regex_manager)
&& filter
.tag
.as_ref()
Expand All @@ -1066,7 +1076,7 @@ impl NetworkFilterList {
if let Some(filter_bucket) = self.filter_map.get(token) {
for filter in filter_bucket {
// if matched, also needs to be tagged with an active tag (or not tagged at all)
if filter.matches(request, regex_manager)
if filter.matches(request, self, regex_manager)
&& filter
.tag
.as_ref()
Expand Down
62 changes: 48 additions & 14 deletions src/data_format/v0.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,12 @@ impl From<&HostnameRuleDb> for LegacyHostnameRuleDb {
for (hash, bin) in v.uninject_script.0.iter() {
for f in bin {
db.entry(*hash)
.and_modify(|v| v.push(LegacySpecificFilterType::UnhideScriptInject(f.to_owned())))
.or_insert_with(|| vec![LegacySpecificFilterType::UnhideScriptInject(f.to_owned())]);
.and_modify(|v| {
v.push(LegacySpecificFilterType::UnhideScriptInject(f.to_owned()))
})
.or_insert_with(|| {
vec![LegacySpecificFilterType::UnhideScriptInject(f.to_owned())]
});
}
}
for (hash, bin) in v.procedural_action.0.iter() {
Expand All @@ -71,8 +75,15 @@ impl From<&HostnameRuleDb> for LegacyHostnameRuleDb {
Ok(f) => {
if let Some((selector, style)) = f.as_css() {
db.entry(*hash)
.and_modify(|v| v.push(LegacySpecificFilterType::Style(selector.clone(), style.clone())))
.or_insert_with(|| vec![LegacySpecificFilterType::Style(selector, style)]);
.and_modify(|v| {
v.push(LegacySpecificFilterType::Style(
selector.clone(),
style.clone(),
))
})
.or_insert_with(|| {
vec![LegacySpecificFilterType::Style(selector, style)]
});
}
}
_ => (),
Expand All @@ -85,17 +96,25 @@ impl From<&HostnameRuleDb> for LegacyHostnameRuleDb {
Ok(f) => {
if let Some((selector, style)) = f.as_css() {
db.entry(*hash)
.and_modify(|v| v.push(LegacySpecificFilterType::UnhideStyle(selector.to_owned(), style.to_owned())))
.or_insert_with(|| vec![LegacySpecificFilterType::UnhideStyle(selector.to_owned(), style.to_owned())]);
.and_modify(|v| {
v.push(LegacySpecificFilterType::UnhideStyle(
selector.to_owned(),
style.to_owned(),
))
})
.or_insert_with(|| {
vec![LegacySpecificFilterType::UnhideStyle(
selector.to_owned(),
style.to_owned(),
)]
});
}
}
_ => (),
}
}
}
LegacyHostnameRuleDb {
db,
}
LegacyHostnameRuleDb { db }
}
}

Expand All @@ -115,10 +134,22 @@ impl Into<HostnameRuleDb> for LegacyHostnameRuleDb {
match rule {
LegacySpecificFilterType::Hide(s) => hide.insert(&hash, s),
LegacySpecificFilterType::Unhide(s) => unhide.insert(&hash, s),
LegacySpecificFilterType::Style(s, st) => procedural_action.insert_procedural_action_filter(&hash, &ProceduralOrActionFilter::from_css(s, st)),
LegacySpecificFilterType::UnhideStyle(s, st) => procedural_action_exception.insert_procedural_action_filter(&hash, &ProceduralOrActionFilter::from_css(s, st)),
LegacySpecificFilterType::ScriptInject(s) => inject_script.insert(&hash, (s, Default::default())),
LegacySpecificFilterType::UnhideScriptInject(s) => uninject_script.insert(&hash, s),
LegacySpecificFilterType::Style(s, st) => procedural_action
.insert_procedural_action_filter(
&hash,
&ProceduralOrActionFilter::from_css(s, st),
),
LegacySpecificFilterType::UnhideStyle(s, st) => procedural_action_exception
.insert_procedural_action_filter(
&hash,
&ProceduralOrActionFilter::from_css(s, st),
),
LegacySpecificFilterType::ScriptInject(s) => {
inject_script.insert(&hash, (s, Default::default()))
}
LegacySpecificFilterType::UnhideScriptInject(s) => {
uninject_script.insert(&hash, s)
}
}
}
}
Expand Down Expand Up @@ -358,6 +389,8 @@ impl From<NetworkFilterListV0DeserializeFmt> for NetworkFilterList {
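// TODO(boocmp): the flat buffer, flat filter map and include/exclude domain maps are left empty here, unlike in NetworkFilterList::new — decide whether they must be rebuilt after deserialization.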
flat_filters_buffer: vec![],
flat_filter_map: HashMap::new(),
include_domains_map: HashMap::new(),
exclude_domains_map: HashMap::new(),
}
}
}
Expand Down Expand Up @@ -448,7 +481,8 @@ impl From<DeserializeFormat> for (Blocker, CosmeticFilterCache) {

let mut specific_rules: HostnameRuleDb = v.specific_rules.into();
specific_rules.procedural_action = HostnameFilterBin(v.procedural_action);
specific_rules.procedural_action_exception = HostnameFilterBin(v.procedural_action_exception);
specific_rules.procedural_action_exception =
HostnameFilterBin(v.procedural_action_exception);

(
Blocker {
Expand Down
Loading

0 comments on commit b8591d9

Please sign in to comment.