diff --git a/Cargo.lock b/Cargo.lock index 9a7a6b8cc..48fa88f8b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -30,6 +30,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" dependencies = [ "cfg-if", + "getrandom", "once_cell", "version_check", "zerocopy", @@ -44,12 +45,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "allocator-api2" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" - [[package]] name = "anes" version = "0.1.6" @@ -70,7 +65,7 @@ checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.47", + "syn", ] [[package]] @@ -167,39 +162,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" -[[package]] -name = "cached" -version = "0.47.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69b0116662497bc24e4b177c90eaf8870e39e2714c3fcfa296327a93f593fc21" -dependencies = [ - "ahash", - "cached_proc_macro", - "cached_proc_macro_types", - "hashbrown", - "instant", - "once_cell", - "thiserror", -] - -[[package]] -name = "cached_proc_macro" -version = "0.18.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c878c71c2821aa2058722038a59a67583a4240524687c6028571c9b395ded61f" -dependencies = [ - "darling", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "cached_proc_macro_types" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a4f925191b4367301851c6d99b09890311d74b0d43f274c0b34c86d308a3663" - [[package]] name = "cast" version = "0.3.0" @@ -290,6 +252,7 @@ dependencies = [ "once_cell", "oorandom", "plotters", + "rayon", "regex", "serde", "serde_derive", @@ -309,39 +272,29 @@ dependencies = [ ] [[package]] -name = "darling" -version = "0.14.4" +name = "crossbeam-deque" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" dependencies = [ - "darling_core", - "darling_macro", + "crossbeam-epoch", + "crossbeam-utils", ] [[package]] -name = "darling_core" -version = "0.14.4" +name = "crossbeam-epoch" +version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim", - "syn 1.0.109", + "crossbeam-utils", ] [[package]] -name = "darling_macro" -version = "0.14.4" +name = "crossbeam-utils" +version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" -dependencies = [ - "darling_core", - "quote", - "syn 1.0.109", -] +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" [[package]] name = "either" @@ -407,6 +360,17 @@ dependencies = [ "pin-utils", ] +[[package]] +name = "getrandom" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "gimli" version = "0.28.0" @@ -419,16 +383,6 @@ version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" -[[package]] -name = "hashbrown" -version = "0.14.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" -dependencies = [ - "ahash", - "allocator-api2", -] - [[package]] name = "hermit-abi" version = "0.3.3" @@ -492,21 +446,6 @@ dependencies = [ "want", ] -[[package]] -name = "ident_case" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" - -[[package]] -name = "instant" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" -dependencies = [ - "cfg-if", -] - [[package]] name = "is-macro" version = "0.3.0" @@ -517,7 +456,7 @@ dependencies = [ "pmutil", "proc-macro2", "quote", - "syn 2.0.47", + "syn", ] [[package]] @@ -602,11 +541,13 @@ checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" name = "lt-core" version = "0.1.0" dependencies = [ - "cached", + "ahash", "criterion", "is-macro", "itertools 0.11.0", + "once_cell", "serde", + "smallvec", ] [[package]] @@ -700,9 +641,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "oorandom" @@ -762,7 +703,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ "proc-macro2", "quote", - "syn 2.0.47", + "syn", ] [[package]] @@ -813,7 +754,7 @@ checksum = "52a40bc70c2c58040d2d8b167ba9a5ff59fc9dab7ad44771cfde3dcfde7a09c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.47", + "syn", ] [[package]] @@ -834,6 +775,26 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rayon" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.4.1" @@ -935,7 +896,7 @@ checksum = "a3385e45322e8f9931410f01b3031ec534c3947d0e94c18049af4d9f9907d4e0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.47", + "syn", ] [[package]] @@ -991,9 +952,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.11.1" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" +checksum = "2593d31f82ead8df961d8bd23a64c2ccf2eb5dd34b0a34bfb4dd54011c72009e" [[package]] name = "socket2" @@ -1015,23 +976,6 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "strsim" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccbca6f34534eb78dbee83f6b2c9442fea7113f43d9e80ea320f0972ae5dc08d" - -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - [[package]] name = "syn" version = "2.0.47" @@ -1049,26 +993,6 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" -[[package]] -name = "thiserror" -version = "1.0.56" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.56" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.47", -] - [[package]] name = "thread_local" version = "1.1.7" @@ -1116,7 +1040,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.47", + "syn", ] [[package]] @@ -1167,7 +1091,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.47", + "syn", ] [[package]] @@ -1275,7 +1199,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.47", + "syn", "wasm-bindgen-shared", ] @@ -1297,7 +1221,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.47", + "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -1432,5 +1356,5 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.47", + "syn", ] diff --git a/lt-core/Cargo.toml b/lt-core/Cargo.toml index 26dd32c62..38a658447 100644 --- a/lt-core/Cargo.toml +++ b/lt-core/Cargo.toml @@ -4,13 +4,15 @@ version = "0.1.0" edition = "2021" [dependencies] -cached = "0.47.0" +ahash = "0.8.7" is-macro = "0.3.0" itertools = "0.11.0" +once_cell = "1.19.0" serde = { version = "1.0.190", features = ["derive"] } +smallvec = "1.12.0" [dev-dependencies] -criterion = { version = "0.5.1", features = ["html_reports", "plotters", "cargo_bench_support"], default_features = false } +criterion = { version = "0.5.1" } [[bench]] name = "spellcheck" diff --git a/lt-core/benches/spellcheck.rs b/lt-core/benches/spellcheck.rs index 1705893a1..efa6165f5 100644 --- a/lt-core/benches/spellcheck.rs +++ b/lt-core/benches/spellcheck.rs @@ -7,10 +7,10 @@ fn spellcheck(dictionary: &Dictionary) { fn criterion_benchmark(c: &mut Criterion) { let mut group = c.benchmark_group("spellcheck"); - group.sample_size(10000); let dictionary = Dictionary::new(); + group.bench_function("dict create", |b| b.iter(Dictionary::new)); group.bench_function("hello 5", |b| b.iter(|| spellcheck(&dictionary))); } diff --git a/lt-core/src/linting/lint.rs b/lt-core/src/linting/lint.rs index c90eb062b..3df4b96d7 100644 --- a/lt-core/src/linting/lint.rs +++ b/lt-core/src/linting/lint.rs @@ -1,6 +1,6 @@ use serde::{Deserialize, Serialize}; -use crate::{document::Document, span::Span}; +use crate::{document::Document, span::Span, Dictionary}; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Lint { @@ -23,4 +23,4 @@ pub enum Suggestion { ReplaceWith(Vec), } -pub type Linter = fn(document: &Document) -> Vec; +pub type Linter = fn(document: &Document, dictionary: &Dictionary) -> Vec; diff --git a/lt-core/src/linting/mod.rs b/lt-core/src/linting/mod.rs index 268dd59f3..70147b38f 100644 --- a/lt-core/src/linting/mod.rs +++ b/lt-core/src/linting/mod.rs @@ -6,11 +6,11 @@ mod wrong_quotes; pub use lint::{Lint, LintKind, Suggestion}; -use crate::Document; +use crate::{Dictionary, Document}; use self::lint::Linter; -pub fn all_linters(document: &Document) -> Vec { +pub fn all_linters(document: &Document, dictionary: &Dictionary) -> Vec { let mut lints = Vec::new(); let linters: [Linter; 4] = [ @@ -21,7 +21,7 @@ pub fn all_linters(document: &Document) -> Vec { ]; for linter in linters { - lints.append(&mut linter(document)); + lints.append(&mut linter(document, dictionary)); } lints diff --git a/lt-core/src/linting/sentence_capitalization.rs b/lt-core/src/linting/sentence_capitalization.rs index 8479c12b1..b521759af 100644 --- a/lt-core/src/linting/sentence_capitalization.rs +++ b/lt-core/src/linting/sentence_capitalization.rs @@ -1,11 +1,11 @@ use itertools::Itertools; -use crate::{document::Document, parsing::TokenStringExt, Lint, LintKind}; +use crate::{document::Document, parsing::TokenStringExt, Dictionary, Lint, LintKind}; use super::lint::Suggestion; /// A linter that checks to make sure the first word of each sentence is capitalized. -pub fn sentence_capitalization_lint(document: &Document) -> Vec { +pub fn sentence_capitalization_lint(document: &Document, _dictionary: &Dictionary) -> Vec { let mut lints = Vec::new(); for sentence in document.sentences() { diff --git a/lt-core/src/linting/spell_check.rs b/lt-core/src/linting/spell_check.rs index 173f46b35..b028d4878 100644 --- a/lt-core/src/linting/spell_check.rs +++ b/lt-core/src/linting/spell_check.rs @@ -2,7 +2,7 @@ use crate::{document::Document, suggest_correct_spelling, Dictionary, Lint, Lint use super::lint::Suggestion; -pub fn spell_check(document: &Document) -> Vec { +pub fn spell_check(document: &Document, _dictionary: &Dictionary) -> Vec { let mut lints = Vec::new(); let dictionary = Dictionary::new(); diff --git a/lt-core/src/linting/unclosed_quotes.rs b/lt-core/src/linting/unclosed_quotes.rs index 3276c9132..9bffe7c7b 100644 --- a/lt-core/src/linting/unclosed_quotes.rs +++ b/lt-core/src/linting/unclosed_quotes.rs @@ -1,6 +1,8 @@ -use crate::{document::Document, parsing::Quote, Lint, LintKind, Punctuation, TokenKind}; +use crate::{ + document::Document, parsing::Quote, Dictionary, Lint, LintKind, Punctuation, TokenKind, +}; -pub fn unclosed_quotes(document: &Document) -> Vec { +pub fn unclosed_quotes(document: &Document, _dictionary: &Dictionary) -> Vec { let mut lints = Vec::new(); // TODO: Try zipping quote positions diff --git a/lt-core/src/linting/wrong_quotes.rs b/lt-core/src/linting/wrong_quotes.rs index 80bb9027e..8a29174bd 100644 --- a/lt-core/src/linting/wrong_quotes.rs +++ b/lt-core/src/linting/wrong_quotes.rs @@ -1,8 +1,9 @@ use crate::{ - document::Document, parsing::Quote, Lint, LintKind, Punctuation, Suggestion, Token, TokenKind, + document::Document, parsing::Quote, Dictionary, Lint, LintKind, Punctuation, Suggestion, Token, + TokenKind, }; -pub fn wrong_quotes(document: &Document) -> Vec { +pub fn wrong_quotes(document: &Document, _dictionary: &Dictionary) -> Vec { document .iter_quote_indices() .zip(document.iter_quotes()) diff --git a/lt-core/src/spell/dictionary.rs b/lt-core/src/spell/dictionary.rs index 8db100edd..f59c5e328 100644 --- a/lt-core/src/spell/dictionary.rs +++ b/lt-core/src/spell/dictionary.rs @@ -1,6 +1,10 @@ -use std::{collections::HashSet, iter}; +use std::{borrow::Borrow, hash::Hasher}; -use cached::proc_macro::cached; +use ahash::{AHashSet, AHasher}; +use once_cell::sync::Lazy; +use smallvec::SmallVec; + +type DictWord = SmallVec<[char; 16]>; #[derive(Debug, Clone)] pub struct Dictionary { @@ -10,19 +14,18 @@ pub struct Dictionary { /// This is likely due to increased locality :shrug:. /// /// This list is sorted by word length (i.e. the shortest words are first). - words: Vec>, + words: Vec, /// A lookup list for each word length. - /// Each index of this list will return the first index of [`Self::words`] that has a word of - /// that length. + /// Each index of this list will return the first index of [`Self::words`] that has a word + /// whose index is that length. word_len_starts: Vec, - word_set: HashSet>, + word_set: AHashSet, } -#[cached] -fn cached_inner_new() -> Dictionary { +fn uncached_inner_new() -> Dictionary { let english_words_raw = include_str!("../../../english_words.txt").replace('\r', ""); - let mut words: Vec> = english_words_raw + let mut words: Vec = english_words_raw .split('\n') .filter(|word| !word.is_empty()) .map(|word| word.chars().collect()) @@ -32,28 +35,38 @@ fn cached_inner_new() -> Dictionary { let mut word_len_starts = vec![0, 0]; - for (index, len) in words.iter().map(Vec::len).enumerate() { + for (index, len) in words.iter().map(SmallVec::len).enumerate() { if word_len_starts.len() == len { word_len_starts.push(index); } } Dictionary { - word_set: HashSet::from_iter(words.iter().cloned()), + word_set: AHashSet::from_iter(words.iter().map(|v| hash_word(v.as_slice()))), word_len_starts, words, } } +fn hash_word(word: &[char]) -> u64 { + let mut hasher = AHasher::default(); + for c in word { + hasher.write_u32(*c as u32); + } + hasher.finish() +} + +static DICT: Lazy = Lazy::new(uncached_inner_new); + impl Dictionary { - pub fn new() -> Self { - cached_inner_new() + pub fn new() -> &'static Self { + &DICT } /// Iterate over all the words in the dicitonary of a given length pub fn words_with_len_iter(&self, len: usize) -> Box + '_> { if len == 0 || len >= self.word_len_starts.len() { - return Box::new(iter::empty()); + return Box::new(std::iter::empty()); } let start = self.word_len_starts[len]; @@ -73,12 +86,6 @@ impl Dictionary { pub fn contains_word(&self, word: &[char]) -> bool { let lowercase: Vec<_> = word.iter().flat_map(|c| c.to_lowercase()).collect(); - self.word_set.contains(word) || self.word_set.contains(&lowercase) - } -} - -impl Default for Dictionary { - fn default() -> Self { - Self::new() + self.word_set.contains(&hash_word(word)) || self.word_set.contains(&hash_word(&lowercase)) } } diff --git a/lt-serve/src/main.rs b/lt-serve/src/main.rs index 019386149..653f32dca 100644 --- a/lt-serve/src/main.rs +++ b/lt-serve/src/main.rs @@ -1,6 +1,6 @@ #![allow(dead_code)] -use lt_core::{all_linters, Document, FatToken, Lint, Span, Suggestion}; +use lt_core::{all_linters, Dictionary, Document, FatToken, Lint, Span, Suggestion}; use std::net::SocketAddr; use tokio::time::Instant; use tracing::{debug, info, Level}; @@ -88,9 +88,10 @@ struct ParseResponse { async fn lint(Json(payload): Json) -> (StatusCode, Json) { let text = payload.text; + let dictionary = Dictionary::new(); let document = Document::new(&text); - let lints = all_linters(&document); + let lints = all_linters(&document, dictionary); (StatusCode::ACCEPTED, Json(LintResponse { lints })) }