Skip to content

Commit

Permalink
Merge pull request #17 from github/depend-on-daachrose-fork
Browse files Browse the repository at this point in the history
Depend on published daachorse fork
  • Loading branch information
hendrikvanantwerpen authored Sep 26, 2024
2 parents 3a14632 + 3135125 commit 3ebe786
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 4 deletions.
3 changes: 1 addition & 2 deletions crates/bpe/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@ crate-type = ["lib", "staticlib"]
bench = false

[dependencies]
-#daachorse = "1"
-daachorse = { git = "https://github.com/aneubeck/daachorse.git", rev = "ac44a471a7be5a139535173073b8f1cd2e33bcbd" }
+aneubeck-daachorse = "1.1.1"
fnv = "1.0"
itertools = "0.12"
once_cell = "1"
Expand Down
4 changes: 2 additions & 2 deletions crates/bpe/src/byte_pair_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::collections::BinaryHeap;
use std::hash::{Hash, Hasher};
use std::ops::Range;

-use daachorse::{DoubleArrayAhoCorasick, DoubleArrayAhoCorasickBuilder};
+use aneubeck_daachorse::{DoubleArrayAhoCorasick, DoubleArrayAhoCorasickBuilder};
use fnv::{FnvHashMap, FnvHasher};
use itertools::Itertools;
use once_cell::sync::Lazy;
Expand Down Expand Up @@ -213,7 +213,7 @@ impl BytePairEncoding {
assert_eq!(bytes_hash_to_token.len() + 1, token_starts.len());

let longest_searcher = DoubleArrayAhoCorasickBuilder::new()
-.match_kind(daachorse::MatchKind::LeftmostLongest)
+.match_kind(aneubeck_daachorse::MatchKind::LeftmostLongest)
.build(token_iter(&all_tokens, &token_starts))
.expect("failed to build AhoCorasick");

Expand Down

0 comments on commit 3ebe786

Please sign in to comment.