From 31351258189eaaf448fcac0dd2ac29e89de4f08a Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Thu, 26 Sep 2024 16:45:13 +0200 Subject: [PATCH] Depend on published daachorse fork --- crates/bpe/Cargo.toml | 3 +-- crates/bpe/src/byte_pair_encoding.rs | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/crates/bpe/Cargo.toml b/crates/bpe/Cargo.toml index 8c72651..1d169d8 100644 --- a/crates/bpe/Cargo.toml +++ b/crates/bpe/Cargo.toml @@ -8,8 +8,7 @@ crate-type = ["lib", "staticlib"] bench = false [dependencies] -#daachorse = "1" -daachorse = { git = "https://github.com/aneubeck/daachorse.git", rev = "ac44a471a7be5a139535173073b8f1cd2e33bcbd" } +aneubeck-daachorse = "1.1.1" fnv = "1.0" itertools = "0.12" once_cell = "1" diff --git a/crates/bpe/src/byte_pair_encoding.rs b/crates/bpe/src/byte_pair_encoding.rs index 873d7be..053bc0c 100644 --- a/crates/bpe/src/byte_pair_encoding.rs +++ b/crates/bpe/src/byte_pair_encoding.rs @@ -3,7 +3,7 @@ use std::collections::BinaryHeap; use std::hash::{Hash, Hasher}; use std::ops::Range; -use daachorse::{DoubleArrayAhoCorasick, DoubleArrayAhoCorasickBuilder}; +use aneubeck_daachorse::{DoubleArrayAhoCorasick, DoubleArrayAhoCorasickBuilder}; use fnv::{FnvHashMap, FnvHasher}; use itertools::Itertools; use once_cell::sync::Lazy; @@ -213,7 +213,7 @@ impl BytePairEncoding { assert_eq!(bytes_hash_to_token.len() + 1, token_starts.len()); let longest_searcher = DoubleArrayAhoCorasickBuilder::new() - .match_kind(daachorse::MatchKind::LeftmostLongest) + .match_kind(aneubeck_daachorse::MatchKind::LeftmostLongest) .build(token_iter(&all_tokens, &token_starts)) .expect("failed to build AhoCorasick");