Skip to content

Commit

Permalink
fix lint
Browse files: browse the repository at this point in the history
  • Loading branch information
aneubeck committed Oct 18, 2024
1 parent 3a66cb1 commit 93fd3ac
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions crates/bpe-openai/src/lib.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::sync::LazyLock;

use bpe::byte_pair_encoding::BytePairEncoding;
use either::Either;
use regex_automata::{meta::Regex, util::captures::Captures, Anchored, Input};
use regex_automata::{meta::{BuildError, Regex}, util::captures::Captures, Anchored, Input};

static BPE_R50K_BASE: LazyLock<Tokenizer> = LazyLock::new(|| {
let bytes = include_bytes!(concat!(env!("OUT_DIR"), "/bpe_r50k_base.dict"));
Expand Down Expand Up @@ -65,15 +65,15 @@ pub struct Tokenizer {

impl Tokenizer {
#[allow(clippy::result_large_err)]
pub fn new(bpe: BytePairEncoding, pat: Option<&str>) -> Result<Self, ()> {
let pat = pat.map(Regex::new).transpose().map_err(|_| ())?;
pub fn new(bpe: BytePairEncoding, pat: Option<&str>) -> Result<Self, BuildError> {
let pat = pat.map(Regex::new).transpose()?;
Ok(Self { bpe, pat })
}

/// When using multiple patterns, the second pattern is assumed to be a look-ahead pattern with
/// exactly one look-ahead character!
pub fn with_many(bpe: BytePairEncoding, patterns: &[&str]) -> Result<Self, ()> {
let pat = Some(Regex::new_many(patterns).map_err(|_| ())?);
pub fn with_many(bpe: BytePairEncoding, patterns: &[&str]) -> Result<Self, BuildError> {
let pat = Some(Regex::new_many(patterns)?);
Ok(Self { bpe, pat })
}

Expand Down Expand Up @@ -132,7 +132,7 @@ impl<'a> Iterator for SpecialRegexp<'a> {
let start = self.last;
let mut end = self.last + m.range().end;
if m.pattern() == 1.into() {
let last = self.input[start..end].chars().rev().next().unwrap();
let last = self.input[start..end].chars().next_back().expect("Expected at least a look-ahead character!");
end -= last.len_utf8();
assert_ne!(end, start, "a look-ahead pattern must ALWAYS consume at least one character excluding the look-ahead character!");
}
Expand Down

0 comments on commit 93fd3ac

Please sign in to comment.