Skip to content

Commit 09743d7

Browse files
committed
Execute tantivy queries in a blocking context, tweaks to remote dir.
1 parent c73c555 commit 09743d7

File tree

3 files changed

+30
-104
lines changed

3 files changed

+30
-104
lines changed

airmail/src/directory.rs

Lines changed: 11 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use std::{
55
ops::Range,
66
path::{Path, PathBuf},
77
sync::{Arc, Mutex, OnceLock},
8+
time::Duration,
89
};
910

1011
use log::{error, info, warn};
@@ -17,7 +18,6 @@ use tantivy::{
1718
Directory,
1819
};
1920
use tantivy_common::{file_slice::FileHandle, AntiCallToken, HasLen, OwnedBytes, TerminatingWrite};
20-
use tokio::spawn;
2121

2222
thread_local! {
2323
static BLOCKING_HTTP_CLIENT: reqwest::blocking::Client = reqwest::blocking::Client::new();
@@ -89,6 +89,9 @@ impl FileHandle for HttpFileHandle {
8989
let response = BLOCKING_HTTP_CLIENT.with(|client| {
9090
client
9191
.get(&self.url)
92+
.timeout(Duration::from_millis(
93+
500 + (range.end - range.start) as u64 / 1024,
94+
))
9295
.header(
9396
"Range",
9497
dbg!(format!(
@@ -145,96 +148,6 @@ impl FileHandle for HttpFileHandle {
145148
.to_vec(),
146149
))
147150
}
148-
149-
async fn read_bytes_async(&self, range: Range<usize>) -> io::Result<OwnedBytes> {
150-
let chunk_start = range.start / CHUNK_SIZE;
151-
let chunk_end = range.end / CHUNK_SIZE;
152-
let cache =
153-
LRU_CACHE.get_or_init(|| Mutex::new(LruCache::new(NonZeroUsize::new(40_000).unwrap())));
154-
let mut accumulated_chunks = vec![0u8; (chunk_end - chunk_start + 1) * CHUNK_SIZE];
155-
info!(
156-
"Reading bytes: {:?} in chunks from {} to {}",
157-
range, chunk_start, chunk_end
158-
);
159-
let mut handles = Vec::new();
160-
for chunk in chunk_start..=chunk_end {
161-
let key = CacheKey {
162-
base_url: self.url.clone(),
163-
path: self.url.clone(),
164-
chunk,
165-
};
166-
{
167-
let mut cache = cache.lock().unwrap();
168-
if let Some(data) = cache.get(&key) {
169-
accumulated_chunks[chunk * CHUNK_SIZE..(chunk + 1) * CHUNK_SIZE]
170-
.copy_from_slice(data);
171-
continue;
172-
}
173-
}
174-
let url = self.url.clone();
175-
let handle = spawn(async move {
176-
let response = HTTP_CLIENT.with(|client| {
177-
client
178-
.get(&url)
179-
.header(
180-
"Range",
181-
format!("{}-{}", chunk * CHUNK_SIZE, (chunk + 1) * CHUNK_SIZE),
182-
)
183-
.send()
184-
});
185-
let response = match response.await {
186-
Ok(response) => response,
187-
Err(e) => {
188-
error!("Error: {:?}", e);
189-
return Err(std::io::Error::new(
190-
std::io::ErrorKind::Other,
191-
"Error fetching chunk",
192-
));
193-
}
194-
};
195-
if response.status() != 200 {
196-
error!("Response: {:?}", response);
197-
return Err(std::io::Error::new(
198-
std::io::ErrorKind::Other,
199-
"Error fetching chunk: non-200 status",
200-
));
201-
} else {
202-
let data = response.bytes().await.unwrap();
203-
let data = data.to_vec();
204-
{
205-
let mut cache = cache.lock().unwrap();
206-
cache.put(key, data.to_vec());
207-
}
208-
if data.len() < CHUNK_SIZE && chunk != chunk_end {
209-
warn!("Short chunk: {}", data.len());
210-
return Err(std::io::Error::new(
211-
std::io::ErrorKind::Other,
212-
"Error fetching chunk: short response length",
213-
));
214-
}
215-
Ok((chunk, data))
216-
}
217-
});
218-
handles.push(handle);
219-
}
220-
for handle in handles {
221-
if let Ok(Ok((chunk, data))) = handle.await {
222-
accumulated_chunks[chunk * CHUNK_SIZE..(chunk + 1) * CHUNK_SIZE]
223-
.copy_from_slice(&data);
224-
} else {
225-
return Err(std::io::Error::new(
226-
std::io::ErrorKind::Other,
227-
"Error fetching chunk",
228-
));
229-
}
230-
}
231-
info!("Accumulated chunks: {}", accumulated_chunks.len());
232-
let chunk_start_offset = range.start % CHUNK_SIZE;
233-
let chunk_end_offset = (chunk_end - chunk_start) * CHUNK_SIZE + range.end % CHUNK_SIZE;
234-
Ok(OwnedBytes::new(
235-
accumulated_chunks[chunk_start_offset..chunk_end_offset].to_vec(),
236-
))
237-
}
238151
}
239152

240153
impl HasLen for HttpFileHandle {
@@ -249,24 +162,25 @@ impl HasLen for HttpFileHandle {
249162

250163
let url = format!("{}", self.url);
251164
info!("Fetching length from: {}", url);
252-
let response = BLOCKING_HTTP_CLIENT.with(|client| client.head(&url).send());
165+
let response = BLOCKING_HTTP_CLIENT
166+
.with(|client| client.head(&url).timeout(Duration::from_millis(500)).send());
253167
if let Err(e) = response {
254-
error!("Error: {:?}", e);
255-
return 0;
168+
error!("Error fetching length: {:?}", e);
169+
panic!();
256170
}
257171
let response = response.unwrap();
258172
if response.status() != 200 {
259173
error!("Response: {:?}", response);
260-
return 0;
174+
panic!();
261175
} else {
262176
let length = response
263177
.headers()
264178
.get("Content-Length")
265179
.unwrap()
266180
.to_str()
267-
.unwrap_or_default()
181+
.unwrap()
268182
.parse()
269-
.unwrap_or_default();
183+
.unwrap();
270184
info!("Length: {}", length);
271185
let mut lengths = lengths.lock().unwrap();
272186
lengths.insert(PathBuf::from(&self.url), length);

airmail_parser/src/component.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ pub trait TriviallyConstructibleComponent: QueryComponent {
2929
fn new(text: String) -> Self;
3030
}
3131

32-
pub trait QueryComponent {
32+
pub trait QueryComponent: Send + Sync {
3333
fn text(&self) -> &str;
3434

3535
fn penalty_mult(&self) -> f32;

airmail_service/src/main.rs

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use deunicode::deunicode;
1212
use log::trace;
1313
use serde::{Deserialize, Serialize};
1414
use serde_json::Value;
15+
use tokio::task::spawn_blocking;
1516

1617
#[derive(Debug, Parser)]
1718
struct Args {
@@ -41,7 +42,13 @@ async fn search(
4142
if all_results.len() > 20 {
4243
break;
4344
}
44-
let results = index.search(scenario).unwrap();
45+
let results = {
46+
let scenario = scenario.clone();
47+
let index = index.clone();
48+
spawn_blocking(move || index.search(&scenario).unwrap())
49+
.await
50+
.unwrap()
51+
};
4552
if results.is_empty() {
4653
continue;
4754
} else {
@@ -89,11 +96,16 @@ async fn main() -> Result<(), Box<dyn Error>> {
8996
env_logger::init();
9097
let args = Args::parse();
9198
let index_path = args.index.clone();
92-
let index = if index_path.starts_with("http") {
93-
Arc::new(AirmailIndex::new_remote(&index_path)?)
94-
} else {
95-
Arc::new(AirmailIndex::new(&index_path)?)
96-
};
99+
100+
let index = spawn_blocking(move || {
101+
if index_path.starts_with("http") {
102+
Arc::new(AirmailIndex::new_remote(&index_path).unwrap())
103+
} else {
104+
Arc::new(AirmailIndex::new(&index_path).unwrap())
105+
}
106+
})
107+
.await
108+
.unwrap();
97109
let app = Router::new().route("/search", get(search).with_state(index));
98110
let listener = tokio::net::TcpListener::bind("0.0.0.0:3000").await.unwrap();
99111
axum::serve(listener, app).await.unwrap();

0 commit comments

Comments
 (0)