@@ -5,6 +5,7 @@ use std::{
5
5
ops:: Range ,
6
6
path:: { Path , PathBuf } ,
7
7
sync:: { Arc , Mutex , OnceLock } ,
8
+ time:: Duration ,
8
9
} ;
9
10
10
11
use log:: { error, info, warn} ;
@@ -17,7 +18,6 @@ use tantivy::{
17
18
Directory ,
18
19
} ;
19
20
use tantivy_common:: { file_slice:: FileHandle , AntiCallToken , HasLen , OwnedBytes , TerminatingWrite } ;
20
- use tokio:: spawn;
21
21
22
22
thread_local ! {
23
23
static BLOCKING_HTTP_CLIENT : reqwest:: blocking:: Client = reqwest:: blocking:: Client :: new( ) ;
@@ -89,6 +89,9 @@ impl FileHandle for HttpFileHandle {
89
89
let response = BLOCKING_HTTP_CLIENT . with ( |client| {
90
90
client
91
91
. get ( & self . url )
92
+ . timeout ( Duration :: from_millis (
93
+ 500 + ( range. end - range. start ) as u64 / 1024 ,
94
+ ) )
92
95
. header (
93
96
"Range" ,
94
97
dbg ! ( format!(
@@ -145,96 +148,6 @@ impl FileHandle for HttpFileHandle {
145
148
. to_vec ( ) ,
146
149
) )
147
150
}
148
-
149
- async fn read_bytes_async ( & self , range : Range < usize > ) -> io:: Result < OwnedBytes > {
150
- let chunk_start = range. start / CHUNK_SIZE ;
151
- let chunk_end = range. end / CHUNK_SIZE ;
152
- let cache =
153
- LRU_CACHE . get_or_init ( || Mutex :: new ( LruCache :: new ( NonZeroUsize :: new ( 40_000 ) . unwrap ( ) ) ) ) ;
154
- let mut accumulated_chunks = vec ! [ 0u8 ; ( chunk_end - chunk_start + 1 ) * CHUNK_SIZE ] ;
155
- info ! (
156
- "Reading bytes: {:?} in chunks from {} to {}" ,
157
- range, chunk_start, chunk_end
158
- ) ;
159
- let mut handles = Vec :: new ( ) ;
160
- for chunk in chunk_start..=chunk_end {
161
- let key = CacheKey {
162
- base_url : self . url . clone ( ) ,
163
- path : self . url . clone ( ) ,
164
- chunk,
165
- } ;
166
- {
167
- let mut cache = cache. lock ( ) . unwrap ( ) ;
168
- if let Some ( data) = cache. get ( & key) {
169
- accumulated_chunks[ chunk * CHUNK_SIZE ..( chunk + 1 ) * CHUNK_SIZE ]
170
- . copy_from_slice ( data) ;
171
- continue ;
172
- }
173
- }
174
- let url = self . url . clone ( ) ;
175
- let handle = spawn ( async move {
176
- let response = HTTP_CLIENT . with ( |client| {
177
- client
178
- . get ( & url)
179
- . header (
180
- "Range" ,
181
- format ! ( "{}-{}" , chunk * CHUNK_SIZE , ( chunk + 1 ) * CHUNK_SIZE ) ,
182
- )
183
- . send ( )
184
- } ) ;
185
- let response = match response. await {
186
- Ok ( response) => response,
187
- Err ( e) => {
188
- error ! ( "Error: {:?}" , e) ;
189
- return Err ( std:: io:: Error :: new (
190
- std:: io:: ErrorKind :: Other ,
191
- "Error fetching chunk" ,
192
- ) ) ;
193
- }
194
- } ;
195
- if response. status ( ) != 200 {
196
- error ! ( "Response: {:?}" , response) ;
197
- return Err ( std:: io:: Error :: new (
198
- std:: io:: ErrorKind :: Other ,
199
- "Error fetching chunk: non-200 status" ,
200
- ) ) ;
201
- } else {
202
- let data = response. bytes ( ) . await . unwrap ( ) ;
203
- let data = data. to_vec ( ) ;
204
- {
205
- let mut cache = cache. lock ( ) . unwrap ( ) ;
206
- cache. put ( key, data. to_vec ( ) ) ;
207
- }
208
- if data. len ( ) < CHUNK_SIZE && chunk != chunk_end {
209
- warn ! ( "Short chunk: {}" , data. len( ) ) ;
210
- return Err ( std:: io:: Error :: new (
211
- std:: io:: ErrorKind :: Other ,
212
- "Error fetching chunk: short response length" ,
213
- ) ) ;
214
- }
215
- Ok ( ( chunk, data) )
216
- }
217
- } ) ;
218
- handles. push ( handle) ;
219
- }
220
- for handle in handles {
221
- if let Ok ( Ok ( ( chunk, data) ) ) = handle. await {
222
- accumulated_chunks[ chunk * CHUNK_SIZE ..( chunk + 1 ) * CHUNK_SIZE ]
223
- . copy_from_slice ( & data) ;
224
- } else {
225
- return Err ( std:: io:: Error :: new (
226
- std:: io:: ErrorKind :: Other ,
227
- "Error fetching chunk" ,
228
- ) ) ;
229
- }
230
- }
231
- info ! ( "Accumulated chunks: {}" , accumulated_chunks. len( ) ) ;
232
- let chunk_start_offset = range. start % CHUNK_SIZE ;
233
- let chunk_end_offset = ( chunk_end - chunk_start) * CHUNK_SIZE + range. end % CHUNK_SIZE ;
234
- Ok ( OwnedBytes :: new (
235
- accumulated_chunks[ chunk_start_offset..chunk_end_offset] . to_vec ( ) ,
236
- ) )
237
- }
238
151
}
239
152
240
153
impl HasLen for HttpFileHandle {
@@ -249,24 +162,25 @@ impl HasLen for HttpFileHandle {
249
162
250
163
let url = format ! ( "{}" , self . url) ;
251
164
info ! ( "Fetching length from: {}" , url) ;
252
- let response = BLOCKING_HTTP_CLIENT . with ( |client| client. head ( & url) . send ( ) ) ;
165
+ let response = BLOCKING_HTTP_CLIENT
166
+ . with ( |client| client. head ( & url) . timeout ( Duration :: from_millis ( 500 ) ) . send ( ) ) ;
253
167
if let Err ( e) = response {
254
- error ! ( "Error: {:?}" , e) ;
255
- return 0 ;
168
+ error ! ( "Error fetching length : {:?}" , e) ;
169
+ panic ! ( ) ;
256
170
}
257
171
let response = response. unwrap ( ) ;
258
172
if response. status ( ) != 200 {
259
173
error ! ( "Response: {:?}" , response) ;
260
- return 0 ;
174
+ panic ! ( ) ;
261
175
} else {
262
176
let length = response
263
177
. headers ( )
264
178
. get ( "Content-Length" )
265
179
. unwrap ( )
266
180
. to_str ( )
267
- . unwrap_or_default ( )
181
+ . unwrap ( )
268
182
. parse ( )
269
- . unwrap_or_default ( ) ;
183
+ . unwrap ( ) ;
270
184
info ! ( "Length: {}" , length) ;
271
185
let mut lengths = lengths. lock ( ) . unwrap ( ) ;
272
186
lengths. insert ( PathBuf :: from ( & self . url ) , length) ;
0 commit comments