Skip to content

Commit 1c32229

Browse files
Merge pull request #3 from disasterscience/structure
Structure
2 parents 500283b + a451220 commit 1c32229

23 files changed

+1146 -627 lines changed

Cargo.lock

Lines changed: 641 additions & 125 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
[workspace]
22
resolver = "2"
3-
members = [
4-
"airmail", "airmail_import_osm", "airmail_indexer", "airmail_service",
5-
]
3+
members = ["airmail", "airmail_indexer", "airmail_service"]
64

75
[profile.release]
86
debug = 1

Dockerfile

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@ RUN apt update && apt install -y libssl-dev clang pkg-config
55
WORKDIR /usr/src/airmail
66
COPY ./airmail ./airmail
77
COPY ./airmail_indexer ./airmail_indexer
8-
COPY ./airmail_import_osm ./airmail_import_osm
98
COPY ./airmail_service ./airmail_service
109
COPY ./Cargo.toml ./Cargo.toml
1110
COPY ./Cargo.lock ./Cargo.lock

Dockerfile.build

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,10 @@ RUN apt-get update && apt-get install -y libssl-dev capnproto clang pkg-config l
55
WORKDIR /usr/src/airmail
66
COPY ./airmail ./airmail
77
COPY ./airmail_indexer ./airmail_indexer
8-
COPY ./airmail_import_osm ./airmail_import_osm
98
COPY ./airmail_service ./airmail_service
109
COPY ./Cargo.toml ./Cargo.toml
1110
COPY ./Cargo.lock ./Cargo.lock
1211

13-
RUN cargo install --path ./airmail_import_osm
14-
15-
RUN apt-get update && apt-get install -y podman
12+
RUN cargo install --path ./airmail_indexer
1613

1714
WORKDIR /var/airmail

QUICKSTART.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,7 @@ ls -lh ./data/whosonfirst-data-admin-latest.spatial.db ./data/australia-latest.o
5454

5555
# Build the index
5656
docker compose --profile index run build-index \
57-
airmail_import_osm --wof-db /data/whosonfirst-data-admin-latest.spatial.db \
57+
indexer --wof-db /data/whosonfirst-data-admin-latest.spatial.db \
5858
--index /data/index \
5959
--osmx /data/australia-latest.osm.osmx
6060
```

airmail/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,8 @@ lazy_static = "1.4.0"
3333
regex = "1.10.3"
3434
geo = "0.27.0"
3535
tantivy-uffd = "0.1.1"
36+
anyhow = "1.0.86"
37+
thiserror = "1.0.63"
3638

3739
[features]
3840
invasive_logging = []

airmail/src/error.rs

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,7 @@
1+
use thiserror::Error;
2+
3+
#[derive(Error, Debug)]
4+
pub enum AirmailError {
5+
#[error("unable to count")]
6+
UnableToCount,
7+
}

airmail/src/index.rs

Lines changed: 43 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
1+
use std::path::PathBuf;
12
use std::sync::Arc;
23

4+
use anyhow::Result;
35
use futures_util::future::join_all;
46
use geo::Rect;
57
use itertools::Itertools;
@@ -24,6 +26,7 @@ use tantivy_uffd::RemoteDirectory;
2426
use tokio::task::spawn_blocking;
2527
use unicode_segmentation::UnicodeSegmentation;
2628

29+
use crate::error::AirmailError;
2730
use crate::{
2831
poi::{AirmailPoi, SchemafiedPoi},
2932
query::all_subsequences,
@@ -63,8 +66,8 @@ impl AirmailIndex {
6366
.set_indexed()
6467
.set_stored()
6568
.set_fast();
66-
assert_eq!(s2cell_parent_index_options.fieldnorms(), false);
67-
assert_eq!(s2cell_index_options.fieldnorms(), false);
69+
assert!(!s2cell_parent_index_options.fieldnorms());
70+
assert!(!s2cell_index_options.fieldnorms());
6871

6972
let _ = schema_builder.add_text_field(FIELD_CONTENT, text_options.clone());
7073
let _ = schema_builder.add_text_field(FIELD_INDEXED_TAG, tag_options);
@@ -109,7 +112,7 @@ impl AirmailIndex {
109112
self.tantivy_index.schema().get_field(FIELD_TAGS).unwrap()
110113
}
111114

112-
pub fn create(index_dir: &str) -> Result<Self, Box<dyn std::error::Error>> {
115+
pub fn create(index_dir: &PathBuf) -> Result<Self> {
113116
let schema = Self::schema();
114117
let tantivy_index =
115118
tantivy::Index::open_or_create(MmapDirectory::open(index_dir)?, schema)?;
@@ -119,15 +122,15 @@ impl AirmailIndex {
119122
})
120123
}
121124

122-
pub fn new(index_dir: &str) -> Result<Self, Box<dyn std::error::Error>> {
125+
pub fn new(index_dir: &str) -> Result<Self> {
123126
let tantivy_index = tantivy::Index::open_in_dir(index_dir)?;
124127
Ok(Self {
125128
tantivy_index: Arc::new(tantivy_index),
126129
is_remote: false,
127130
})
128131
}
129132

130-
pub fn new_remote(base_url: &str) -> Result<Self, Box<dyn std::error::Error>> {
133+
pub fn new_remote(base_url: &str) -> Result<Self> {
131134
let tantivy_index =
132135
tantivy::Index::open(RemoteDirectory::<{ 2 * 1024 * 1024 }>::new(base_url))?;
133136
Ok(Self {
@@ -136,7 +139,7 @@ impl AirmailIndex {
136139
})
137140
}
138141

139-
pub fn writer(&mut self) -> Result<AirmailIndexWriter, Box<dyn std::error::Error>> {
142+
pub fn writer(&mut self) -> Result<AirmailIndexWriter> {
140143
let tantivy_writer = self
141144
.tantivy_index
142145
.writer::<TantivyDocument>(2_000_000_000)?;
@@ -147,7 +150,7 @@ impl AirmailIndex {
147150
Ok(writer)
148151
}
149152

150-
pub async fn merge(&mut self) -> Result<(), Box<dyn std::error::Error>> {
153+
pub async fn merge(&mut self) -> Result<()> {
151154
let ids = self.tantivy_index.searchable_segment_ids()?;
152155
self.tantivy_index
153156
.writer::<TantivyDocument>(2_000_000_000)?
@@ -156,7 +159,7 @@ impl AirmailIndex {
156159
Ok(())
157160
}
158161

159-
pub async fn num_docs(&self) -> Result<u64, Box<dyn std::error::Error>> {
162+
pub async fn num_docs(&self) -> Result<u64> {
160163
let index = self.tantivy_index.clone();
161164
let count = spawn_blocking(move || {
162165
if let Ok(tantivy_reader) = index.reader() {
@@ -165,7 +168,7 @@ impl AirmailIndex {
165168
None
166169
}
167170
});
168-
Ok(count.await?.ok_or("Error getting count")?)
171+
Ok(count.await?.ok_or(AirmailError::UnableToCount)?)
169172
}
170173

171174
async fn construct_query(
@@ -244,37 +247,35 @@ impl AirmailIndex {
244247
boost,
245248
)));
246249
}
250+
} else if possible_query.len() >= 8 && lenient {
251+
let query = if tokens.ends_with(&[possible_query]) {
252+
FuzzyTermQuery::new_prefix(term, 1, true)
253+
} else {
254+
FuzzyTermQuery::new(term, 1, true)
255+
};
256+
if self.is_remote {
257+
let searcher = searcher.clone();
258+
let query = query.clone();
259+
spawn_blocking(move || {
260+
let _ = searcher.search(&query, &Count);
261+
});
262+
}
263+
mandatory_queries.push(Box::new(BoostQuery::new(Box::new(query), boost)));
247264
} else {
248-
if possible_query.len() >= 8 && lenient {
249-
let query = if tokens.ends_with(&[possible_query]) {
250-
FuzzyTermQuery::new_prefix(term, 1, true)
265+
let query: Box<dyn Query> =
266+
if self.is_remote || !lenient || !tokens.ends_with(&[possible_query]) {
267+
Box::new(TermQuery::new(term, IndexRecordOption::Basic))
251268
} else {
252-
FuzzyTermQuery::new(term, 1, true)
269+
Box::new(FuzzyTermQuery::new_prefix(term, 0, false))
253270
};
254-
if self.is_remote {
255-
let searcher = searcher.clone();
256-
let query = query.clone();
257-
spawn_blocking(move || {
258-
let _ = searcher.search(&query, &Count);
259-
});
260-
}
261-
mandatory_queries.push(Box::new(BoostQuery::new(Box::new(query), boost)));
262-
} else {
263-
let query: Box<dyn Query> =
264-
if self.is_remote || !lenient || !tokens.ends_with(&[possible_query]) {
265-
Box::new(TermQuery::new(term, IndexRecordOption::Basic))
266-
} else {
267-
Box::new(FuzzyTermQuery::new_prefix(term, 0, false))
268-
};
269-
if self.is_remote {
270-
let searcher = searcher.clone();
271-
let query = query.box_clone();
272-
spawn_blocking(move || {
273-
let _ = searcher.search(&query, &Count);
274-
});
275-
}
276-
mandatory_queries.push(Box::new(BoostQuery::new(query, boost)));
277-
};
271+
if self.is_remote {
272+
let searcher = searcher.clone();
273+
let query = query.box_clone();
274+
spawn_blocking(move || {
275+
let _ = searcher.search(&query, &Count);
276+
});
277+
}
278+
mandatory_queries.push(Box::new(BoostQuery::new(query, boost)));
278279
}
279280
}
280281

@@ -328,7 +329,7 @@ impl AirmailIndex {
328329
]));
329330
}
330331

331-
return Box::new(final_query);
332+
Box::new(final_query)
332333
}
333334

334335
/// This is public because I don't want one big mega-crate but its API should not be considered even remotely stable.
@@ -339,7 +340,7 @@ impl AirmailIndex {
339340
tags: Option<Vec<String>>,
340341
bbox: Option<Rect<f64>>,
341342
boost_regions: &[(f32, Rect<f64>)],
342-
) -> Result<Vec<(AirmailPoi, f32)>, Box<dyn std::error::Error>> {
343+
) -> Result<Vec<(AirmailPoi, f32)>> {
343344
let tantivy_reader = self.tantivy_index.reader()?;
344345
let searcher = tantivy_reader.searcher();
345346
let query_string = query.trim().replace("'s", "s");
@@ -352,7 +353,7 @@ impl AirmailIndex {
352353
&query_string,
353354
tags,
354355
bbox,
355-
&boost_regions,
356+
boost_regions,
356357
request_leniency,
357358
)
358359
.await;
@@ -424,11 +425,7 @@ impl AirmailIndexWriter {
424425
doc.add_text(self.schema.get_field(FIELD_CONTENT).unwrap(), value);
425426
}
426427

427-
pub async fn add_poi(
428-
&mut self,
429-
poi: SchemafiedPoi,
430-
source: &str,
431-
) -> Result<(), Box<dyn std::error::Error>> {
428+
pub fn add_poi(&mut self, poi: SchemafiedPoi, source: &str) -> Result<()> {
432429
let mut doc = TantivyDocument::default();
433430
for content in poi.content {
434431
self.process_field(&mut doc, &content);
@@ -468,7 +465,7 @@ impl AirmailIndexWriter {
468465
Ok(())
469466
}
470467

471-
pub fn commit(mut self) -> Result<(), Box<dyn std::error::Error>> {
468+
pub fn commit(mut self) -> Result<()> {
472469
self.tantivy_writer.commit()?;
473470
Ok(())
474471
}

airmail/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
#[macro_use]
22
extern crate lazy_static;
33

4+
pub mod error;
45
pub mod index;
56
pub mod poi;
67
pub mod query;

airmail/src/poi.rs

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
1-
use std::error::Error;
2-
1+
use anyhow::Result;
32
use lingua::Language;
43
use serde::{Deserialize, Serialize};
54

@@ -16,12 +15,7 @@ pub struct AirmailPoi {
1615
}
1716

1817
impl AirmailPoi {
19-
pub fn new(
20-
source: String,
21-
lat: f64,
22-
lng: f64,
23-
tags: Vec<(String, String)>,
24-
) -> Result<Self, Box<dyn Error>> {
18+
pub fn new(source: String, lat: f64, lng: f64, tags: Vec<(String, String)>) -> Result<Self> {
2519
let s2cell = s2::cellid::CellID::from(s2::latlng::LatLng::from_degrees(lat, lng)).0;
2620

2721
Ok(Self {
@@ -56,7 +50,7 @@ impl ToIndexPoi {
5650
lat: f64,
5751
lng: f64,
5852
tags: Vec<(String, String)>,
59-
) -> Result<Self, Box<dyn Error>> {
53+
) -> Result<Self> {
6054
let s2cell = s2::cellid::CellID::from(s2::latlng::LatLng::from_degrees(lat, lng)).0;
6155

6256
Ok(Self {

0 commit comments

Comments (0)