Skip to content

Commit ee2402b

Browse files
committed
Initial commit
0 parents  commit ee2402b

32 files changed

+51738
-0
lines changed

.gitignore

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Generated by Cargo
2+
# will have compiled files and executables
3+
debug/
4+
target/
5+
6+
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
7+
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
8+
Cargo.lock
9+
10+
# These are backup files generated by rustfmt
11+
**/*.rs.bk
12+
13+
# MSVC Windows builds of rustc generate these, which store debugging information
14+
*.pdb
15+
16+
**.geojson

Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[workspace]
2+
resolver = "2"
3+
members = [
4+
"airmail",
5+
"airmail_parser",
6+
]

LICENSE.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
The MIT License (MIT)
2+
=====================
3+
4+
Copyright 2024 Ellen Poe
5+
6+
Permission is hereby granted, free of charge, to any person
7+
obtaining a copy of this software and associated documentation
8+
files (the “Software”), to deal in the Software without
9+
restriction, including without limitation the rights to use,
10+
copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
copies of the Software, and to permit persons to whom the
12+
Software is furnished to do so, subject to the following
13+
conditions:
14+
15+
The above copyright notice and this permission notice shall be
16+
included in all copies or substantial portions of the Software.
17+
18+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
19+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25+
OTHER DEALINGS IN THE SOFTWARE.

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Ellen writes a geocoder
2+
3+
Airmail might eventually become a hybrid online/offline geocoder. This is a forever project and a pipe dream; you probably shouldn't use it yet, and I may never finish it.
4+
5+
For now, the parser is reasonably good.

airmail/Cargo.toml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
[package]
2+
name = "airmail"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
7+
8+
[dependencies]
9+
clap = { version = "4.4.18", features = ["derive"] }
10+
geojson = "0.24.1"
11+
levenshtein_automata = "0.2.1"
12+
s2 = "0.0.12"
13+
tantivy = "0.21.1"
14+
tantivy-common = "0.6.0"
15+
tantivy-fst = "0.4.0"
16+
tempfile = "3.9.0"
17+
18+
[[bin]]
19+
name = "index"

airmail/src/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
### Here be dragons
2+
3+
This crate is a mess and does almost nothing useful.

airmail/src/bin/index.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
use clap::Parser;
2+
3+
// Command-line arguments for the `index` binary, derived with clap's `Parser`.
// `geojson` is optional (`Option<String>`); `index_dir` is a plain `String`.
// NOTE(review): the `///` comments on the fields are presumably picked up by
// clap's derive as help text, so these notes are plain `//` comments - confirm
// before converting them to doc comments.
#[derive(Parser, Debug)]
4+
struct Args {
5+
/// The GeoJSON file to index.
6+
#[clap(short, long)]
7+
geojson: Option<String>,
8+
/// The directory to output index tiles into.
9+
#[clap(short, long)]
10+
index_dir: String,
11+
}
12+
13+
// Entry point of the `index` binary.
// NOTE(review): this is currently a stub - it returns `Ok(())` immediately and
// never parses `Args`, so the command-line flags declared above are unused.
fn main() -> Result<(), Box<dyn std::error::Error>> {
14+
Ok(())
15+
}

airmail/src/index.rs

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
use tantivy::{
2+
collector::{Count, TopDocs},
3+
schema::{Schema, INDEXED, STORED, TEXT},
4+
};
5+
6+
use crate::{poi::AirmailPoi, query::AirmailQuery};
7+
8+
// Field name keys.
// Each constant is the string name under which a field is registered in the
// tantivy schema built by `AirmailIndex::schema`; the same constants are used
// to look the fields up again via `get_field`. `FIELD_S2CELL` is registered as
// a u64 field, all the others as text fields.
9+
pub const FIELD_NAME: &str = "name";
10+
pub const FIELD_CATEGORY: &str = "category";
11+
pub const FIELD_HOUSE_NUMBER: &str = "house_number";
12+
pub const FIELD_ROAD: &str = "road";
13+
pub const FIELD_UNIT: &str = "unit";
14+
pub const FIELD_LOCALITY: &str = "locality";
15+
pub const FIELD_REGION: &str = "region";
16+
pub const FIELD_S2CELL: &str = "s2cell";
17+
18+
/// Thin wrapper around a `tantivy::Index`.
///
/// Instances are obtained from `AirmailIndex::create` (new index in a
/// directory) or `AirmailIndex::new` (open an existing directory).
pub struct AirmailIndex {
19+
// The wrapped tantivy index; schema lookups, writers and readers all come from it.
tantivy_index: tantivy::Index,
20+
}
21+
22+
impl AirmailIndex {
23+
fn schema() -> tantivy::schema::Schema {
24+
let mut schema_builder = Schema::builder();
25+
let _ = schema_builder.add_text_field(FIELD_NAME, TEXT | STORED);
26+
let _ = schema_builder.add_text_field(FIELD_CATEGORY, TEXT | STORED);
27+
let _ = schema_builder.add_text_field(FIELD_HOUSE_NUMBER, TEXT | STORED);
28+
let _ = schema_builder.add_text_field(FIELD_ROAD, TEXT | STORED);
29+
let _ = schema_builder.add_text_field(FIELD_UNIT, TEXT | STORED);
30+
let _ = schema_builder.add_text_field(FIELD_LOCALITY, TEXT | STORED);
31+
let _ = schema_builder.add_text_field(FIELD_REGION, TEXT | STORED);
32+
let _ = schema_builder.add_u64_field(FIELD_S2CELL, INDEXED | STORED);
33+
schema_builder.build()
34+
}
35+
36+
pub fn field_name(&self) -> tantivy::schema::Field {
37+
self.tantivy_index.schema().get_field(FIELD_NAME).unwrap()
38+
}
39+
40+
pub fn field_house_number(&self) -> tantivy::schema::Field {
41+
self.tantivy_index
42+
.schema()
43+
.get_field(FIELD_HOUSE_NUMBER)
44+
.unwrap()
45+
}
46+
47+
pub fn field_road(&self) -> tantivy::schema::Field {
48+
self.tantivy_index.schema().get_field(FIELD_ROAD).unwrap()
49+
}
50+
51+
pub fn field_unit(&self) -> tantivy::schema::Field {
52+
self.tantivy_index.schema().get_field(FIELD_UNIT).unwrap()
53+
}
54+
55+
pub fn field_locality(&self) -> tantivy::schema::Field {
56+
self.tantivy_index
57+
.schema()
58+
.get_field(FIELD_LOCALITY)
59+
.unwrap()
60+
}
61+
62+
pub fn field_region(&self) -> tantivy::schema::Field {
63+
self.tantivy_index.schema().get_field(FIELD_REGION).unwrap()
64+
}
65+
66+
pub fn create(index_dir: &str) -> Result<Self, Box<dyn std::error::Error>> {
67+
let schema = Self::schema();
68+
let tantivy_index = tantivy::Index::create_in_dir(index_dir, schema)?;
69+
Ok(Self { tantivy_index })
70+
}
71+
72+
pub fn new(index_dir: &str) -> Result<Self, Box<dyn std::error::Error>> {
73+
let tantivy_index = tantivy::Index::open_in_dir(index_dir)?;
74+
Ok(Self { tantivy_index })
75+
}
76+
77+
pub fn writer(&mut self) -> Result<AirmailIndexWriter, Box<dyn std::error::Error>> {
78+
let tantivy_writer = self.tantivy_index.writer(50_000_000)?;
79+
let writer = AirmailIndexWriter { tantivy_writer };
80+
Ok(writer)
81+
}
82+
83+
pub fn search(&self, query: AirmailQuery) -> Result<Vec<String>, Box<dyn std::error::Error>> {
84+
let tantivy_reader = self.tantivy_index.reader()?;
85+
let searcher = tantivy_reader.searcher();
86+
let results = searcher.search(&query, &(TopDocs::with_limit(5), Count))?;
87+
let strings = results
88+
.0
89+
.iter()
90+
.map(|s| format!("{:?}", searcher.doc(s.1).unwrap()))
91+
.collect();
92+
Ok(strings)
93+
}
94+
}
95+
96+
/// Wrapper around a `tantivy::IndexWriter`, as returned by `AirmailIndex::writer`.
pub struct AirmailIndexWriter {
97+
// The wrapped tantivy writer that documents are added to and committed through.
tantivy_writer: tantivy::IndexWriter,
98+
}
99+
100+
impl AirmailIndexWriter {
101+
pub fn add_poi(&mut self, poi: AirmailPoi) -> Result<(), Box<dyn std::error::Error>> {
102+
let mut document = tantivy::Document::new();
103+
let schema = self.tantivy_writer.index().schema();
104+
if let Some(name) = poi.name {
105+
document.add_text(schema.get_field(FIELD_NAME)?, name);
106+
}
107+
if let Some(category) = poi.category {
108+
document.add_text(schema.get_field(FIELD_CATEGORY)?, category);
109+
}
110+
document.add_u64(schema.get_field(FIELD_S2CELL)?, poi.s2cell);
111+
self.tantivy_writer.add_document(document)?;
112+
Ok(())
113+
}
114+
115+
pub fn commit(mut self) -> Result<(), Box<dyn std::error::Error>> {
116+
self.tantivy_writer.commit()?;
117+
Ok(())
118+
}
119+
}

airmail/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
pub mod index;
2+
pub mod parser;
3+
pub mod poi;
4+
pub mod query;

airmail/src/parser.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+

0 commit comments

Comments
 (0)