-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Implementing some basic commands for DB * CREATEPREDINDEX command parsing * DROPPREDINDEX command parsing * NONLINEARALGORITHMINDEX command inclusions * GETKEY and DELKEY command inclusions * WIP predicate parsing, would need more tests * GETSIMN command inclusion * CREATESTORE command inclusion * Switch f32_array to square brackets * SET command inclusion
- Loading branch information
Showing
13 changed files
with
1,152 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
[package] | ||
name = "dsl" | ||
version = "0.1.0" | ||
edition = "2021" | ||
|
||
[dependencies] | ||
ahnlich_types = { path = "../types", version = "*" } | ||
pest = "2.7.13" | ||
pest_derive = "2.7.13" | ||
thiserror.workspace = true | ||
ndarray.workspace = true | ||
hex = "0.4.3" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
use ahnlich_types::similarity::{Algorithm, NonLinearAlgorithm}; | ||
|
||
use crate::error::DslError; | ||
|
||
pub(crate) fn to_non_linear(input: &str) -> Option<NonLinearAlgorithm> { | ||
match input.to_lowercase().trim() { | ||
"kdtree" => Some(NonLinearAlgorithm::KDTree), | ||
_ => None, | ||
} | ||
} | ||
|
||
pub(crate) fn to_algorithm(input: &str) -> Result<Algorithm, DslError> { | ||
match input.to_lowercase().trim() { | ||
"kdtree" => Ok(Algorithm::KDTree), | ||
"cosinesimilarity" => Ok(Algorithm::CosineSimilarity), | ||
"dotproductsimilarity" => Ok(Algorithm::DotProductSimilarity), | ||
"euclideandistance" => Ok(Algorithm::EuclideanDistance), | ||
e => Err(DslError::UnsupportedAlgorithm(e.to_string())), | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
use crate::parser::Rule; | ||
use ahnlich_types::keyval::StoreKey; | ||
use ndarray::Array1; | ||
use pest::iterators::Pair; | ||
|
||
pub(crate) fn parse_multi_f32_array(f32_arrays_pair: Pair<Rule>) -> Vec<StoreKey> { | ||
f32_arrays_pair.into_inner().map(parse_f32_array).collect() | ||
} | ||
|
||
pub(crate) fn parse_f32_array(pair: Pair<Rule>) -> StoreKey { | ||
StoreKey(Array1::from_iter(pair.into_inner().map(|f32_pair| { | ||
f32_pair | ||
.as_str() | ||
.parse::<f32>() | ||
.expect("Cannot parse single f32 num") | ||
}))) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,304 @@ | ||
use std::{collections::HashSet, num::NonZeroUsize}; | ||
|
||
use crate::{ | ||
algorithm::{to_algorithm, to_non_linear}, | ||
array::{parse_f32_array, parse_multi_f32_array}, | ||
metadata::parse_store_keys_to_store_value, | ||
parser::{QueryParser, Rule}, | ||
}; | ||
use ahnlich_types::{db::DBQuery, keyval::StoreName, metadata::MetadataKey}; | ||
use pest::Parser; | ||
|
||
use crate::{error::DslError, predicate::parse_predicate_expression}; | ||
|
||
// Parse raw strings separated by ; into a Vec<DBQuery>. Examples include but are not restricted | ||
// to | ||
// | ||
// PING | ||
// LISTCLIENTS | ||
// LISTSTORES | ||
// INFOSERVER | ||
// DROPSTORE store_name IF EXISTS | ||
// CREATEPREDINDEX (key_1, key_2) in store_name | ||
// DROPPREDINDEX IF EXISTS (key1, key2) in store_name | ||
// CREATENONLINEARALGORITHMINDEX (kdtree) in store_name | ||
// DROPNONLINEARALGORITHMINDEX IF EXISTS (kdtree) in store_name | ||
// GETKEY ([1.0, 2.0], [3.0, 4.0]) IN my_store | ||
// DELKEY ([1.2, 3.0], [5.6, 7.8]) IN my_store | ||
// GETPRED ((author = dickens) OR (country != Nigeria)) IN my_store | ||
// GETSIMN 4 WITH [0.65, 2.78] USING cosinesimilarity IN my_store WHERE (author = dickens) | ||
// CREATESTORE IF NOT EXISTS my_store DIMENSION 21 PREDICATES (author, country) NONLINEARALGORITHMINDEX (kdtree) | ||
// SET (([1.0, 2.1, 3.2], {name: Haks, category: dev}), ([3.1, 4.8, 5.0], {name: Deven, category: dev})) in store | ||
pub fn parse_db_query(input: &str) -> Result<Vec<DBQuery>, DslError> { | ||
let pairs = QueryParser::parse(Rule::db_query, input).map_err(Box::new)?; | ||
let statements = pairs.into_iter().collect::<Vec<_>>(); | ||
let mut queries = Vec::with_capacity(statements.len()); | ||
for statement in statements { | ||
let start_pos = statement.as_span().start_pos().pos(); | ||
let end_pos = statement.as_span().end_pos().pos(); | ||
let query = match statement.as_rule() { | ||
Rule::ping => DBQuery::Ping, | ||
Rule::list_clients => DBQuery::ListClients, | ||
Rule::list_stores => DBQuery::ListStores, | ||
Rule::info_server => DBQuery::InfoServer, | ||
Rule::set_in_store => { | ||
let mut inner_pairs = statement.into_inner(); | ||
let store_keys_to_store_values = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?; | ||
let store = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))? | ||
.as_str(); | ||
|
||
DBQuery::Set { | ||
store: StoreName(store.to_string()), | ||
inputs: parse_store_keys_to_store_value(store_keys_to_store_values)?, | ||
} | ||
} | ||
Rule::create_store => { | ||
let mut inner_pairs = statement.into_inner().peekable(); | ||
let mut error_if_exists = true; | ||
if let Some(next_pair) = inner_pairs.peek() { | ||
if next_pair.as_rule() == Rule::if_not_exists { | ||
inner_pairs.next(); // Consume rule | ||
error_if_exists = false; | ||
} | ||
}; | ||
let store = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))? | ||
.as_str(); | ||
let dimension = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))? | ||
.as_str() | ||
.parse::<NonZeroUsize>()?; | ||
let mut create_predicates = HashSet::new(); | ||
if let Some(next_pair) = inner_pairs.peek() { | ||
if next_pair.as_rule() == Rule::metadata_keys { | ||
let index_name_pairs = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?; // Consume rule | ||
create_predicates = index_name_pairs | ||
.into_inner() | ||
.map(|index_pair| MetadataKey::new(index_pair.as_str().to_string())) | ||
.collect(); | ||
} | ||
}; | ||
let mut non_linear_indices = HashSet::new(); | ||
if let Some(next_pair) = inner_pairs.peek() { | ||
if next_pair.as_rule() == Rule::non_linear_algorithms { | ||
let index_name_pairs = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?; // Consume rule | ||
non_linear_indices = index_name_pairs | ||
.into_inner() | ||
.flat_map(|index_pair| to_non_linear(index_pair.as_str())) | ||
.collect(); | ||
} | ||
}; | ||
DBQuery::CreateStore { | ||
store: StoreName(store.to_string()), | ||
dimension, | ||
create_predicates, | ||
non_linear_indices, | ||
error_if_exists, | ||
} | ||
} | ||
Rule::get_sim_n => { | ||
let mut inner_pairs = statement.into_inner(); | ||
let closest_n = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))? | ||
.as_str() | ||
.parse::<NonZeroUsize>()?; | ||
let f32_array = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?; | ||
let search_input = parse_f32_array(f32_array); | ||
let algorithm = to_algorithm( | ||
inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))? | ||
.as_str(), | ||
)?; | ||
let store = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))? | ||
.as_str(); | ||
let condition = if let Some(predicate_conditions) = inner_pairs.next() { | ||
Some(parse_predicate_expression(predicate_conditions)?) | ||
} else { | ||
None | ||
}; | ||
DBQuery::GetSimN { | ||
store: StoreName(store.to_string()), | ||
search_input, | ||
closest_n, | ||
algorithm, | ||
condition, | ||
} | ||
} | ||
Rule::get_pred => { | ||
let mut inner_pairs = statement.into_inner(); | ||
let predicate_conditions = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?; | ||
let store = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))? | ||
.as_str(); | ||
DBQuery::GetPred { | ||
store: StoreName(store.to_string()), | ||
condition: parse_predicate_expression(predicate_conditions)?, | ||
} | ||
} | ||
Rule::get_key => { | ||
let mut inner_pairs = statement.into_inner(); | ||
let f32_arrays_pair = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?; | ||
let keys = parse_multi_f32_array(f32_arrays_pair); | ||
|
||
let store = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))? | ||
.as_str(); | ||
DBQuery::GetKey { | ||
store: StoreName(store.to_string()), | ||
keys, | ||
} | ||
} | ||
Rule::del_key => { | ||
let mut inner_pairs = statement.into_inner(); | ||
let f32_arrays_pair = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?; | ||
let keys = parse_multi_f32_array(f32_arrays_pair); | ||
|
||
let store = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))? | ||
.as_str(); | ||
DBQuery::DelKey { | ||
store: StoreName(store.to_string()), | ||
keys, | ||
} | ||
} | ||
Rule::create_non_linear_algorithm_index => { | ||
let mut inner_pairs = statement.into_inner(); | ||
let index_name_pairs = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?; | ||
let non_linear_indices = index_name_pairs | ||
.into_inner() | ||
.flat_map(|index_pair| to_non_linear(index_pair.as_str())) | ||
.collect(); | ||
let store = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))? | ||
.as_str(); | ||
DBQuery::CreateNonLinearAlgorithmIndex { | ||
store: StoreName(store.to_string()), | ||
non_linear_indices, | ||
} | ||
} | ||
Rule::create_pred_index => { | ||
let mut inner_pairs = statement.into_inner(); | ||
let index_name_pairs = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?; | ||
let predicates = index_name_pairs | ||
.into_inner() | ||
.map(|index_pair| MetadataKey::new(index_pair.as_str().to_string())) | ||
.collect(); | ||
let store = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))? | ||
.as_str(); | ||
DBQuery::CreatePredIndex { | ||
store: StoreName(store.to_string()), | ||
predicates, | ||
} | ||
} | ||
Rule::drop_non_linear_algorithm_index => { | ||
let mut inner_pairs = statement.into_inner().peekable(); | ||
let mut if_exists = false; | ||
if let Some(next_pair) = inner_pairs.peek() { | ||
if next_pair.as_rule() == Rule::if_exists { | ||
inner_pairs.next(); // Consume rule | ||
if_exists = true; | ||
} | ||
}; | ||
let index_names_pair = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?; | ||
let store = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))? | ||
.as_str(); | ||
let non_linear_indices = index_names_pair | ||
.into_inner() | ||
.flat_map(|index_pair| to_non_linear(index_pair.as_str())) | ||
.collect(); | ||
DBQuery::DropNonLinearAlgorithmIndex { | ||
store: StoreName(store.to_string()), | ||
non_linear_indices, | ||
error_if_not_exists: !if_exists, | ||
} | ||
} | ||
Rule::drop_pred_index => { | ||
let mut inner_pairs = statement.into_inner().peekable(); | ||
let mut if_exists = false; | ||
if let Some(next_pair) = inner_pairs.peek() { | ||
if next_pair.as_rule() == Rule::if_exists { | ||
inner_pairs.next(); | ||
if_exists = true; | ||
} | ||
}; | ||
let index_names_pair = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?; | ||
let store = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))? | ||
.as_str(); | ||
let predicates = index_names_pair | ||
.into_inner() | ||
.map(|index_pair| MetadataKey::new(index_pair.as_str().to_string())) | ||
.collect(); | ||
DBQuery::DropPredIndex { | ||
store: StoreName(store.to_string()), | ||
predicates, | ||
error_if_not_exists: !if_exists, | ||
} | ||
} | ||
Rule::drop_store => { | ||
let mut inner_pairs = statement.into_inner(); | ||
let store = inner_pairs | ||
.next() | ||
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))? | ||
.as_str(); | ||
let if_exists = match inner_pairs.next() { | ||
None => false, | ||
Some(p) => { | ||
if p.as_str().trim().to_lowercase() != "if exists" { | ||
let start_pos = p.as_span().start_pos().pos(); | ||
let end_pos = p.as_span().end_pos().pos(); | ||
return Err(DslError::UnexpectedSpan((start_pos, end_pos))); | ||
} | ||
true | ||
} | ||
}; | ||
DBQuery::DropStore { | ||
store: StoreName(store.to_string()), | ||
error_if_not_exists: !if_exists, | ||
} | ||
} | ||
_ => return Err(DslError::UnexpectedSpan((start_pos, end_pos))), | ||
}; | ||
queries.push(query); | ||
} | ||
Ok(queries) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
use std::num::ParseIntError; | ||
|
||
use crate::parser::Rule; | ||
use thiserror::Error; | ||
|
||
/// Errors produced while turning DSL text into queries.
#[derive(Error, Debug)] | ||
pub enum DslError { | ||
/// The input did not match the grammar; wraps the boxed pest error.
#[error("Error parsing rule {0}")] | ||
RuleParse(#[from] Box<pest::error::Error<Rule>>), | ||
/// A statement, or part of one, was not what the parser expected. The
/// payload is a pair of positions; `parse_db_query` fills it with the start
/// and end positions of the offending span.
#[error("Unexpected Statement found {0:?}")] | ||
UnexpectedSpan((usize, usize)), | ||
/// A hex string could not be decoded. NOTE(review): judging by the message
/// this is for image input; its use site is not in this module, so confirm.
#[error("Could not parse Hex string into image {0:?}")] | ||
UnexpectedHex(String), | ||
/// An integer parse failed; converted automatically from `ParseIntError`
/// (raised by the `NonZeroUsize` parses in `parse_db_query`).
#[error("Could not parse string into nonzerousize {0:?}")] | ||
NonZeroUsizeParse(#[from] ParseIntError), | ||
/// The named algorithm is not supported; the payload is the name as given
/// by the code that raised the error.
#[error("Found unsupported algorithm {0}")] | ||
UnsupportedAlgorithm(String), | ||
} |
Oops, something went wrong.