Skip to content

Commit

Permalink
Creating a DSL for ahnlich (#117)
Browse files Browse the repository at this point in the history
* Implementing some basic commands for DB

* CREATEPREDINDEX command parsing

* DROPPREDINDEX command parsing

* NONLINEARALGORITHMINDEX command inclusions

* GETKEY and DELKEY command inclusions

* WIP predicate parsing, would need more tests

* GETSIMN command inclusion

* CREATESTORE command inclusion

* Switch f32_array to square brackets

* SET command inclusion
  • Loading branch information
deven96 authored Sep 27, 2024
1 parent 7a72d9a commit caecb1f
Show file tree
Hide file tree
Showing 13 changed files with 1,152 additions and 3 deletions.
8 changes: 5 additions & 3 deletions ahnlich/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
[workspace]
members = [
"ai",
"db",
"types",
"dsl",
"client",
"similarity",
"task-manager",
"tracer",
"typegen",
"types",
"utils",
"similarity",
"ai", "task-manager",
]
resolver = "2"

Expand Down
13 changes: 13 additions & 0 deletions ahnlich/dsl/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Manifest for the `dsl` crate, which parses textual commands into ahnlich query types.
[package]
name = "dsl"
version = "0.1.0"
edition = "2021"

[dependencies]
# Shared ahnlich types (queries, store names/keys, algorithms) that the parser produces.
ahnlich_types = { path = "../types", version = "*" }
# Grammar-driven parsing library; supplies the `Parser` trait and the `Pair` iterators.
pest = "2.7.13"
# Derive macro companion to `pest` (presumably generates `QueryParser` and `Rule` — confirm in parser.rs).
pest_derive = "2.7.13"
# Derive support for the `DslError` enum.
thiserror.workspace = true
# Provides `Array1`, the backing storage for parsed store keys.
ndarray.workspace = true
# NOTE(review): likely used to decode hex-encoded input (see `DslError::UnexpectedHex`) — confirm.
hex = "0.4.3"

20 changes: 20 additions & 0 deletions ahnlich/dsl/src/algorithm.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
use ahnlich_types::similarity::{Algorithm, NonLinearAlgorithm};

use crate::error::DslError;

/// Resolves a textual algorithm name into a `NonLinearAlgorithm`.
///
/// The name is lowercased and stripped of surrounding whitespace before being compared, so
/// `" KDTree "` and `"kdtree"` are treated alike. Returns `None` for any name that is not a
/// known non-linear algorithm.
pub(crate) fn to_non_linear(input: &str) -> Option<NonLinearAlgorithm> {
    let lowered = input.to_lowercase();
    if lowered.trim() == "kdtree" {
        Some(NonLinearAlgorithm::KDTree)
    } else {
        None
    }
}

/// Resolves a textual algorithm name into an `Algorithm`.
///
/// The name is lowercased and stripped of surrounding whitespace before lookup.
///
/// # Errors
///
/// Returns `DslError::UnsupportedAlgorithm` carrying the normalized (lowercased, trimmed) name
/// when it does not correspond to any known algorithm.
pub(crate) fn to_algorithm(input: &str) -> Result<Algorithm, DslError> {
    let lowered = input.to_lowercase();
    let name = lowered.trim();
    let algorithm = match name {
        "kdtree" => Algorithm::KDTree,
        "cosinesimilarity" => Algorithm::CosineSimilarity,
        "dotproductsimilarity" => Algorithm::DotProductSimilarity,
        "euclideandistance" => Algorithm::EuclideanDistance,
        unknown => return Err(DslError::UnsupportedAlgorithm(unknown.to_string())),
    };
    Ok(algorithm)
}
17 changes: 17 additions & 0 deletions ahnlich/dsl/src/array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
use crate::parser::Rule;
use ahnlich_types::keyval::StoreKey;
use ndarray::Array1;
use pest::iterators::Pair;

pub(crate) fn parse_multi_f32_array(f32_arrays_pair: Pair<Rule>) -> Vec<StoreKey> {
f32_arrays_pair.into_inner().map(parse_f32_array).collect()
}

/// Builds a `StoreKey` from the inner pairs of `pair`, parsing each one as an `f32`.
///
/// # Panics
///
/// Panics if the text of any inner pair cannot be parsed as an `f32`.
pub(crate) fn parse_f32_array(pair: Pair<Rule>) -> StoreKey {
    let mut components: Vec<f32> = Vec::new();
    for number_pair in pair.into_inner() {
        let value = number_pair
            .as_str()
            .parse::<f32>()
            .expect("Cannot parse single f32 num");
        components.push(value);
    }
    StoreKey(Array1::from_iter(components))
}
304 changes: 304 additions & 0 deletions ahnlich/dsl/src/db.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,304 @@
use std::{collections::HashSet, num::NonZeroUsize};

use crate::{
algorithm::{to_algorithm, to_non_linear},
array::{parse_f32_array, parse_multi_f32_array},
metadata::parse_store_keys_to_store_value,
parser::{QueryParser, Rule},
};
use ahnlich_types::{db::DBQuery, keyval::StoreName, metadata::MetadataKey};
use pest::Parser;

use crate::{error::DslError, predicate::parse_predicate_expression};

// Parse raw strings separated by ; into a Vec<DBQuery>. Examples include but are not restricted
// to
//
// PING
// LISTCLIENTS
// LISTSTORES
// INFOSERVER
// DROPSTORE store_name IF EXISTS
// CREATEPREDINDEX (key_1, key_2) in store_name
// DROPPREDINDEX IF EXISTS (key1, key2) in store_name
// CREATENONLINEARALGORITHMINDEX (kdtree) in store_name
// DROPNONLINEARALGORITHMINDEX IF EXISTS (kdtree) in store_name
// GETKEY ([1.0, 2.0], [3.0, 4.0]) IN my_store
// DELKEY ([1.2, 3.0], [5.6, 7.8]) IN my_store
// GETPRED ((author = dickens) OR (country != Nigeria)) IN my_store
// GETSIMN 4 WITH [0.65, 2.78] USING cosinesimilarity IN my_store WHERE (author = dickens)
// CREATESTORE IF NOT EXISTS my_store DIMENSION 21 PREDICATES (author, country) NONLINEARALGORITHMINDEX (kdtree)
// SET (([1.0, 2.1, 3.2], {name: Haks, category: dev}), ([3.1, 4.8, 5.0], {name: Deven, category: dev})) in store
pub fn parse_db_query(input: &str) -> Result<Vec<DBQuery>, DslError> {
let pairs = QueryParser::parse(Rule::db_query, input).map_err(Box::new)?;
let statements = pairs.into_iter().collect::<Vec<_>>();
let mut queries = Vec::with_capacity(statements.len());
for statement in statements {
let start_pos = statement.as_span().start_pos().pos();
let end_pos = statement.as_span().end_pos().pos();
let query = match statement.as_rule() {
Rule::ping => DBQuery::Ping,
Rule::list_clients => DBQuery::ListClients,
Rule::list_stores => DBQuery::ListStores,
Rule::info_server => DBQuery::InfoServer,
Rule::set_in_store => {
let mut inner_pairs = statement.into_inner();
let store_keys_to_store_values = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?;
let store = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?
.as_str();

DBQuery::Set {
store: StoreName(store.to_string()),
inputs: parse_store_keys_to_store_value(store_keys_to_store_values)?,
}
}
Rule::create_store => {
let mut inner_pairs = statement.into_inner().peekable();
let mut error_if_exists = true;
if let Some(next_pair) = inner_pairs.peek() {
if next_pair.as_rule() == Rule::if_not_exists {
inner_pairs.next(); // Consume rule
error_if_exists = false;
}
};
let store = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?
.as_str();
let dimension = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?
.as_str()
.parse::<NonZeroUsize>()?;
let mut create_predicates = HashSet::new();
if let Some(next_pair) = inner_pairs.peek() {
if next_pair.as_rule() == Rule::metadata_keys {
let index_name_pairs = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?; // Consume rule
create_predicates = index_name_pairs
.into_inner()
.map(|index_pair| MetadataKey::new(index_pair.as_str().to_string()))
.collect();
}
};
let mut non_linear_indices = HashSet::new();
if let Some(next_pair) = inner_pairs.peek() {
if next_pair.as_rule() == Rule::non_linear_algorithms {
let index_name_pairs = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?; // Consume rule
non_linear_indices = index_name_pairs
.into_inner()
.flat_map(|index_pair| to_non_linear(index_pair.as_str()))
.collect();
}
};
DBQuery::CreateStore {
store: StoreName(store.to_string()),
dimension,
create_predicates,
non_linear_indices,
error_if_exists,
}
}
Rule::get_sim_n => {
let mut inner_pairs = statement.into_inner();
let closest_n = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?
.as_str()
.parse::<NonZeroUsize>()?;
let f32_array = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?;
let search_input = parse_f32_array(f32_array);
let algorithm = to_algorithm(
inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?
.as_str(),
)?;
let store = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?
.as_str();
let condition = if let Some(predicate_conditions) = inner_pairs.next() {
Some(parse_predicate_expression(predicate_conditions)?)
} else {
None
};
DBQuery::GetSimN {
store: StoreName(store.to_string()),
search_input,
closest_n,
algorithm,
condition,
}
}
Rule::get_pred => {
let mut inner_pairs = statement.into_inner();
let predicate_conditions = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?;
let store = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?
.as_str();
DBQuery::GetPred {
store: StoreName(store.to_string()),
condition: parse_predicate_expression(predicate_conditions)?,
}
}
Rule::get_key => {
let mut inner_pairs = statement.into_inner();
let f32_arrays_pair = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?;
let keys = parse_multi_f32_array(f32_arrays_pair);

let store = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?
.as_str();
DBQuery::GetKey {
store: StoreName(store.to_string()),
keys,
}
}
Rule::del_key => {
let mut inner_pairs = statement.into_inner();
let f32_arrays_pair = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?;
let keys = parse_multi_f32_array(f32_arrays_pair);

let store = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?
.as_str();
DBQuery::DelKey {
store: StoreName(store.to_string()),
keys,
}
}
Rule::create_non_linear_algorithm_index => {
let mut inner_pairs = statement.into_inner();
let index_name_pairs = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?;
let non_linear_indices = index_name_pairs
.into_inner()
.flat_map(|index_pair| to_non_linear(index_pair.as_str()))
.collect();
let store = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?
.as_str();
DBQuery::CreateNonLinearAlgorithmIndex {
store: StoreName(store.to_string()),
non_linear_indices,
}
}
Rule::create_pred_index => {
let mut inner_pairs = statement.into_inner();
let index_name_pairs = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?;
let predicates = index_name_pairs
.into_inner()
.map(|index_pair| MetadataKey::new(index_pair.as_str().to_string()))
.collect();
let store = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?
.as_str();
DBQuery::CreatePredIndex {
store: StoreName(store.to_string()),
predicates,
}
}
Rule::drop_non_linear_algorithm_index => {
let mut inner_pairs = statement.into_inner().peekable();
let mut if_exists = false;
if let Some(next_pair) = inner_pairs.peek() {
if next_pair.as_rule() == Rule::if_exists {
inner_pairs.next(); // Consume rule
if_exists = true;
}
};
let index_names_pair = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?;
let store = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?
.as_str();
let non_linear_indices = index_names_pair
.into_inner()
.flat_map(|index_pair| to_non_linear(index_pair.as_str()))
.collect();
DBQuery::DropNonLinearAlgorithmIndex {
store: StoreName(store.to_string()),
non_linear_indices,
error_if_not_exists: !if_exists,
}
}
Rule::drop_pred_index => {
let mut inner_pairs = statement.into_inner().peekable();
let mut if_exists = false;
if let Some(next_pair) = inner_pairs.peek() {
if next_pair.as_rule() == Rule::if_exists {
inner_pairs.next();
if_exists = true;
}
};
let index_names_pair = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?;
let store = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?
.as_str();
let predicates = index_names_pair
.into_inner()
.map(|index_pair| MetadataKey::new(index_pair.as_str().to_string()))
.collect();
DBQuery::DropPredIndex {
store: StoreName(store.to_string()),
predicates,
error_if_not_exists: !if_exists,
}
}
Rule::drop_store => {
let mut inner_pairs = statement.into_inner();
let store = inner_pairs
.next()
.ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?
.as_str();
let if_exists = match inner_pairs.next() {
None => false,
Some(p) => {
if p.as_str().trim().to_lowercase() != "if exists" {
let start_pos = p.as_span().start_pos().pos();
let end_pos = p.as_span().end_pos().pos();
return Err(DslError::UnexpectedSpan((start_pos, end_pos)));
}
true
}
};
DBQuery::DropStore {
store: StoreName(store.to_string()),
error_if_not_exists: !if_exists,
}
}
_ => return Err(DslError::UnexpectedSpan((start_pos, end_pos))),
};
queries.push(query);
}
Ok(queries)
}
18 changes: 18 additions & 0 deletions ahnlich/dsl/src/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
use std::num::ParseIntError;

use crate::parser::Rule;
use thiserror::Error;

/// Errors that can be produced while turning DSL text into queries.
#[derive(Error, Debug)]
pub enum DslError {
    /// The input was rejected by the pest grammar; wraps the boxed pest error.
    #[error("Error parsing rule {0}")]
    RuleParse(#[from] Box<pest::error::Error<Rule>>),
    /// A statement (or part of one) was missing or not of an expected kind; carries the
    /// `(start, end)` positions of the offending span.
    #[error("Unexpected Statement found {0:?}")]
    UnexpectedSpan((usize, usize)),
    /// A hex string could not be decoded.
    // NOTE(review): per the message this relates to image input; the producing code is not
    // visible here — confirm against its call sites.
    #[error("Could not parse Hex string into image {0:?}")]
    UnexpectedHex(String),
    /// A numeric token could not be parsed into a `NonZeroUsize`; wraps the underlying
    /// `ParseIntError`.
    #[error("Could not parse string into nonzerousize {0:?}")]
    NonZeroUsizeParse(#[from] ParseIntError),
    /// An algorithm name did not match any supported algorithm; carries the rejected name.
    #[error("Found unsupported algorithm {0}")]
    UnsupportedAlgorithm(String),
}
Loading

0 comments on commit caecb1f

Please sign in to comment.