diff --git a/Cargo.lock b/Cargo.lock index 430abda..c1f4597 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1480,6 +1480,7 @@ dependencies = [ "criterion", "nom", "pretty_assertions", + "serde_json", ] [[package]] diff --git a/Makefile.toml b/Makefile.toml index 18b5f2f..883d9e7 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -75,7 +75,7 @@ args = ["fmt", "--all", "--", "--check"] [tasks.test] -env = { "RUN_MODE" = "test", "RUST_LOG" = "info" } +env = { "RUN_MODE" = "test", "RUST_LOG" = "info", "RUST_BACKTRACE" = "short" } command = "cargo" args = ["nextest", "run", "--workspace", "${@}"] diff --git a/scyllax-parser/Cargo.toml b/scyllax-parser/Cargo.toml index 5539c65..e5190d7 100644 --- a/scyllax-parser/Cargo.toml +++ b/scyllax-parser/Cargo.toml @@ -13,6 +13,7 @@ readme = 'crates.md' [dependencies] nom = "7" +serde_json = "1" [dev-dependencies] pretty_assertions = "1" diff --git a/scyllax-parser/src/comment.rs b/scyllax-parser/src/comment.rs new file mode 100644 index 0000000..874efbd --- /dev/null +++ b/scyllax-parser/src/comment.rs @@ -0,0 +1,45 @@ +use nom::{ + branch::alt, + bytes::complete::tag, + character::complete::{alpha1, alphanumeric1}, + combinator::recognize, + multi::many0_count, + sequence::delimited, + IResult, +}; + +// matches a cql comment +// - `-- end of line comment` +// - `/* block comment */` (can be multiline) +// - `// end of line comment` +pub fn parse_comment(input: &str) -> IResult<&str, &str> { + alt(( + parse_line_comment, + parse_block_comment, + parse_line_comment_slash_slash, + ))(input) +} + +fn parse_line_comment(input: &str) -> IResult<&str, &str> { + delimited( + tag("--"), + recognize(many0_count(alt((alpha1, alphanumeric1, tag(" "))))), + tag("\n"), + )(input) +} + +fn parse_block_comment(input: &str) -> IResult<&str, &str> { + delimited( + tag("/*"), + recognize(many0_count(alt((alpha1, alphanumeric1, tag(" "))))), + tag("*/"), + )(input) +} + +fn parse_line_comment_slash_slash(input: &str) -> IResult<&str, &str> { + delimited( + tag("//"), + recognize(many0_count(alt((alpha1, alphanumeric1, tag(" "))))), + tag("\n"), + )(input) +} diff --git a/scyllax-parser/src/common.rs b/scyllax-parser/src/common.rs index c414480..0bb0a4c 100644 --- a/scyllax-parser/src/common.rs +++ b/scyllax-parser/src/common.rs @@ -72,14 +72,17 @@ pub enum Value { Literal(String), /// The value is a number Number(usize), + /// The value is a boolean + Boolean(bool), } /// Parses a [`Value`] pub fn parse_value(input: &str) -> IResult<&str, Value> { alt(( + map(parse_boolean, Value::Boolean), map(parse_variable, Value::Variable), map(parse_number, Value::Number), - map(parse_string, Value::Literal), + map(parse_string, Value::Literal), // must be last! ))(input) } @@ -87,7 +90,6 @@ pub fn parse_value(input: &str) -> IResult<&str, Value> { /// If there are any escaped quotes, they should be included in the output. /// e.g. `\"` should be parsed as `\"` /// - `foo` -> `foo` -/// TODO: - `"foo"` -> `"foo"` fn parse_string(input: &str) -> IResult<&str, String> { let (input, alpha) = alt(( // barf @@ -110,6 +112,16 @@ fn parse_number(input: &str) -> IResult<&str, usize> { Ok((input, number.parse().unwrap())) } +/// Parses a [`Value::Boolean`] +fn parse_boolean(input: &str) -> IResult<&str, bool> { + let (input, boolean) = alt(( + map(tag_no_case("true"), |_| true), + map(tag_no_case("false"), |_| false), + ))(input)?; + + Ok((input, boolean)) +} + /// Parses a Rust flavored variable wrapped in double quotes pub fn parse_string_escaped_rust_flavored_variable(input: &str) -> IResult<&str, String> { let (input, alpha) = delimited(tag("\""), parse_rust_flavored_variable, tag("\""))(input)?; diff --git a/scyllax-parser/src/create_keyspace.rs b/scyllax-parser/src/create_keyspace.rs new file mode 100644 index 0000000..2eafc54 --- /dev/null +++ b/scyllax-parser/src/create_keyspace.rs @@ -0,0 +1,276 @@ +//! Parses a create keyspace query. +//! ```ignore +//! create_keyspace_statement: CREATE KEYSPACE [ IF NOT EXISTS ] `keyspace_name` WITH `options` +//! ``` +//! ## Examples +//! ```cql,ignore +//! CREATE KEYSPACE Excalibur +//! WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1' : 1, 'DC2' : 3} +//! AND durable_writes = true; +//! ``` +//! +//! ```cql,ignore +//! CREATE KEYSPACE Excelsior +//! WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 3}; +//! ``` + +use std::collections::HashMap; + +use nom::{ + branch::alt, + bytes::complete::{tag, tag_no_case, take_while_m_n}, + character::complete::{alphanumeric0, char, multispace0}, + combinator::opt, + error::Error, + multi::separated_list0, + sequence::delimited, + Err, IResult, +}; + +use crate::{common::parse_rust_flavored_variable, r#where::parse_comparisons, Column, Value}; +#[derive(Debug, PartialEq)] +pub struct CreateKeyspaceQuery { + pub name: String, + pub if_not_exists: bool, + pub replication: ReplicationOption, + pub durable_writes: Option, +} + +#[derive(Debug, PartialEq)] +pub enum ReplicationOption { + SimpleStrategy(i32), + NetworkTopologyStrategy(HashMap), +} + +impl<'a> TryFrom<&'a str> for CreateKeyspaceQuery { + type Error = Err>; + + fn try_from(value: &'a str) -> Result { + Ok(parse_create_keyspace(value)?.1) + } +} + +pub fn parse_create_keyspace(input: &str) -> IResult<&str, CreateKeyspaceQuery> { + let (input, _) = tag_no_case("create keyspace ")(input)?; + let (input, if_not_exists) = parse_if_not_exists(input)?; + + let (input, name) = parse_keyspace_name(input)?; + + let (input, _) = multispace0(input)?; + let (input, replication) = parse_replication(input)?; + let (input, _) = multispace0(input)?; + let (input, durable_writes) = parse_durable_writes(input)?; + let (input, _) = opt(tag(";"))(input)?; + + Ok(( + input, + CreateKeyspaceQuery { + name, + if_not_exists, + replication, + durable_writes, + }, + )) +} + +fn parse_if_not_exists(input: &str) -> IResult<&str, bool> { + let (input, exists) = opt(tag_no_case("if not exists "))(input)?; + Ok((input, exists.is_some())) +} + +fn parse_keyspace_name(input: &str) -> IResult<&str, String> { + let (input, name) = parse_rust_flavored_variable(input)?; + Ok((input, name.to_string())) +} + +fn parse_replication(input: &str) -> IResult<&str, ReplicationOption> { + let (input, _) = tag_no_case("with replication =")(input)?; + let (input, strategy) = parse_replication_object(input)?; + + let class = strategy.get("class").unwrap(); + match *class { + "SimpleStrategy" => { + let replication_factor = strategy.get("replication_factor").unwrap(); + let replication_factor = replication_factor.parse::().unwrap(); + Ok((input, ReplicationOption::SimpleStrategy(replication_factor))) + } + "NetworkTopologyStrategy" => { + let mut map = HashMap::new(); + for (key, value) in strategy { + if key == "class" { + continue; + } + let value = value.parse::().unwrap(); + map.insert(key.to_string(), value); + } + Ok((input, ReplicationOption::NetworkTopologyStrategy(map))) + } + _ => panic!("Unknown replication strategy: {}", class), + } +} + +/// parse the weird json like replication strategy +/// eg: `{'class': 'NetworkTopologyStrategy', 'DC1' : 1, 'DC2' : 3}` +/// remember to parse the single quotes +fn parse_replication_object(input: &str) -> IResult<&str, HashMap<&str, &str>> { + let (input, _) = multispace0(input)?; + let (input, _) = tag("{")(input)?; + let (input, _) = multispace0(input)?; + let (input, pairs) = separated_list0(tag(","), parse_replication_pair)(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = tag("}")(input)?; + + let mut map = HashMap::new(); + for (key, value) in pairs { + map.insert(key, value); + } + + Ok((input, map)) +} + +// - 'class': 'NetworkTopologyStrategy' +// - 'DC1' : 1 +// - 'DC2' : 3 +/// remember to parse the single quotes and colon and command whitespaces +fn parse_replication_pair(input: &str) -> IResult<&str, (&str, &str)> { + let (input, _) = multispace0(input)?; + + let (input, key) = delimited(char('\''), parse_rust_flavored_variable, char('\''))(input)?; + + let (input, _) = multispace0(input)?; + let (input, _) = tag(":")(input)?; + let (input, _) = multispace0(input)?; + + let string_value = delimited(char('\''), alphanumeric0, char('\'')); + let int_value = take_while_m_n(1, usize::MAX, char::is_numeric); + + let (input, value) = alt((string_value, int_value))(input)?; + + let (input, _) = multispace0(input)?; + + Ok((input, (key, value))) +} + +fn parse_durable_writes(input: &str) -> IResult<&str, Option> { + let (input, comparisons) = opt(parse_comparisons)(input)?; + + let durable_writes = comparisons.and_then(|x| { + x.into_iter().find_map(|x| match x.column { + Column::Identifier(ref name) if name == "durable_writes" => Some(match x.value { + Value::Boolean(value) => value, + _ => panic!("Expected a boolean value for durable_writes"), + }), + _ => None, + }) + }); + + Ok((input, durable_writes)) +} + +#[cfg(test)] +mod test { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn test_simple_strategy() { + assert_eq!( + parse_create_keyspace( + "CREATE KEYSPACE Excalibur WITH replication = { 'class' : 'SimpleStrategy', 'replication_factor' : 3 } AND durable_writes = true;" + ), + Ok(( + "", + CreateKeyspaceQuery { + name: "Excalibur".to_string(), + if_not_exists: false, + replication: ReplicationOption::SimpleStrategy(3), + durable_writes: Some(true) + } + )) + ); + } + + #[test] + fn test_network_topology_strategy() { + assert_eq!( + parse_create_keyspace( + r#"CREATE KEYSPACE Excelsior WITH replication = { 'class': 'NetworkTopologyStrategy', 'DC1' : 1, 'DC2' : 3};"# + ), + Ok(( + "", + CreateKeyspaceQuery { + name: "Excelsior".to_string(), + if_not_exists: false, + replication: ReplicationOption::NetworkTopologyStrategy( + vec![("DC1".to_string(), 1), ("DC2".to_string(), 3)] + .into_iter() + .collect() + ), + durable_writes: None + } + )) + ); + } + + #[test] + fn test_if_not_exists() { + assert_eq!( + parse_create_keyspace( + r#"CREATE KEYSPACE IF NOT EXISTS Excelsior WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1' : 1, 'DC2' : 3};"# + ), + Ok(( + "", + CreateKeyspaceQuery { + name: "Excelsior".to_string(), + if_not_exists: true, + replication: ReplicationOption::NetworkTopologyStrategy( + vec![("DC1".to_string(), 1), ("DC2".to_string(), 3)] + .into_iter() + .collect() + ), + durable_writes: None + } + )) + ); + } + + #[test] + fn test_durable_writes() { + assert_eq!( + parse_create_keyspace( + r#"CREATE KEYSPACE Excelsior WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1' : 1, 'DC2' : 3} AND durable_writes = true;"# + ), + Ok(( + "", + CreateKeyspaceQuery { + name: "Excelsior".to_string(), + if_not_exists: false, + replication: ReplicationOption::NetworkTopologyStrategy( + vec![("DC1".to_string(), 1), ("DC2".to_string(), 3)] + .into_iter() + .collect() + ), + durable_writes: Some(true) + } + )) + ); + } + + #[test] + fn test_parse_replication_object() { + let res: HashMap<&str, &str> = vec![ + ("class", "NetworkTopologyStrategy"), + ("DC1", "1"), + ("DC2", "3"), + ] + .into_iter() + .collect(); + + assert_eq!( + parse_replication_object( + r#"{ 'class' : 'NetworkTopologyStrategy', 'DC1' : 1, 'DC2': 3}"# + ), + Ok(("", res)) + ); + } +} diff --git a/scyllax-parser/src/lib.rs b/scyllax-parser/src/lib.rs index 45e1e28..d76a477 100644 --- a/scyllax-parser/src/lib.rs +++ b/scyllax-parser/src/lib.rs @@ -1,17 +1,21 @@ //! A parser for CQL queries //! See the source code and tests for examples of usage (for now). +pub mod comment; pub mod common; +pub mod create_keyspace; pub mod delete; pub mod reserved; pub mod select; pub mod r#where; +use comment::parse_comment; pub use common::{Column, Value, Variable}; +use create_keyspace::CreateKeyspaceQuery; pub use delete::DeleteQuery; pub use r#where::{ComparisonOperator, WhereClause}; pub use select::SelectQuery; -use nom::{branch::alt, combinator::map, error::Error, Err, IResult}; +use nom::{branch::alt, combinator::map, error::Error, multi::many0, Err, IResult}; /// Represents a query /// ```rust @@ -43,16 +47,27 @@ pub enum Query { Select(SelectQuery), /// A delete query Delete(DeleteQuery), + /// A create keyspace query + CreateKeyspace(CreateKeyspaceQuery), } /// Parse a CQL query. pub fn parse_query(input: &str) -> IResult<&str, Query> { - let trimmed = input.trim(); + // trim whitespace + let input = input.trim(); + // strip comments + let (input, _) = many0(parse_comment)(input)?; + let input = input.trim(); + println!("input: {input:#?}"); alt(( map(select::parse_select, Query::Select), map(delete::parse_delete, Query::Delete), - ))(trimmed) + map( + create_keyspace::parse_create_keyspace, + Query::CreateKeyspace, + ), + ))(input) } impl<'a> TryFrom<&'a str> for Query { @@ -63,6 +78,19 @@ impl<'a> TryFrom<&'a str> for Query { } } +/// Parse a file that can contain multiple CQL queries. The queries are separated by a semicolon. +/// There may be an indeterminate number of newlines between the semicolon and the next query. +pub fn parse_query_file(input: &str) -> IResult<&str, Vec> { + let trimmed = input.trim(); + + let (input, queries) = nom::multi::separated_list1( + nom::character::complete::multispace0, + nom::sequence::terminated(parse_query, nom::character::complete::multispace0), + )(trimmed)?; + + Ok((input, queries)) +} + #[cfg(test)] mod test { use super::*; @@ -70,9 +98,15 @@ mod test { #[test] fn test_query_select() { - let query = Query::try_from( - "select id, name, age from person where id = :id and name = :name and age > ? limit 10", - ); + let query = "/* this is a comment */ select id, name, age + from person + where id = :id + and name = :name + and age > ? + limit 10"; + println!("query: {:#?}", query); + + let query = Query::try_from(query); assert_eq!( query, diff --git a/scyllax-parser/src/where.rs b/scyllax-parser/src/where.rs index f29d517..965168b 100644 --- a/scyllax-parser/src/where.rs +++ b/scyllax-parser/src/where.rs @@ -8,8 +8,8 @@ use nom::{ branch::alt, bytes::complete::{tag, tag_no_case}, - character::complete::multispace1, - combinator::map, + character::complete::multispace0, + combinator::{map, opt}, multi::separated_list0, IResult, }; @@ -19,6 +19,17 @@ use crate::common::{ Value, }; +/// Parses a condition. +/// - eg: `id = ?` +/// - eg: durable_writes = true +pub fn parse_comparisons(input: &str) -> IResult<&str, Vec> { + // remove leading and if exists + let (input, _) = opt(tag_no_case("and"))(input)?; + let (input, _) = multispace0(input)?; + + separated_list0(tag_no_case("and"), parse_where_condition)(input) +} + /// Parses a where clause with the following format: /// /// `where ` @@ -30,7 +41,7 @@ use crate::common::{ pub fn parse_where_clause(input: &str) -> IResult<&str, Vec> { let (input, _) = tag_no_case("where ")(input)?; - separated_list0(tag_no_case(" and "), parse_where_condition)(input) + separated_list0(tag_no_case("and"), parse_where_condition)(input) } /// Represents a single `where` clause on a CQL statement @@ -79,13 +90,21 @@ fn parse_where_column(input: &str) -> IResult<&str, String> { } /// Parses a single where condition -fn parse_where_condition(input: &str) -> IResult<&str, WhereClause> { +pub fn parse_where_condition(input: &str) -> IResult<&str, WhereClause> { + // eat leading whitespace + let (input, _) = multispace0(input)?; + let (input, column) = parse_where_column(input)?; - let (input, _) = multispace1(input)?; + let (input, _) = multispace0(input)?; let (input, operator) = parse_comparison_operator(input)?; - let (input, _) = multispace1(input)?; + let (input, _) = multispace0(input)?; let (input, value) = parse_value(input)?; + // eat trailing whitespace + let (input, _) = multispace0(input)?; + // eat trailing semicolon + let (input, _) = opt(tag(";"))(input)?; + Ok(( input, WhereClause {