From de8c6164cf24a52be587af31e530279c6496d801 Mon Sep 17 00:00:00 2001 From: Maxim Svistunov Date: Sun, 4 Feb 2024 13:32:55 +0100 Subject: [PATCH 1/5] Resolve TODOs in parsing Return error for queries containing non-ASCII characters Allow underscores in identifiers Add a delete statement test with spaces Remove trailing spaces and semicolons from tests and parsers Complete the multiple statement parser TODO --- parser/src/core.rs | 56 ++++++++++++++++++++++++++++++++---- parser/src/parsing/common.rs | 25 ++++++++++++---- parser/src/parsing/create.rs | 12 ++++---- parser/src/parsing/delete.rs | 11 ++++--- parser/src/parsing/index.rs | 6 ++-- parser/src/parsing/insert.rs | 6 ++-- parser/src/parsing/select.rs | 11 ++----- 7 files changed, 89 insertions(+), 38 deletions(-) diff --git a/parser/src/core.rs b/parser/src/core.rs index 75d2256..af348e4 100644 --- a/parser/src/core.rs +++ b/parser/src/core.rs @@ -1,6 +1,6 @@ use crate::syntax::RawQuerySyntax; use minisql::{interpreter::DbSchema, operation::Operation}; -use nom::{branch::alt, IResult}; +use nom::{branch::alt, character::complete::{multispace0, char}, multi::many1, sequence::{delimited, terminated}, IResult}; use thiserror::Error; use crate::{ @@ -19,6 +19,7 @@ pub enum Error { ValidationError(#[from] ValidationError), } +/// Parse single statement fn parse_statement(input: &str) -> IResult<&str, RawQuerySyntax> { alt(( parse_insert, @@ -31,14 +32,59 @@ fn parse_statement(input: &str) -> IResult<&str, RawQuerySyntax> { ))(input) } +/// Parse one or more statements +fn parse_statement1(input: &str) -> IResult<&str, Vec> { + many1(terminated(parse_statement, delimited(multispace0, char(';'), multispace0)))(input) +} + pub fn parse_and_validate(str_query: String, db_schema: &DbSchema) -> Result { + if let Some(non_ascii) = str_query.chars().find(|c| c.len_utf8() > 1) { + return Err(Error::ParsingError( + format!("Non ASCII character found: {}", non_ascii) + )); + } let (_, op) = parse_statement(str_query.as_str()).map_err(|err| Error::ParsingError(err.to_string()))?; Ok(validate_operation(op, db_schema)?) } -// #[test] -// fn test_select() { -// parse_and_validate("SELECT * FROM users;".to_string(), &Vec::new()).unwrap(); -// } +#[cfg(test)] +mod test { + use crate::core::parse_statement1; + use crate::parse_and_validate; + use crate::Error; + #[test] + fn test_non_unicode() { + let result = parse_and_validate(format!("SELECT * FROM users WHERE name = \"京\""), &Default::default()); + assert!(matches!(result, Err(Error::ParsingError(_)))); + if let Err(Error::ParsingError(err)) = result { + assert_eq!(err, format!("Non ASCII character found: {}", '京')); + } + } + + #[test] + fn test_parse_two_select() { + let (rest, sntx) = parse_statement1("SELECT * FROM users ; SELECT * FROM cities ; ").expect("should parse"); + assert_eq!( + sntx.len(), + 2 + ); + assert_eq!(rest, ""); + } + + #[test] + fn test_parse_three_insert_one_select() { + let (rest, sntx) = parse_statement1( + r#"INSERT INTO table1 (id, data) VALUES (u1, 2); + SELECT * FROM users ; + INSERT INTO table1 (id, data) VALUES (u4, 30) ; + INSERT INTO table1 (id, data) VALUES (u5, 40) ; + "#).expect("should parse"); + assert_eq!( + sntx.len(), + 4 + ); + assert_eq!(rest, ""); + } +} diff --git a/parser/src/parsing/common.rs b/parser/src/parsing/common.rs index fac2a0a..9b85260 100644 --- a/parser/src/parsing/common.rs +++ b/parser/src/parsing/common.rs @@ -1,8 +1,8 @@ use minisql::type_system::DbType; use nom::{ branch::alt, - bytes::complete::tag, - character::complete::{alphanumeric1, anychar, char, multispace0, multispace1}, + bytes::complete::{tag, take_while}, + character::{complete::{alphanumeric1, anychar, char, multispace0, multispace1}, is_alphanumeric}, combinator::peek, error::make_error, sequence::{delimited, terminated}, @@ -20,10 +20,11 @@ pub fn parse_table_name(input: &str) -> IResult<&str, &str> { } pub fn parse_identifier(input: &str) -> IResult<&str, &str> { - // TODO: allow underscores let (_, first) = peek(anychar)(input)?; - if first.is_alphabetic() { - alphanumeric1(input) + if first.is_alphabetic() || first == '_' { + take_while(|c: char| { + is_alphanumeric(c as u8) || c == '_' + })(input) } else { Err(nom::Err::Error(make_error( input, @@ -77,7 +78,7 @@ fn parse_equality(input: &str) -> IResult<&str, Condition> { mod tests { use minisql::type_system::DbType; - use crate::parsing::common::{parse_db_type, parse_equality}; + use crate::parsing::common::{parse_db_type, parse_equality, parse_identifier}; use crate::syntax::Condition; #[test] @@ -114,4 +115,16 @@ mod tests { )); assert!(matches!(parse_db_type("Unknown"), Err(_))); } + + #[test] + fn test_parse_identifier() { + assert_eq!( + parse_identifier("_variable__Test").expect("should parse").1, + "_variable__Test" + ); + assert!(matches!( + parse_identifier("123_variable__Test"), + Err(_) + )); + } } diff --git a/parser/src/parsing/create.rs b/parser/src/parsing/create.rs index f13e2bf..6ab7697 100644 --- a/parser/src/parsing/create.rs +++ b/parser/src/parsing/create.rs @@ -22,8 +22,6 @@ pub fn parse_create(input: &str) -> IResult<&str, RawQuerySyntax> { let (input, column_definitions) = parse_column_definitions(input)?; let (input, _) = char(')')(input)?; - let (input, _) = multispace0(input)?; - let (input, _) = char(';')(input)?; let schema = RawTableSchema { table_name: table_name.to_string(), columns: column_definitions, @@ -66,32 +64,32 @@ mod tests { #[test] fn test_parse_create_no_spaces() { - parse_create("CREATE TABLE \"Table1\"(id UUID ,column1 INT);").expect("should parse"); + parse_create("CREATE TABLE \"Table1\"(id UUID ,column1 INT)").expect("should parse"); } #[test] fn test_parse_create_primary_key() { - parse_create("CREATE TABLE \"Table1\"(id UUID PRIMARY KEY,column1 INT);") + parse_create("CREATE TABLE \"Table1\"(id UUID PRIMARY KEY,column1 INT)") .expect("should parse"); } #[test] fn test_parse_create_no_quotes_table_name() { - parse_create("CREATE TABLE Table1(id UUID PRIMARY KEY,column1 INT);") + parse_create("CREATE TABLE Table1(id UUID PRIMARY KEY,column1 INT)") .expect("should parse"); } #[test] fn test_parse_create_primary_key_with_spaces() { parse_create( - "CREATE TABLE \"Table1\" ( id UUID PRIMARY KEY , column1 INT ) ;", + "CREATE TABLE \"Table1\" ( id UUID PRIMARY KEY , column1 INT )", ) .expect("should parse"); } #[test] fn test_parse_create() { - let (_, create) = parse_create("CREATE TABLE \"Table1\"( id UUID , column1 INT );") + let (_, create) = parse_create("CREATE TABLE \"Table1\"( id UUID , column1 INT )") .expect("should parse"); assert!(matches!(create, RawQuerySyntax::CreateTable(_))); match create { diff --git a/parser/src/parsing/delete.rs b/parser/src/parsing/delete.rs index 2cbe88f..b7f3ac1 100644 --- a/parser/src/parsing/delete.rs +++ b/parser/src/parsing/delete.rs @@ -15,8 +15,6 @@ pub fn parse_delete(input: &str) -> IResult<&str, RawQuerySyntax> { let (input, table_name) = parse_table_name(input)?; let (input, _) = multispace0(input)?; let (input, condition) = parse_condition(input)?; - let (input, _) = multispace0(input)?; - let (input, _) = char(';')(input)?; Ok(( input, RawQuerySyntax::Delete(table_name.to_string(), condition), @@ -31,9 +29,14 @@ mod tests { #[test] fn test_parse_delete() { let (_, operation) = - parse_delete("DELETE FROM \"T1\" WHERE id = 1 ;").expect("should parse"); + parse_delete("DELETE FROM \"T1\" WHERE id = 1").expect("should parse"); assert!(matches!(operation, RawQuerySyntax::Delete(_, _))) } - // TODO: add test with condition + #[test] + fn test_parse_delete_with_spaces() { + let (_, operation) = + parse_delete("DELETE FROM T1 WHERE id = 1").expect("should parse"); + assert!(matches!(operation, RawQuerySyntax::Delete(_, _))) + } } diff --git a/parser/src/parsing/index.rs b/parser/src/parsing/index.rs index 4a38b32..a2941b8 100644 --- a/parser/src/parsing/index.rs +++ b/parser/src/parsing/index.rs @@ -30,8 +30,6 @@ pub fn parse_create_index(input: &str) -> IResult<&str, RawQuerySyntax> { let (input, column_name) = parse_identifier(input)?; let (input, _) = multispace0(input)?; let (input, _) = char(')')(input)?; - let (input, _) = multispace0(input)?; - let (input, _) = char(';')(input)?; let operation = RawQuerySyntax::CreateIndex(table_name.to_string(), column_name.to_string()); Ok((input, operation)) } @@ -44,7 +42,7 @@ mod tests { #[test] fn test_create_index() { let (_, syntax) = - parse_create_index("CREATE UNIQUE INDEX idxcontactsemail ON \"contacts\" (email);") + parse_create_index("CREATE UNIQUE INDEX idxcontactsemail ON \"contacts\" (email)") .expect("should parse"); assert!(matches!(syntax, RawQuerySyntax::CreateIndex(_, _))); match syntax { @@ -59,7 +57,7 @@ mod tests { #[test] fn test_create_index_with_spaces() { let (_, syntax) = parse_create_index( - "CREATE UNIQUE INDEX idxcontactsemail ON \"contacts\" ( email ) ;", + "CREATE UNIQUE INDEX idxcontactsemail ON \"contacts\" ( email )", ) .expect("should parse"); assert!(matches!(syntax, RawQuerySyntax::CreateIndex(_, _))); diff --git a/parser/src/parsing/insert.rs b/parser/src/parsing/insert.rs index 2dfa061..1048559 100644 --- a/parser/src/parsing/insert.rs +++ b/parser/src/parsing/insert.rs @@ -33,8 +33,6 @@ pub fn parse_insert(input: &str) -> IResult<&str, RawQuerySyntax> { let (input, values) = parse_values(input)?; let (input, _) = multispace0(input)?; let (input, _) = char(')')(input)?; - let (input, _) = multispace0(input)?; - let (input, _) = char(';')(input)?; Ok(( input, RawQuerySyntax::Insert( @@ -64,7 +62,7 @@ mod tests { #[test] fn test_parse_insert() { - let sql = "INSERT INTO \"MyTable\" (id, data) VALUES(1, \"Text\");"; + let sql = "INSERT INTO \"MyTable\" (id, data) VALUES(1, \"Text\")"; let syntax = parse_insert(sql).expect("should parse"); match syntax { ("", RawQuerySyntax::Insert(table_name, insertion_values)) => { @@ -89,7 +87,7 @@ mod tests { #[test] fn test_parse_insert_with_spaces() { let sql = - "INSERT INTO \"MyTable\" ( id, data ) VALUES ( 1, \"Text\" ) ;"; + "INSERT INTO \"MyTable\" ( id, data ) VALUES ( 1, \"Text\" )"; let operation = parse_insert(sql).expect("should parse"); match operation { ("", RawQuerySyntax::Insert(table_name, insertion_values)) => { diff --git a/parser/src/parsing/select.rs b/parser/src/parsing/select.rs index 3d14bd6..0f8094d 100644 --- a/parser/src/parsing/select.rs +++ b/parser/src/parsing/select.rs @@ -22,9 +22,6 @@ pub fn parse_select(input: &str) -> IResult<&str, RawQuerySyntax> { let (input, table_name) = parse_table_name(input)?; let (input, _) = multispace0(input)?; let (input, condition) = parse_condition(input)?; - let (input, _) = multispace0(input)?; - // TODO: make it optional? - let (input, _) = tag(";")(input)?; Ok(( input, RawQuerySyntax::Select(table_name.to_string(), column_selection, condition), @@ -52,7 +49,7 @@ mod tests { #[test] fn test_parse_select_all() { - let sql = "SELECT * FROM \"MyTable\";"; + let sql = "SELECT * FROM \"MyTable\""; let operation = parse_select(sql).expect("should parse"); match operation { ("", RawQuerySyntax::Select(table_name, column_selection, maybe_condition)) => { @@ -79,7 +76,7 @@ mod tests { #[test] fn test_parse_select_columns() { - let sql = "SELECT name , email FROM \"AddressBook\" ;"; + let sql = "SELECT name , email FROM \"AddressBook\""; let operation = parse_select(sql).expect("should parse"); match operation { ("", RawQuerySyntax::Select(table_name, column_selection, maybe_condition)) => { @@ -105,7 +102,7 @@ mod tests { #[test] fn test_parse_select_where() { use crate::syntax::Condition; - let sql = "SELECT * FROM \"AddressBook\" WHERE id = 5 ;"; + let sql = "SELECT * FROM \"AddressBook\" WHERE id = 5"; let operation = parse_select(sql).expect("should parse"); match operation { ("", RawQuerySyntax::Select(table_name, column_selection, maybe_condition)) => { @@ -119,6 +116,4 @@ mod tests { } } } - // TODO: a test with multiple statements - // TODO: allow underscores in identifiers } From 6245dba4f0f2b8c59f40d4b32f143ad011590b70 Mon Sep 17 00:00:00 2001 From: Maxim Svistunov Date: Sun, 4 Feb 2024 13:46:31 +0100 Subject: [PATCH 2/5] Parsing and validation for Option Add option type and option value parsers value->literal in parser, implement Option literal --- parser/src/parsing/common.rs | 63 +++++++++++------- parser/src/parsing/create.rs | 43 +++++++++++++ parser/src/parsing/delete.rs | 26 ++++++-- parser/src/parsing/insert.rs | 51 +++++++++++---- parser/src/parsing/literal.rs | 118 +++++++++++++++++++++------------- parser/src/parsing/select.rs | 48 +++++++++++++- parser/src/syntax.rs | 7 +- parser/src/validation.rs | 75 +++++++++++++++------ 8 files changed, 322 insertions(+), 109 deletions(-) diff --git a/parser/src/parsing/common.rs b/parser/src/parsing/common.rs index 9b85260..3c9a585 100644 --- a/parser/src/parsing/common.rs +++ b/parser/src/parsing/common.rs @@ -1,16 +1,10 @@ use minisql::type_system::DbType; use nom::{ - branch::alt, - bytes::complete::{tag, take_while}, - character::{complete::{alphanumeric1, anychar, char, multispace0, multispace1}, is_alphanumeric}, - combinator::peek, - error::make_error, - sequence::{delimited, terminated}, - IResult, + branch::alt, bytes::complete::{tag, take_while}, character::{complete::{alphanumeric1, anychar, char, multispace0, multispace1}, is_alphanumeric}, combinator::peek, error::make_error, sequence::{delimited, terminated}, IResult, Parser }; -use super::literal::parse_db_value; use crate::syntax::Condition; +use super::literal::parse_literal; pub fn parse_table_name(input: &str) -> IResult<&str, &str> { alt(( @@ -38,19 +32,22 @@ pub fn parse_column_name(input: &str) -> IResult<&str, String> { } pub fn parse_db_type(input: &str) -> IResult<&str, DbType> { - let (input, type_name) = alt((tag("STRING"), tag("INT"), tag("NUMBER"), tag("UUID")))(input)?; - let db_type = match type_name { - "STRING" => DbType::String, - "INT" => DbType::Int, - "UUID" => DbType::Uuid, - "NUMBER" => DbType::Number, - _ => { - return Err(nom::Err::Failure(make_error( - input, - nom::error::ErrorKind::IsNot, - ))) - } - }; + let (input, db_type) = alt( + ( + tag("STRING") + .map(|_| DbType::String), + tag("INT") + .map(|_| DbType::Int), + tag("NUMBER") + .map(|_| DbType::Number), + tag("UUID") + .map(|_| DbType::Uuid), + delimited(tag("Option("), parse_db_type, tag(")")) + .map(|ty| { + DbType::Option(Box::new(ty)) + }) + ) + )(input)?; Ok((input, db_type)) } @@ -70,8 +67,8 @@ fn parse_equality(input: &str) -> IResult<&str, Condition> { let (input, _) = multispace0(input)?; let (input, _) = char('=')(input)?; let (input, _) = multispace0(input)?; - let (input, db_value) = parse_db_value(input)?; - Ok((input, Condition::Eq(column_name, db_value))) + let (input, lit) = parse_literal(input)?; + Ok((input, Condition::Eq(column_name, lit))) } #[cfg(test)] @@ -79,15 +76,15 @@ mod tests { use minisql::type_system::DbType; use crate::parsing::common::{parse_db_type, parse_equality, parse_identifier}; + use crate::parsing::literal::Literal; use crate::syntax::Condition; #[test] fn test_parse_equality() { - use minisql::type_system::Value; match parse_equality("id = 1") { Ok(("", Condition::Eq(column_name, value))) => { assert!(column_name.eq("id")); - assert_eq!(value, Value::Int(1)) + assert_eq!(value, Literal::Int(1)) } _ => { panic!("should parse"); @@ -127,4 +124,20 @@ mod tests { Err(_) )); } + + #[test] + fn test_parse_option_string_type() { + assert_eq!( + parse_db_type("Option(STRING)").expect("should parse").1, + DbType::Option(Box::new(DbType::String)) + ); + } + + #[test] + fn test_parse_nested_option_int_type() { + assert_eq!( + parse_db_type("Option(Option(Option(INT)))").expect("should parse").1, + DbType::Option(Box::new(DbType::Option(Box::new(DbType::Option(Box::new(DbType::Int)))))) + ); + } } diff --git a/parser/src/parsing/create.rs b/parser/src/parsing/create.rs index 6ab7697..1f4f7b1 100644 --- a/parser/src/parsing/create.rs +++ b/parser/src/parsing/create.rs @@ -59,6 +59,8 @@ fn parse_column_definition(input: &str) -> IResult<&str, ColumnSchema> { #[cfg(test)] mod tests { + use minisql::type_system::DbType; + use crate::parsing::create::parse_create; use crate::syntax::RawQuerySyntax; @@ -112,4 +114,45 @@ mod tests { _ => {} } } + + #[test] + fn test_parse_create_option() { + let (_, create) = parse_create("CREATE TABLE games (id UUID PRIMARY KEY, name STRING, year Option(INT), price NUMBER)") + .expect("should parse"); + assert!(matches!(create, RawQuerySyntax::CreateTable(_))); + match create { + RawQuerySyntax::CreateTable(schema) => { + assert_eq!(schema.table_name, "games"); + assert_eq!(schema.number_of_columns(), 4); + + let result_id = schema.get_column(&"id".to_string()); + assert!(matches!(result_id, Some(_))); + let Some(id_column) = result_id else { panic!() }; + assert_eq!(id_column.column_name, "id".to_string()); + + let result_column1 = schema.get_column(&"name".to_string()); + assert!(matches!(result_column1, Some(_))); + let Some(column1_column) = result_column1 else { + panic!() + }; + assert_eq!(column1_column.column_name, "name".to_string()); + assert_eq!(column1_column.type_, DbType::String); + + let column = schema.get_column(&"year".to_string()); + let Some(column) = column else { + panic!() + }; + assert_eq!(column.column_name, "year".to_string()); + assert_eq!(column.type_, DbType::Option(Box::new(DbType::Int))); + + let column = schema.get_column(&"price".to_string()); + let Some(column) = column else { + panic!() + }; + assert_eq!(column.column_name, "price".to_string()); + assert_eq!(column.type_, DbType::Number); + } + _ => {} + } + } } diff --git a/parser/src/parsing/delete.rs b/parser/src/parsing/delete.rs index b7f3ac1..56e56f3 100644 --- a/parser/src/parsing/delete.rs +++ b/parser/src/parsing/delete.rs @@ -1,6 +1,6 @@ use nom::{ bytes::complete::tag, - character::complete::{char, multispace0, multispace1}, + character::complete::{multispace0, multispace1}, IResult, }; @@ -24,19 +24,33 @@ pub fn parse_delete(input: &str) -> IResult<&str, RawQuerySyntax> { #[cfg(test)] mod tests { use crate::parsing::delete::parse_delete; - use crate::syntax::RawQuerySyntax; + use crate::parsing::literal::Literal; + use crate::syntax::{Condition, RawQuerySyntax}; #[test] fn test_parse_delete() { - let (_, operation) = + let (_, sntx) = parse_delete("DELETE FROM \"T1\" WHERE id = 1").expect("should parse"); - assert!(matches!(operation, RawQuerySyntax::Delete(_, _))) + assert!(matches!(sntx, RawQuerySyntax::Delete(_, _))) } #[test] fn test_parse_delete_with_spaces() { - let (_, operation) = + let (_, sntx) = parse_delete("DELETE FROM T1 WHERE id = 1").expect("should parse"); - assert!(matches!(operation, RawQuerySyntax::Delete(_, _))) + assert!(matches!(sntx, RawQuerySyntax::Delete(_, _))) + } + + #[test] + fn test_parse_delete_none() { + let (_, sntx) = + parse_delete("DELETE FROM games WHERE year = None").expect("should parse"); + if let RawQuerySyntax::Delete(tname, Some(Condition::Eq(column_name, lit))) = sntx { + assert_eq!(tname, "games".to_string()); + assert_eq!(column_name, "year".to_string()); + assert_eq!(lit, Literal::None) + } else { + panic!() + } } } diff --git a/parser/src/parsing/insert.rs b/parser/src/parsing/insert.rs index 1048559..e4c5328 100644 --- a/parser/src/parsing/insert.rs +++ b/parser/src/parsing/insert.rs @@ -1,9 +1,8 @@ use super::{ common::{parse_identifier, parse_table_name}, - literal::parse_db_value, + literal::{parse_literal, Literal}, }; use crate::syntax::RawQuerySyntax; -use minisql::type_system::Value; use nom::{ bytes::complete::tag, character::complete::{char, multispace0, multispace1}, @@ -49,16 +48,14 @@ pub fn parse_columns(input: &str) -> IResult<&str, Vec> { )(input) } -pub fn parse_values(input: &str) -> IResult<&str, Vec> { - separated_list0(terminated(char(','), multispace0), parse_db_value)(input) +pub fn parse_values(input: &str) -> IResult<&str, Vec> { + separated_list0(terminated(char(','), multispace0), parse_literal)(input) } #[cfg(test)] mod tests { - use minisql::type_system::Value; - use super::parse_insert; - use crate::syntax::RawQuerySyntax; + use crate::{parsing::literal::Literal, syntax::RawQuerySyntax}; #[test] fn test_parse_insert() { @@ -70,10 +67,10 @@ mod tests { assert_eq!( insertion_values, vec![ - ("id".to_string(), Value::Int(1)), + ("id".to_string(), Literal::Int(1)), ( "data".to_string(), - Value::String("Text".to_string()) + Literal::String("Text".to_string()) ) ] ); @@ -95,10 +92,10 @@ mod tests { assert_eq!( insertion_values, vec![ - ("id".to_string(), Value::Int(1)), + ("id".to_string(), Literal::Int(1)), ( "data".to_string(), - Value::String("Text".to_string()) + Literal::String("Text".to_string()) ) ] ); @@ -108,4 +105,36 @@ mod tests { } } } + + #[test] + fn test_parse_insert_option() { + let sql = r#"INSERT INTO games (id, name, year, price) VALUES (u12345, "Doom", Some(1993), 6.5);"#; + let syntax = parse_insert(sql).expect("should parse"); + match syntax { + (";", RawQuerySyntax::Insert(table_name, insertion_values)) => { + assert_eq!(table_name, "games"); + assert_eq!( + insertion_values, + vec![ + ("id".to_string(), Literal::Uuid(12345)), + ( + "name".to_string(), + Literal::String("Doom".to_string()) + ), + ( + "year".to_string(), + Literal::Some(Box::new(Literal::Int(1993))) + ), + ( + "price".to_string(), + Literal::Number(6.5) + ) + ] + ); + } + _ => { + panic!() + } + } + } } diff --git a/parser/src/parsing/literal.rs b/parser/src/parsing/literal.rs index 8ca72e8..886798e 100644 --- a/parser/src/parsing/literal.rs +++ b/parser/src/parsing/literal.rs @@ -1,19 +1,23 @@ -use minisql::type_system::Value; +use minisql::type_system::DbType; use nom::{ - branch::alt, - character::complete::{char, digit1, none_of, u64}, - combinator::opt, - error::make_error, - multi::many0, - sequence::{delimited, pair, preceded}, - IResult, + branch::alt, bytes::complete::tag, character::complete::{char, digit1, none_of, u64}, combinator::opt, error::make_error, multi::many0, sequence::{delimited, pair, preceded}, IResult, Parser }; -pub fn parse_db_value(input: &str) -> IResult<&str, Value> { - alt((parse_string, parse_number, parse_int, parse_uuid))(input) +#[derive(Debug, PartialEq)] +pub enum Literal { + Number(f64), + String(String), + Int(u64), + Uuid(u64), + Some(Box), + None, } -pub fn parse_number(input: &str) -> IResult<&str, Value> { +pub fn parse_literal(input: &str) -> IResult<&str, Literal> { + alt((parse_option, parse_string, parse_number, parse_int, parse_uuid))(input) +} + +pub fn parse_number(input: &str) -> IResult<&str, Literal> { // Parse the integer part let (input, (sign, digits)) = pair(opt(char('-')), digit1)(input)?; @@ -28,19 +32,19 @@ pub fn parse_number(input: &str) -> IResult<&str, Value> { let value = combined_parts .parse::() .map_err(|_| nom::Err::Failure(make_error(input, nom::error::ErrorKind::Fail)))?; - Ok((input, Value::Number(value))) + Ok((input, Literal::Number(value))) } None => { let value = format!("{}{}", sign.unwrap_or('+'), digits) .parse::() .map_err(|_| nom::Err::Failure(make_error(input, nom::error::ErrorKind::Fail)))?; - Ok((input, Value::Int(value))) + Ok((input, Literal::Int(value))) } } } -pub fn parse_int(input: &str) -> IResult<&str, Value> { - u64(input).map(|(input, v)| (input, Value::Int(v))) +pub fn parse_int(input: &str) -> IResult<&str, Literal> { + u64(input).map(|(input, v)| (input, Literal::Int(v))) } fn escape_tab(input: &str) -> IResult<&str, char> { @@ -67,7 +71,7 @@ fn escape_doublequote(input: &str) -> IResult<&str, char> { preceded(char('\\'), char('"'))(input) } -pub fn parse_string(input: &str) -> IResult<&str, Value> { +pub fn parse_string(input: &str) -> IResult<&str, Literal> { // Parse the content inside the double quotes let (input, content) = delimited( char('"'), @@ -85,19 +89,27 @@ pub fn parse_string(input: &str) -> IResult<&str, Value> { // Combine the characters into a string let value: String = content.into_iter().collect(); - Ok((input, Value::String(value))) + Ok((input, Literal::String(value))) } -pub fn parse_uuid(input: &str) -> IResult<&str, Value> { +pub fn parse_uuid(input: &str) -> IResult<&str, Literal> { let (input, value) = pair(char('u'), u64)(input) - .map(|(input, (_, v))| (input, Value::Uuid(v)))?; + .map(|(input, (_, v))| (input, Literal::Uuid(v)))?; Ok((input, value)) } +pub fn parse_option(input: &str) -> IResult<&str, Literal> { + let (input, inner) = alt((tag("None") + .map(|_| Literal::None), delimited(tag("Some("), parse_literal, tag(")")).map(|v| { + Literal::Some(Box::new(v)) + })))(input)?; + Ok((input, inner)) +} + #[cfg(test)] mod tests { - use crate::parsing::literal::{parse_db_value, parse_string, parse_uuid}; - use minisql::type_system::Value; + use crate::parsing::literal::{parse_literal, parse_option, parse_string, parse_uuid, Literal}; + use minisql::type_system::DbType; #[test] fn test_string_parser() { @@ -105,42 +117,42 @@ mod tests { parse_string(r#""simple""#), Ok(( "", - Value::String(String::from("simple")) + Literal::String(String::from("simple")) )) ); assert_eq!( parse_string(r#""\"\t\r\n\\""#), Ok(( "", - Value::String(String::from("\"\t\r\n\\")) + Literal::String(String::from("\"\t\r\n\\")) )) ); assert_eq!( parse_string(r#""name is \"John\".""#), Ok(( "", - Value::String(String::from("name is \"John\".")) + Literal::String(String::from("name is \"John\".")) )) ); } #[test] fn test_parse_db_value() { - let (input, value) = parse_db_value("5").expect("should parse"); + let (input, value) = parse_literal("5").expect("should parse"); assert_eq!(input, ""); - assert_eq!(value, Value::Int(5)); + assert_eq!(value, Literal::Int(5)); - let (input, value) = parse_db_value("5.5").expect("should parse"); + let (input, value) = parse_literal("5.5").expect("should parse"); assert_eq!(input, ""); - assert_eq!(value, Value::Number(5.5)); + assert_eq!(value, Literal::Number(5.5)); - let (_, _) = parse_db_value("\"STRING\"").expect("should parse"); + let (_, _) = parse_literal("\"STRING\"").expect("should parse"); let (input, value) = - parse_db_value("\"abcdefghkjklmnopqrstuvwxyz!@#$%^&*()_+ \"").expect("should parse"); + parse_literal("\"abcdefghkjklmnopqrstuvwxyz!@#$%^&*()_+ \"").expect("should parse"); assert_eq!(input, ""); assert_eq!( value, - Value::String( + Literal::String( "abcdefghkjklmnopqrstuvwxyz!@#$%^&*()_+ ".to_string() ) ); @@ -149,41 +161,41 @@ mod tests { #[test] fn test_parse_positive_float() { assert_eq!( - parse_db_value("23.213313"), - Ok(("", Value::Number(23.213313))) + parse_literal("23.213313"), + Ok(("", Literal::Number(23.213313))) ); assert_eq!( - parse_db_value("2241.9734"), - Ok(("", Value::Number(2241.9734))) + parse_literal("2241.9734"), + Ok(("", Literal::Number(2241.9734))) ); } #[test] fn test_parse_negative_float() { assert_eq!( - parse_db_value("-9241.873654"), - Ok(("", Value::Number(-9241.873654))) + parse_literal("-9241.873654"), + Ok(("", Literal::Number(-9241.873654))) ); assert_eq!( - parse_db_value("-62625.0"), - Ok(("", Value::Number(-62625.0))) + parse_literal("-62625.0"), + Ok(("", Literal::Number(-62625.0))) ); } #[test] fn test_parse_float_between_0_and_1() { assert_eq!( - parse_db_value("0.873654"), - Ok(("", Value::Number(0.873654))) + parse_literal("0.873654"), + Ok(("", Literal::Number(0.873654))) ); - assert_eq!(parse_db_value("0.62625"), Ok(("", Value::Number(0.62625)))); + assert_eq!(parse_literal("0.62625"), Ok(("", Literal::Number(0.62625)))); } #[test] fn test_parse_int() { assert_eq!( - parse_db_value("5134616"), - Ok(("", Value::Int(5134616))) + parse_literal("5134616"), + Ok(("", Literal::Int(5134616))) ); } @@ -191,7 +203,23 @@ mod tests { fn test_parse_uuid() { assert_eq!( parse_uuid("u131515"), - Ok(("", Value::Uuid(131515))) + Ok(("", Literal::Uuid(131515))) + ) + } + + #[test] + fn test_parse_option_int() { + assert_eq!( + parse_option("Some(2)"), + Ok(("", Literal::Some(Box::new(Literal::Int(2))))) + ); + assert_eq!( + parse_option("Some(Some(3))"), + Ok(("", Literal::Some(Box::new(Literal::Some(Box::new(Literal::Int(3))))))) + ); + assert_eq!( + parse_option("Some(None)"), + Ok(("", Literal::Some(Box::new(Literal::None)))) ) } } diff --git a/parser/src/parsing/select.rs b/parser/src/parsing/select.rs index 0f8094d..6d8274d 100644 --- a/parser/src/parsing/select.rs +++ b/parser/src/parsing/select.rs @@ -42,8 +42,7 @@ pub fn try_parse_column_selection(input: &str) -> IResult<&str, ColumnSelection> #[cfg(test)] mod tests { use crate::parsing::{ - common::{parse_column_name, parse_table_name}, - select::parse_select, + common::{parse_column_name, parse_table_name}, literal::Literal, select::parse_select }; use crate::syntax::{ColumnSelection, RawQuerySyntax}; @@ -116,4 +115,49 @@ mod tests { } } } + #[test] + fn test_parse_select_option() { + use crate::syntax::Condition; + let sql = "SELECT * FROM games WHERE year = Some(2006)"; + let operation = parse_select(sql).expect("should parse"); + match operation { + ("", RawQuerySyntax::Select(table_name, column_selection, maybe_condition)) => { + assert_eq!(table_name, "games"); + assert!(matches!(column_selection, ColumnSelection::All)); + if let Some(Condition::Eq(left, right)) = maybe_condition { + assert_eq!(left, "year".to_string()); + assert_eq!(right, Literal::Some(Box::new(Literal::Int(2006)))) + } else { + panic!(); + } + } + (input, _) => { + println!("Input to be parsed: {}", input); + panic!("expected select operation") + } + } + } + + #[test] + fn test_parse_select_option_none() { + use crate::syntax::Condition; + let sql = "SELECT * FROM games WHERE year = None"; + let operation = parse_select(sql).expect("should parse"); + match operation { + ("", RawQuerySyntax::Select(table_name, column_selection, maybe_condition)) => { + assert_eq!(table_name, "games"); + assert!(matches!(column_selection, ColumnSelection::All)); + if let Some(Condition::Eq(left, right)) = maybe_condition { + assert_eq!(left, "year".to_string()); + assert_eq!(right, Literal::None) + } else { + panic!(); + } + } + (input, _) => { + println!("Input to be parsed: {}", input); + panic!("expected select operation") + } + } + } } diff --git a/parser/src/syntax.rs b/parser/src/syntax.rs index 27d3306..58e5623 100644 --- a/parser/src/syntax.rs +++ b/parser/src/syntax.rs @@ -3,6 +3,8 @@ use minisql::{ type_system::{DbType, Value}, }; +use crate::parsing::literal::Literal; + // ===Table Schema=== #[derive(Debug, Clone, PartialEq)] pub struct RawTableSchema { @@ -27,7 +29,7 @@ pub enum RawQuerySyntax { CreateIndex(TableName, ColumnName), // DropTable(TableName), } -pub type InsertionValues = Vec<(ColumnName, Value)>; +pub type InsertionValues = Vec<(ColumnName, Literal)>; pub enum ColumnSelection { All, @@ -38,7 +40,7 @@ pub enum Condition { // And(Box, Box), // Or(Box, Box), // Not(Box), - Eq(ColumnName, Value), + Eq(ColumnName, Literal), // LessOrEqual(ColumnName, DbValue), // Less(ColumnName, DbValue), @@ -69,3 +71,4 @@ impl RawTableSchema { .collect() } } + diff --git a/parser/src/validation.rs b/parser/src/validation.rs index 9959abf..037bed2 100644 --- a/parser/src/validation.rs +++ b/parser/src/validation.rs @@ -1,6 +1,7 @@ use std::collections::{BTreeMap, HashSet}; use thiserror::Error; +use crate::parsing::literal::Literal; use crate::syntax; use crate::syntax::{ColumnSchema, RawQuerySyntax, RawTableSchema}; use minisql::operation; @@ -34,6 +35,8 @@ pub enum ValidationError { received_type: DbType, expected_type: DbType, }, + #[error("Expected type {expected_type:?}, received None")] + UnexpectedNoneValue{ expected_type: DbType }, #[error("values for required columns {0:?} are missing")] RequiredColumnsAreMissing(Vec), } @@ -240,7 +243,7 @@ fn validate_insert( .ok_or(ValidationError::ColumnsDoNotExist(vec![ column_name.to_string() ]))?; // By the previous validation steps this is never gonna trigger an error. - let value_type = value.to_type(); + let value_type = type_from_literal_with_type_hint(&value, &expected_type)?; if value_type != expected_type { return Err(ValidationError::TypeMismatch { column_name: column_name.to_string(), @@ -248,7 +251,7 @@ fn validate_insert( expected_type, }); } - values_map.insert(column, value); + values_map.insert(column, literal_to_value(value, &expected_type)); } // WARNING: If you use `values_map: HashMap<_,_>`, this is not gonna sort values by key. @@ -278,9 +281,9 @@ fn validate_condition( let (column, expected_type) = schema.get_typed_column(&column_name).ok_or( ValidationError::ColumnsDoNotExist(vec![column_name.to_string()]), )?; - let value_type: DbType = value.to_type(); + let value_type: DbType = type_from_literal_with_type_hint(&value, &expected_type)?; if expected_type.eq(&value_type) { - Ok(Some(operation::Condition::Eq(column, value))) + Ok(Some(operation::Condition::Eq(column, literal_to_value(value, &expected_type)))) } else { Err(ValidationError::TypeMismatch { column_name: column_name.to_string(), @@ -345,6 +348,42 @@ fn get_table_schema<'a>( Some(table_schema) } +fn literal_to_value(lit: Literal, hint: &DbType) -> Value { + match lit { + Literal::Number(v) => Value::Number(v), + Literal::String(v) => Value::String(v), + Literal::Int(v) => Value::Int(v), + Literal::Uuid(v) => Value::Uuid(v), + Literal::Some(v) => Value::Some(Box::new(literal_to_value(*v, hint))), + Literal::None => { + if let DbType::Option(t) = hint { + Value::None(*t.clone()) + } else { + // By the time calling current function, hopefully we should be sure about the + // type we want from the literal + panic!() + } + }, + } +} + +fn type_from_literal_with_type_hint(lit: &Literal, hint: &DbType) -> Result { + Ok(match lit { + Literal::Number(_) => DbType::Number, + Literal::String(_) => DbType::String, + Literal::Int(_) => DbType::Int, + Literal::Uuid(_) => DbType::Uuid, + Literal::Some(l) => type_from_literal_with_type_hint(l, hint)?, + Literal::None => { + if matches!(hint, DbType::Option(_)) { + hint.clone() + } else { + return Err(ValidationError::UnexpectedNoneValue { expected_type: hint.clone() }) + } + } + }) +} + #[cfg(test)] mod tests { use super::*; @@ -565,7 +604,7 @@ mod tests { let syntax: RawQuerySyntax = Select( "users".to_string(), ColumnSelection::All, - Some(Eq("age".to_string(), Value::Int(25))), + Some(Eq("age".to_string(), Literal::Int(25))), ); let result = validate_operation(syntax, &db_schema); assert!(matches!(result, Ok(Operation::Select(_, _, _)))); @@ -632,7 +671,7 @@ mod tests { let syntax: RawQuerySyntax = Select( "users".to_string(), ColumnSelection::All, - Some(Eq("does_not_exist".to_string(), Value::Int(25))), + Some(Eq("does_not_exist".to_string(), Literal::Int(25))), ); let result = validate_operation(syntax, &db_schema); assert!(matches!(result, Err(ValidationError::ColumnsDoNotExist(_)))); @@ -646,7 +685,7 @@ mod tests { let syntax: RawQuerySyntax = Select( "users".to_string(), ColumnSelection::All, - Some(Eq("age".to_string(), Value::String("25".to_string()))), + Some(Eq("age".to_string(), Literal::String("25".to_string()))), ); let result = validate_operation(syntax, &db_schema); assert!(matches!(result, Err(ValidationError::TypeMismatch { .. }))); @@ -663,9 +702,9 @@ mod tests { let syntax: RawQuerySyntax = Insert( "users".to_string(), vec![ - ("name".to_string(), Value::String("Alice".to_string())), - ("id".to_string(), Value::Uuid(0)), - ("age".to_string(), Value::Int(25)), + ("name".to_string(), Literal::String("Alice".to_string())), + ("id".to_string(), Literal::Uuid(0)), + ("age".to_string(), Literal::Int(25)), ], ); let result = validate_operation(syntax, &db_schema); @@ -698,10 +737,10 @@ mod tests { let syntax: RawQuerySyntax = Insert( "users".to_string(), vec![ - ("name".to_string(), Value::String("Alice".to_string())), - ("id".to_string(), Value::Uuid(0)), - ("age".to_string(), Value::Int(25)), - ("does_not_exist".to_string(), Value::Int(25)), + ("name".to_string(), Literal::String("Alice".to_string())), + ("id".to_string(), Literal::Uuid(0)), + ("age".to_string(), Literal::Int(25)), + ("does_not_exist".to_string(), Literal::Int(25)), ], ); let result = validate_operation(syntax, &db_schema); @@ -716,9 +755,9 @@ mod tests { let syntax: RawQuerySyntax = Insert( "users".to_string(), vec![ - ("name".to_string(), Value::String("Alice".to_string())), - ("id".to_string(), Value::Uuid(0)), - ("age".to_string(), Value::Number(25.0)), + ("name".to_string(), Literal::String("Alice".to_string())), + ("id".to_string(), Literal::Uuid(0)), + ("age".to_string(), Literal::Number(25.0)), ], ); let result = validate_operation(syntax, &db_schema); @@ -754,7 +793,7 @@ mod tests { let syntax: RawQuerySyntax = Delete( "users".to_string(), - Some(Eq("age".to_string(), Value::Int(25))), + Some(Eq("age".to_string(), Literal::Int(25))), ); let result = validate_operation(syntax, &db_schema); assert!(matches!( From da8e2d6771ac2cd6b945256c632014aec47b19e5 Mon Sep 17 00:00:00 2001 From: Yuriy Dupyn <2153100+omedusyo@users.noreply.github.com> Date: Sun, 4 Feb 2024 14:27:59 +0100 Subject: [PATCH 3/5] Remove some unused imports --- parser/src/core.rs | 1 + parser/src/parsing/literal.rs | 1 - parser/src/syntax.rs | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/parser/src/core.rs b/parser/src/core.rs index af348e4..4351bd1 100644 --- a/parser/src/core.rs +++ b/parser/src/core.rs @@ -33,6 +33,7 @@ fn parse_statement(input: &str) -> IResult<&str, RawQuerySyntax> { } /// Parse one or more statements +#[allow(dead_code)] fn parse_statement1(input: &str) -> IResult<&str, Vec> { many1(terminated(parse_statement, delimited(multispace0, char(';'), multispace0)))(input) } diff --git a/parser/src/parsing/literal.rs b/parser/src/parsing/literal.rs index 886798e..b09b0cb 100644 --- a/parser/src/parsing/literal.rs +++ b/parser/src/parsing/literal.rs @@ -1,4 +1,3 @@ -use minisql::type_system::DbType; use nom::{ branch::alt, bytes::complete::tag, character::complete::{char, digit1, none_of, u64}, combinator::opt, error::make_error, multi::many0, sequence::{delimited, pair, preceded}, IResult, Parser }; diff --git a/parser/src/syntax.rs b/parser/src/syntax.rs index 58e5623..a0f5df3 100644 --- a/parser/src/syntax.rs +++ b/parser/src/syntax.rs @@ -1,6 +1,6 @@ use minisql::{ schema::{ColumnName, TableName}, - type_system::{DbType, Value}, + type_system::DbType, }; use crate::parsing::literal::Literal; From c4f5ac7b70c35e2213231573b5f1bbaef3a1f728 Mon Sep 17 00:00:00 2001 From: Yuriy Dupyn <2153100+omedusyo@users.noreply.github.com> Date: Sun, 4 Feb 2024 14:40:36 +0100 Subject: [PATCH 4/5] Fix type-checking option bug --- parser/src/validation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parser/src/validation.rs b/parser/src/validation.rs index 037bed2..741d0fd 100644 --- a/parser/src/validation.rs +++ b/parser/src/validation.rs @@ -373,7 +373,7 @@ fn type_from_literal_with_type_hint(lit: &Literal, hint: &DbType) -> Result DbType::String, Literal::Int(_) => DbType::Int, Literal::Uuid(_) => DbType::Uuid, - Literal::Some(l) => type_from_literal_with_type_hint(l, hint)?, + Literal::Some(l) => DbType::Option(Box::new(type_from_literal_with_type_hint(l, hint)?)), Literal::None => { if matches!(hint, DbType::Option(_)) { hint.clone() From a8c9c572314f56aa6f81a823965fd941ffd7fc53 Mon Sep 17 00:00:00 2001 From: Maxim Svistunov Date: Sun, 4 Feb 2024 14:53:52 +0100 Subject: [PATCH 5/5] Remove the ASCII-only check --- parser/src/core.rs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/parser/src/core.rs b/parser/src/core.rs index 4351bd1..1782f6b 100644 --- a/parser/src/core.rs +++ b/parser/src/core.rs @@ -39,11 +39,6 @@ fn parse_statement1(input: &str) -> IResult<&str, Vec> { } pub fn parse_and_validate(str_query: String, db_schema: &DbSchema) -> Result { - if let Some(non_ascii) = str_query.chars().find(|c| c.len_utf8() > 1) { - return Err(Error::ParsingError( - format!("Non ASCII character found: {}", non_ascii) - )); - } let (_, op) = parse_statement(str_query.as_str()).map_err(|err| Error::ParsingError(err.to_string()))?; @@ -55,14 +50,6 @@ mod test { use crate::core::parse_statement1; use crate::parse_and_validate; use crate::Error; - #[test] - fn test_non_unicode() { - let result = parse_and_validate(format!("SELECT * FROM users WHERE name = \"京\""), &Default::default()); - assert!(matches!(result, Err(Error::ParsingError(_)))); - if let Err(Error::ParsingError(err)) = result { - assert_eq!(err, format!("Non ASCII character found: {}", '京')); - } - } #[test] fn test_parse_two_select() {