From de8c6164cf24a52be587af31e530279c6496d801 Mon Sep 17 00:00:00 2001 From: Maxim Svistunov Date: Sun, 4 Feb 2024 13:32:55 +0100 Subject: [PATCH] Resolve TODOs in parsing Return error for queries containing non-ASCII characters Allow underscores in identifiers Add a delete statement test with spaces Remove trailing spaces and semicolons from tests and parsers Complete the multiple statement parser TODO --- parser/src/core.rs | 56 ++++++++++++++++++++++++++++++++---- parser/src/parsing/common.rs | 25 ++++++++++++---- parser/src/parsing/create.rs | 12 ++++---- parser/src/parsing/delete.rs | 11 ++++--- parser/src/parsing/index.rs | 6 ++-- parser/src/parsing/insert.rs | 6 ++-- parser/src/parsing/select.rs | 11 ++----- 7 files changed, 89 insertions(+), 38 deletions(-) diff --git a/parser/src/core.rs b/parser/src/core.rs index 75d2256..af348e4 100644 --- a/parser/src/core.rs +++ b/parser/src/core.rs @@ -1,6 +1,6 @@ use crate::syntax::RawQuerySyntax; use minisql::{interpreter::DbSchema, operation::Operation}; -use nom::{branch::alt, IResult}; +use nom::{branch::alt, character::complete::{multispace0, char}, multi::many1, sequence::{delimited, terminated}, IResult}; use thiserror::Error; use crate::{ @@ -19,6 +19,7 @@ pub enum Error { ValidationError(#[from] ValidationError), } +/// Parse single statement fn parse_statement(input: &str) -> IResult<&str, RawQuerySyntax> { alt(( parse_insert, @@ -31,14 +32,59 @@ fn parse_statement(input: &str) -> IResult<&str, RawQuerySyntax> { ))(input) } +/// Parse one or more statements +fn parse_statement1(input: &str) -> IResult<&str, Vec> { + many1(terminated(parse_statement, delimited(multispace0, char(';'), multispace0)))(input) +} + pub fn parse_and_validate(str_query: String, db_schema: &DbSchema) -> Result { + if let Some(non_ascii) = str_query.chars().find(|c| c.len_utf8() > 1) { + return Err(Error::ParsingError( + format!("Non ASCII character found: {}", non_ascii) + )); + } let (_, op) = parse_statement(str_query.as_str()).map_err(|err| Error::ParsingError(err.to_string()))?; Ok(validate_operation(op, db_schema)?) } -// #[test] -// fn test_select() { -// parse_and_validate("SELECT * FROM users;".to_string(), &Vec::new()).unwrap(); -// } +#[cfg(test)] +mod test { + use crate::core::parse_statement1; + use crate::parse_and_validate; + use crate::Error; + #[test] + fn test_non_unicode() { + let result = parse_and_validate(format!("SELECT * FROM users WHERE name = \"京\""), &Default::default()); + assert!(matches!(result, Err(Error::ParsingError(_)))); + if let Err(Error::ParsingError(err)) = result { + assert_eq!(err, format!("Non ASCII character found: {}", '京')); + } + } + + #[test] + fn test_parse_two_select() { + let (rest, sntx) = parse_statement1("SELECT * FROM users ; SELECT * FROM cities ; ").expect("should parse"); + assert_eq!( + sntx.len(), + 2 + ); + assert_eq!(rest, ""); + } + + #[test] + fn test_parse_three_insert_one_select() { + let (rest, sntx) = parse_statement1( + r#"INSERT INTO table1 (id, data) VALUES (u1, 2); + SELECT * FROM users ; + INSERT INTO table1 (id, data) VALUES (u4, 30) ; + INSERT INTO table1 (id, data) VALUES (u5, 40) ; + "#).expect("should parse"); + assert_eq!( + sntx.len(), + 4 + ); + assert_eq!(rest, ""); + } +} diff --git a/parser/src/parsing/common.rs b/parser/src/parsing/common.rs index fac2a0a..9b85260 100644 --- a/parser/src/parsing/common.rs +++ b/parser/src/parsing/common.rs @@ -1,8 +1,8 @@ use minisql::type_system::DbType; use nom::{ branch::alt, - bytes::complete::tag, - character::complete::{alphanumeric1, anychar, char, multispace0, multispace1}, + bytes::complete::{tag, take_while}, + character::{complete::{alphanumeric1, anychar, char, multispace0, multispace1}, is_alphanumeric}, combinator::peek, error::make_error, sequence::{delimited, terminated}, @@ -20,10 +20,11 @@ pub fn parse_table_name(input: &str) -> IResult<&str, &str> { } pub fn parse_identifier(input: &str) -> IResult<&str, &str> { - // TODO: allow underscores let (_, first) = peek(anychar)(input)?; - if first.is_alphabetic() { - alphanumeric1(input) + if first.is_alphabetic() || first == '_' { + take_while(|c: char| { + is_alphanumeric(c as u8) || c == '_' + })(input) } else { Err(nom::Err::Error(make_error( input, @@ -77,7 +78,7 @@ fn parse_equality(input: &str) -> IResult<&str, Condition> { mod tests { use minisql::type_system::DbType; - use crate::parsing::common::{parse_db_type, parse_equality}; + use crate::parsing::common::{parse_db_type, parse_equality, parse_identifier}; use crate::syntax::Condition; #[test] @@ -114,4 +115,16 @@ mod tests { )); assert!(matches!(parse_db_type("Unknown"), Err(_))); } + + #[test] + fn test_parse_identifier() { + assert_eq!( + parse_identifier("_variable__Test").expect("should parse").1, + "_variable__Test" + ); + assert!(matches!( + parse_identifier("123_variable__Test"), + Err(_) + )); + } } diff --git a/parser/src/parsing/create.rs b/parser/src/parsing/create.rs index f13e2bf..6ab7697 100644 --- a/parser/src/parsing/create.rs +++ b/parser/src/parsing/create.rs @@ -22,8 +22,6 @@ pub fn parse_create(input: &str) -> IResult<&str, RawQuerySyntax> { let (input, column_definitions) = parse_column_definitions(input)?; let (input, _) = char(')')(input)?; - let (input, _) = multispace0(input)?; - let (input, _) = char(';')(input)?; let schema = RawTableSchema { table_name: table_name.to_string(), columns: column_definitions, @@ -66,32 +64,32 @@ mod tests { #[test] fn test_parse_create_no_spaces() { - parse_create("CREATE TABLE \"Table1\"(id UUID ,column1 INT);").expect("should parse"); + parse_create("CREATE TABLE \"Table1\"(id UUID ,column1 INT)").expect("should parse"); } #[test] fn test_parse_create_primary_key() { - parse_create("CREATE TABLE \"Table1\"(id UUID PRIMARY KEY,column1 INT);") + parse_create("CREATE TABLE \"Table1\"(id UUID PRIMARY KEY,column1 INT)") .expect("should parse"); } #[test] fn test_parse_create_no_quotes_table_name() { - parse_create("CREATE TABLE Table1(id UUID PRIMARY KEY,column1 INT);") + parse_create("CREATE TABLE Table1(id UUID PRIMARY KEY,column1 INT)") .expect("should parse"); } #[test] fn test_parse_create_primary_key_with_spaces() { parse_create( - "CREATE TABLE \"Table1\" ( id UUID PRIMARY KEY , column1 INT ) ;", + "CREATE TABLE \"Table1\" ( id UUID PRIMARY KEY , column1 INT )", ) .expect("should parse"); } #[test] fn test_parse_create() { - let (_, create) = parse_create("CREATE TABLE \"Table1\"( id UUID , column1 INT );") + let (_, create) = parse_create("CREATE TABLE \"Table1\"( id UUID , column1 INT )") .expect("should parse"); assert!(matches!(create, RawQuerySyntax::CreateTable(_))); match create { diff --git a/parser/src/parsing/delete.rs b/parser/src/parsing/delete.rs index 2cbe88f..b7f3ac1 100644 --- a/parser/src/parsing/delete.rs +++ b/parser/src/parsing/delete.rs @@ -15,8 +15,6 @@ pub fn parse_delete(input: &str) -> IResult<&str, RawQuerySyntax> { let (input, table_name) = parse_table_name(input)?; let (input, _) = multispace0(input)?; let (input, condition) = parse_condition(input)?; - let (input, _) = multispace0(input)?; - let (input, _) = char(';')(input)?; Ok(( input, RawQuerySyntax::Delete(table_name.to_string(), condition), @@ -31,9 +29,14 @@ mod tests { #[test] fn test_parse_delete() { let (_, operation) = - parse_delete("DELETE FROM \"T1\" WHERE id = 1 ;").expect("should parse"); + parse_delete("DELETE FROM \"T1\" WHERE id = 1").expect("should parse"); assert!(matches!(operation, RawQuerySyntax::Delete(_, _))) } - // TODO: add test with condition + #[test] + fn test_parse_delete_with_spaces() { + let (_, operation) = + parse_delete("DELETE FROM T1 WHERE id = 1").expect("should parse"); + assert!(matches!(operation, RawQuerySyntax::Delete(_, _))) + } } diff --git a/parser/src/parsing/index.rs b/parser/src/parsing/index.rs index 4a38b32..a2941b8 100644 --- a/parser/src/parsing/index.rs +++ b/parser/src/parsing/index.rs @@ -30,8 +30,6 @@ pub fn parse_create_index(input: &str) -> IResult<&str, RawQuerySyntax> { let (input, column_name) = parse_identifier(input)?; let (input, _) = multispace0(input)?; let (input, _) = char(')')(input)?; - let (input, _) = multispace0(input)?; - let (input, _) = char(';')(input)?; let operation = RawQuerySyntax::CreateIndex(table_name.to_string(), column_name.to_string()); Ok((input, operation)) } @@ -44,7 +42,7 @@ mod tests { #[test] fn test_create_index() { let (_, syntax) = - parse_create_index("CREATE UNIQUE INDEX idxcontactsemail ON \"contacts\" (email);") + parse_create_index("CREATE UNIQUE INDEX idxcontactsemail ON \"contacts\" (email)") .expect("should parse"); assert!(matches!(syntax, RawQuerySyntax::CreateIndex(_, _))); match syntax { @@ -59,7 +57,7 @@ mod tests { #[test] fn test_create_index_with_spaces() { let (_, syntax) = parse_create_index( - "CREATE UNIQUE INDEX idxcontactsemail ON \"contacts\" ( email ) ;", + "CREATE UNIQUE INDEX idxcontactsemail ON \"contacts\" ( email )", ) .expect("should parse"); assert!(matches!(syntax, RawQuerySyntax::CreateIndex(_, _))); diff --git a/parser/src/parsing/insert.rs b/parser/src/parsing/insert.rs index 2dfa061..1048559 100644 --- a/parser/src/parsing/insert.rs +++ b/parser/src/parsing/insert.rs @@ -33,8 +33,6 @@ pub fn parse_insert(input: &str) -> IResult<&str, RawQuerySyntax> { let (input, values) = parse_values(input)?; let (input, _) = multispace0(input)?; let (input, _) = char(')')(input)?; - let (input, _) = multispace0(input)?; - let (input, _) = char(';')(input)?; Ok(( input, RawQuerySyntax::Insert( @@ -64,7 +62,7 @@ mod tests { #[test] fn test_parse_insert() { - let sql = "INSERT INTO \"MyTable\" (id, data) VALUES(1, \"Text\");"; + let sql = "INSERT INTO \"MyTable\" (id, data) VALUES(1, \"Text\")"; let syntax = parse_insert(sql).expect("should parse"); match syntax { ("", RawQuerySyntax::Insert(table_name, insertion_values)) => { @@ -89,7 +87,7 @@ mod tests { #[test] fn test_parse_insert_with_spaces() { let sql = - "INSERT INTO \"MyTable\" ( id, data ) VALUES ( 1, \"Text\" ) ;"; + "INSERT INTO \"MyTable\" ( id, data ) VALUES ( 1, \"Text\" )"; let operation = parse_insert(sql).expect("should parse"); match operation { ("", RawQuerySyntax::Insert(table_name, insertion_values)) => { diff --git a/parser/src/parsing/select.rs b/parser/src/parsing/select.rs index 3d14bd6..0f8094d 100644 --- a/parser/src/parsing/select.rs +++ b/parser/src/parsing/select.rs @@ -22,9 +22,6 @@ pub fn parse_select(input: &str) -> IResult<&str, RawQuerySyntax> { let (input, table_name) = parse_table_name(input)?; let (input, _) = multispace0(input)?; let (input, condition) = parse_condition(input)?; - let (input, _) = multispace0(input)?; - // TODO: make it optional? - let (input, _) = tag(";")(input)?; Ok(( input, RawQuerySyntax::Select(table_name.to_string(), column_selection, condition), @@ -52,7 +49,7 @@ mod tests { #[test] fn test_parse_select_all() { - let sql = "SELECT * FROM \"MyTable\";"; + let sql = "SELECT * FROM \"MyTable\""; let operation = parse_select(sql).expect("should parse"); match operation { ("", RawQuerySyntax::Select(table_name, column_selection, maybe_condition)) => { @@ -79,7 +76,7 @@ mod tests { #[test] fn test_parse_select_columns() { - let sql = "SELECT name , email FROM \"AddressBook\" ;"; + let sql = "SELECT name , email FROM \"AddressBook\""; let operation = parse_select(sql).expect("should parse"); match operation { ("", RawQuerySyntax::Select(table_name, column_selection, maybe_condition)) => { @@ -105,7 +102,7 @@ mod tests { #[test] fn test_parse_select_where() { use crate::syntax::Condition; - let sql = "SELECT * FROM \"AddressBook\" WHERE id = 5 ;"; + let sql = "SELECT * FROM \"AddressBook\" WHERE id = 5"; let operation = parse_select(sql).expect("should parse"); match operation { ("", RawQuerySyntax::Select(table_name, column_selection, maybe_condition)) => { @@ -119,6 +116,4 @@ mod tests { } } } - // TODO: a test with multiple statements - // TODO: allow underscores in identifiers }