From 61c0a34253f2565c2c19f70a0ca15f6275b952ac Mon Sep 17 00:00:00 2001 From: Maxim Svistunov Date: Fri, 26 Jan 2024 18:20:45 +0100 Subject: [PATCH 1/8] Add parsing (incl. validation) Ensure identifiers start with alphabetical character Rename parse_variable_name -> parse_column_name Add DB value parsers and condition parser placeholder Fix number parser, basic condition parser Move select parser to select module Add create statement parser Move condition parser to common; add delete statement parser Add drop statement parser Add insert parser Add update parser, combine operation parsers into one Add initial validation, fix compiler warnings Validation WIP Allow more spaces in create statement, update TableSchema struct Add create index parser and validator Add todo in parse_identifier Rework the new structure, many other changes --- Cargo.lock | 124 +++++++++++++++++++--- Cargo.toml | 3 +- client/Cargo.toml | 4 +- client/src/main.rs | 14 ++- minisql/src/interpreter.rs | 18 +++- minisql/src/schema.rs | 8 +- minisql/src/type_system.rs | 2 +- parser/Cargo.toml | 13 +++ parser/src/common.rs | 95 +++++++++++++++++ parser/src/core.rs | 41 ++++++++ parser/src/create.rs | 109 ++++++++++++++++++++ parser/src/delete.rs | 38 +++++++ parser/src/index.rs | 70 +++++++++++++ parser/src/insert.rs | 94 +++++++++++++++++ parser/src/lib.rs | 16 +++ parser/src/literal.rs | 164 +++++++++++++++++++++++++++++ parser/src/select.rs | 122 ++++++++++++++++++++++ parser/src/validation.rs | 206 +++++++++++++++++++++++++++++++++++++ server/Cargo.toml | 4 +- server/src/main.rs | 32 ++++-- 20 files changed, 1138 insertions(+), 39 deletions(-) create mode 100644 parser/Cargo.toml create mode 100644 parser/src/common.rs create mode 100644 parser/src/core.rs create mode 100644 parser/src/create.rs create mode 100644 parser/src/delete.rs create mode 100644 parser/src/index.rs create mode 100644 parser/src/insert.rs create mode 100644 parser/src/lib.rs create mode 100644 parser/src/literal.rs create mode 100644 parser/src/select.rs create mode 100644 parser/src/validation.rs diff --git a/Cargo.lock b/Cargo.lock index 8b89a08..428e5cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -29,9 +29,9 @@ version = "0.1.74" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.70", + "quote 1.0.33", + "syn 2.0.41", ] [[package]] @@ -112,6 +112,8 @@ name = "client" version = "0.1.0" dependencies = [ "anyhow", + "minisql", + "parser", "proto", "tokio", ] @@ -150,6 +152,12 @@ version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "minisql" version = "0.1.0" @@ -177,6 +185,38 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "nom" +version = "4.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6" +dependencies = [ + "memchr", + "version_check", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "nom-peg" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3fefa2869e8c4f92ee5135cbeba457eebf1f24e188616bcbd334abb51be6a3" +dependencies = [ + "nom 4.2.3", + "proc-macro2 0.4.30", + "quote 0.6.13", + "syn 0.15.44", +] + [[package]] name = "num_cpus" version = "1.16.0" @@ -219,12 +259,32 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "parser" +version = "0.1.0" +dependencies = [ + "bimap", + "minisql", + "nom 7.1.3", + "nom-peg", + "thiserror", +] + [[package]] name = "pin-project-lite" version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" +[[package]] +name = "proc-macro2" +version = "0.4.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" +dependencies = [ + "unicode-xid", +] + [[package]] name = "proc-macro2" version = "1.0.70" @@ -244,13 +304,22 @@ dependencies = [ "tokio", ] +[[package]] +name = "quote" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" +dependencies = [ + "proc-macro2 0.4.30", +] + [[package]] name = "quote" version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ - "proc-macro2", + "proc-macro2 1.0.70", ] [[package]] @@ -289,9 +358,9 @@ version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.70", + "quote 1.0.33", + "syn 2.0.41", ] [[package]] @@ -299,6 +368,8 @@ name = "server" version = "0.1.0" dependencies = [ "anyhow", + "minisql", + "parser", "proto", "tokio", ] @@ -328,14 +399,25 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "syn" +version = "0.15.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" +dependencies = [ + "proc-macro2 0.4.30", + "quote 0.6.13", + "unicode-xid", +] + [[package]] name = "syn" version = "2.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269" dependencies = [ - "proc-macro2", - "quote", + "proc-macro2 1.0.70", + "quote 1.0.33", "unicode-ident", ] @@ -354,9 +436,9 @@ version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.70", + "quote 1.0.33", + "syn 2.0.41", ] [[package]] @@ -384,9 +466,9 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.70", + "quote 1.0.33", + "syn 2.0.41", ] [[package]] @@ -395,6 +477,18 @@ version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +[[package]] +name = "unicode-xid" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" + +[[package]] +name = "version_check" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" + [[package]] name = "virtue" version = "0.0.13" diff --git a/Cargo.toml b/Cargo.toml index 714e4bf..57d0219 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,5 +4,6 @@ members = [ "minisql", "proto", "server", - "client" + "client", + "parser" ] diff --git a/client/Cargo.toml b/client/Cargo.toml index 9cf09e6..a8012c6 100644 --- a/client/Cargo.toml +++ b/client/Cargo.toml @@ -8,4 +8,6 @@ edition = "2021" [dependencies] tokio = { version = "1.35.1", features = ["full"] } anyhow = "1.0.76" -proto = { path = "../proto" } \ No newline at end of file +proto = { path = "../proto" } +minisql = { path = "../minisql" } +parser = { path = "../parser" } diff --git a/client/src/main.rs b/client/src/main.rs index e77ea5c..fc845e9 100644 --- a/client/src/main.rs +++ b/client/src/main.rs @@ -31,7 +31,7 @@ async fn main() -> anyhow::Result<()> { query: "SELECT * FROM users;".to_string().into(), })).await?; writer.flush().await?; - + let mut line = String::new(); loop { let msg: BackendMessage = reader.read_proto().await?; match msg { @@ -46,7 +46,17 @@ async fn main() -> anyhow::Result<()> { }, BackendMessage::ReadyForQuery(data) => { println!("Ready for query: {:?}", data); - break; + line.clear(); + let res = std::io::stdin().read_line(&mut line); + if let Ok(_) = res { + if line.eq("exit") { + break; + } + writer.write_proto(FrontendMessage::Query(QueryData { + query: line.clone().into(), + })).await?; + writer.flush().await?; + } }, m => { println!("Unexpected message: {:?}", m); diff --git a/minisql/src/interpreter.rs b/minisql/src/interpreter.rs index 84f2494..e85d085 100644 --- a/minisql/src/interpreter.rs +++ b/minisql/src/interpreter.rs @@ -20,7 +20,7 @@ pub struct State { // #[derive(Debug)] pub enum Response<'a> { - Selected(Box + 'a>), + Selected(Box + 'a + Send>), Inserted, Deleted(usize), // how many were deleted TableCreated, @@ -48,13 +48,23 @@ impl std::fmt::Debug for Response<'_> { } impl State { - fn new() -> Self { + pub fn new() -> Self { Self { table_name_position_mapping: BiMap::new(), tables: vec![], } } + /// TODO: return a reference to avoid allocations + pub fn metadata<'a>(&'a self) -> Vec<(String, &'a TableSchema)> { + let mut m = Vec::new(); + for (name, pos) in &self.table_name_position_mapping { + let table_schema = self.tables.get(*pos).unwrap().schema(); + m.push((name.clone(), table_schema)); + } + m + } + fn table_from_name<'a>(&'a self, table_name: &TableName) -> DbResult<&'a Table> { match self.table_name_position_mapping.get_by_left(table_name) { Some(table_position) => { @@ -99,7 +109,7 @@ impl State { let selected_rows = match maybe_condition { None => { let x = table.select_all_rows(selected_column_positions); - Box::new(x) as Box + 'a> + Box::new(x) as Box + 'a + Send> }, Some(Condition::Eq(eq_column_name, value)) => { @@ -112,7 +122,7 @@ impl State { eq_column_position, value, )?; - Box::new(x) as Box + 'a> + Box::new(x) as Box + 'a + Send> } }; diff --git a/minisql/src/schema.rs b/minisql/src/schema.rs index c9574ac..ba9759a 100644 --- a/minisql/src/schema.rs +++ b/minisql/src/schema.rs @@ -12,15 +12,15 @@ use std::collections::HashMap; pub struct TableSchema { table_name: TableName, // used for descriptive errors primary_key: ColumnPosition, - column_name_position_mapping: BiMap, - types: Vec, + pub column_name_position_mapping: BiMap, + pub types: Vec, } pub type TableName = String; pub type ColumnName = String; impl TableSchema { - pub(crate) fn new(table_name: TableName, primary_key: ColumnPosition, column_name_position_map: Vec<(ColumnName, ColumnPosition)>, types: Vec) -> Self { + pub fn new(table_name: TableName, primary_key: ColumnPosition, column_name_position_map: Vec<(ColumnName, ColumnPosition)>, types: Vec) -> Self { let mut column_name_position_mapping: BiMap = BiMap::new(); for (column_name, column_position) in column_name_position_map { column_name_position_mapping.insert(column_name, column_position); @@ -113,7 +113,7 @@ impl TableSchema { } } - fn number_of_columns(&self) -> usize { + pub fn number_of_columns(&self) -> usize { self.column_name_position_mapping.len() } diff --git a/minisql/src/type_system.rs b/minisql/src/type_system.rs index 4edc3ec..7bf5f60 100644 --- a/minisql/src/type_system.rs +++ b/minisql/src/type_system.rs @@ -1,5 +1,5 @@ // ==============Types================ -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum DbType { String, Int, diff --git a/parser/Cargo.toml b/parser/Cargo.toml new file mode 100644 index 0000000..ff211a0 --- /dev/null +++ b/parser/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "parser" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +nom-peg = "0.1.1" +nom = "7.1.3" +minisql = { path = "../minisql" } +bimap = "0.6.3" +thiserror = "1" \ No newline at end of file diff --git a/parser/src/common.rs b/parser/src/common.rs new file mode 100644 index 0000000..0cd90c1 --- /dev/null +++ b/parser/src/common.rs @@ -0,0 +1,95 @@ +use nom::{ + character::complete::{alphanumeric1, char, multispace0, anychar, multispace1}, + combinator::peek, + error::make_error, + sequence::{delimited, terminated}, + bytes::complete::tag, + IResult, branch::alt, +}; +use minisql::{operation::Condition, type_system::DbType}; + +use crate::literal::parse_db_value; + +pub fn parse_table_name(input: &str) -> IResult<&str, &str> { + alt(( + delimited(char('"'), alphanumeric1, char('"')), + parse_identifier + ))(input) +} + +pub fn parse_identifier(input: &str) -> IResult<&str, &str> { + // TODO: allow underscores + let (_, first) = peek(anychar)(input)?; + if first.is_alphabetic() { + alphanumeric1(input) + } else { + Err(nom::Err::Error(make_error(input, nom::error::ErrorKind::Alpha))) + } +} + +pub fn parse_column_name(input: &str) -> IResult<&str, String> { + terminated(parse_identifier, multispace0)(input).map(|(rest, name)| (rest, name.to_string())) +} + +pub fn parse_db_type(input: &str) -> IResult<&str, DbType> { + let (input, type_name) = alt((tag("STRING"), tag("INT"), tag("Float"), tag("UUID")))(input)?; + let db_type = match type_name { + "STRING" => DbType::String, + "INT" => DbType::Int, + "UUID" => DbType::Uuid, + "Float" => DbType::Number, + _ => return Err(nom::Err::Failure(make_error(input, nom::error::ErrorKind::IsNot))) + }; + Ok((input, db_type)) +} + +pub fn parse_condition(input: &str) -> IResult<&str, Option> { + match tag::<&str, &str, nom::error::Error<&str>>("WHERE")(input) { + Ok((input, _)) => { + let (input, _) = multispace1(input)?; + let (input, condition) = parse_equality(input)?; + Ok((input, Some(condition))) + } + Err(_) => { + Ok((input, None)) + } + } +} + +fn parse_equality(input: &str) -> IResult<&str, Condition> { + let (input, column_name) = parse_column_name(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = char('=')(input)?; + let (input, _) = multispace0(input)?; + let (input, db_value) = parse_db_value(input)?; + Ok((input, Condition::Eq(column_name, db_value))) +} + +#[cfg(test)] +mod tests { + use minisql::{operation::Condition, type_system::DbType}; + use crate::common::{parse_db_type, parse_equality}; + + #[test] + fn test_parse_equality() { + use minisql::type_system::{IndexableValue, Value}; + match parse_equality("id = 1") { + Ok(("", Condition::Eq(column_name, value))) => { + assert!(column_name.eq("id")); + assert_eq!(value, Value::Indexable(IndexableValue::Int(1))) + } + _ => { + panic!("should parse"); + } + } + } + + #[test] + fn test_parse_db_type() { + assert!(matches!(parse_db_type("INT").expect("should parse").1, DbType::Int)); + assert!(matches!(parse_db_type("STRING").expect("should parse").1, DbType::String)); + assert!(matches!(parse_db_type("UUID").expect("should parse").1, DbType::Uuid)); + assert!(matches!(parse_db_type("NUMBER").expect("should parse").1, DbType::Number)); + assert!(matches!(parse_db_type("Unknown"), Err(_))); + } +} \ No newline at end of file diff --git a/parser/src/core.rs b/parser/src/core.rs new file mode 100644 index 0000000..778973d --- /dev/null +++ b/parser/src/core.rs @@ -0,0 +1,41 @@ +use minisql::{operation::Operation, schema::TableSchema}; +use nom::{branch::alt, multi::many0, IResult}; + +use crate::{create::parse_create, delete::parse_delete, index::parse_create_index, insert::parse_insert, select::parse_select, validation::{validate_operation, ValidationError}}; + +#[derive(Debug)] +pub enum Error { + ParsingError(String), + ValidationError(ValidationError) +} + +pub fn parse_statement<'a>(input: &'a str) -> IResult<&str, Operation> { + alt(( + parse_insert, + parse_create, + parse_delete, + //parse_drop, + parse_select, + // parse_update, + parse_create_index + ))(input) +} + +pub fn parse_statements<'a>(input: &'a str) -> IResult<&str, Vec> { + many0(parse_statement)(input) +} + +pub fn parse_and_validate(query: String, db_metadata: &Vec<(String, &TableSchema)>) -> Result { + let (_, op) = parse_statement(query.as_str()) + .map_err(|err| { + Error::ParsingError(err.to_string()) + })?; + + validate_operation(&op, db_metadata).map_err(|err| Error::ValidationError(err))?; + Ok(op) +} + +// #[test] +// fn test_select() { +// parse_and_validate("SELECT * FROM users;".to_string(), &Vec::new()).unwrap(); +// } \ No newline at end of file diff --git a/parser/src/create.rs b/parser/src/create.rs new file mode 100644 index 0000000..4cda32d --- /dev/null +++ b/parser/src/create.rs @@ -0,0 +1,109 @@ +use minisql::{operation::Operation, schema::{ColumnName, TableSchema}, type_system::DbType}; +use nom::{ + bytes::complete::tag, + character::complete::{char, multispace0, multispace1}, + multi::separated_list0, + sequence::terminated, + IResult, combinator::opt, +}; + +use crate::common::{parse_table_name, parse_identifier, parse_db_type}; + +pub fn parse_create(input: &str) -> IResult<&str, Operation> { + let (input, _) = tag("CREATE")(input)?; + let (input, _) = multispace1(input)?; + let (input, _) = tag("TABLE")(input)?; + let (input, _) = multispace1(input)?; + let (input, table_name) = parse_table_name(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = char('(')(input)?; + let (input, _) = multispace0(input)?; + let (input, column_definitions) = parse_column_definitions(input)?; + let mut column_name_position_mapping = Vec::new(); + let mut types: Vec = Vec::new(); + let mut primary_key = None; + for (position, (column_name, db_type, pk)) in column_definitions.iter().enumerate() { + types.push(db_type.clone()); + if *pk { + primary_key = Some(position); + } + column_name_position_mapping.push((column_name.clone(), position)); + } + + let (input, _) = char(')')(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = char(';')(input)?; + let schema = TableSchema::new( + table_name.to_string(), + primary_key.unwrap_or_default(), + column_name_position_mapping, + types + ); + Ok(( + input, + Operation::CreateTable(table_name.to_string(), schema), + )) +} + +pub fn parse_column_definitions(input: &str) -> IResult<&str, Vec<(ColumnName, DbType, bool)>> { + separated_list0(terminated(char(','), multispace0), parse_column_definition)(input) +} + +fn parse_primary_key(input: &str) -> IResult<&str, &str> { + let (input, _) = multispace1(input)?; + let (input, _) = tag("PRIMARY")(input)?; + let (input, _) = multispace1(input)?; + let (input, _) = tag("KEY")(input)?; + Ok((input, "PRIMARY KEY")) +} + +pub fn parse_column_definition(input: &str) -> IResult<&str, (ColumnName, DbType, bool)> { + let (input, identifier) = parse_identifier(input)?; + let (input, _) = multispace1(input)?; + let (input, db_type) = parse_db_type(input)?; + let (input, pk) = opt(parse_primary_key)(input).map(|(input, pk)| (input, pk.is_some()))?; + let (input, _) = multispace0(input)?; + Ok((input, (identifier.to_string(), db_type, pk))) +} + +#[cfg(test)] +mod tests { + use minisql::operation::Operation; + use crate::create::parse_create; + + #[test] + fn test_parse_create_no_spaces() { + parse_create("CREATE TABLE \"Table1\"(id UUID ,column1 INT);").expect("should parse"); + } + + #[test] + fn test_parse_create_primary_key() { + parse_create("CREATE TABLE \"Table1\"(id UUID PRIMARY KEY,column1 INT);").expect("should parse"); + } + + #[test] + fn test_parse_create_no_quotes_table_name() { + parse_create("CREATE TABLE Table1(id UUID PRIMARY KEY,column1 INT);").expect("should parse"); + } + + #[test] + fn test_parse_create_primary_key_with_spaces() { + parse_create("CREATE TABLE \"Table1\" ( id UUID PRIMARY KEY , column1 INT ) ;").expect("should parse"); + } + + #[test] + fn test_parse_create() { + let (_, create) = parse_create("CREATE TABLE \"Table1\"( id UUID , column1 INT );").expect("should parse"); + assert!(matches!(create, Operation::CreateTable(_ ,_))); + match create { + Operation::CreateTable(name, schema) => { + assert_eq!(name, "Table1"); + assert_eq!(schema.number_of_columns(), 2); + assert_eq!(schema.column_position_from_column_name(&"id".to_string()).unwrap(), 0); + assert_eq!(schema.column_position_from_column_name(&"column1".to_string()).unwrap(), 1); + } + _ => {} + } + + } +} \ No newline at end of file diff --git a/parser/src/delete.rs b/parser/src/delete.rs new file mode 100644 index 0000000..e1f5a4c --- /dev/null +++ b/parser/src/delete.rs @@ -0,0 +1,38 @@ +use minisql::operation::Operation; +use nom::{ + bytes::complete::tag, + character::complete::{char, multispace0, multispace1}, + IResult, +}; + +use crate::common::{parse_table_name, parse_condition}; + +pub fn parse_delete(input: &str) -> IResult<&str, Operation> { + let (input, _) = tag("DELETE")(input)?; + let (input, _) = multispace1(input)?; + let (input, _) = tag("FROM")(input)?; + let (input, _) = multispace1(input)?; + let (input, table_name) = parse_table_name(input)?; + let (input, _) = multispace0(input)?; + let (input, condition) = parse_condition(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = char(';')(input)?; + Ok(( + input, + Operation::Delete(table_name.to_string(), condition), + )) +} + +#[cfg(test)] +mod tests { + use minisql::operation::Operation; + use crate::delete::parse_delete; + + #[test] + fn test_parse_delete() { + let (_, operation) = parse_delete("DELETE FROM \"T1\" WHERE id = 1 ;").expect("should parse"); + assert!(matches!(operation, Operation::Delete(_, _))) + } + +// TODO: add test with condition +} \ No newline at end of file diff --git a/parser/src/index.rs b/parser/src/index.rs new file mode 100644 index 0000000..80f6a96 --- /dev/null +++ b/parser/src/index.rs @@ -0,0 +1,70 @@ +use minisql::operation::Operation; +use nom::{ + bytes::complete::tag, + character::complete::{char, multispace0, multispace1}, + IResult, combinator::opt, +}; + +use crate::common::{parse_identifier, parse_table_name}; + +pub fn parse_create_index(input: &str) -> IResult<&str, Operation> { + let (input, _) = tag("CREATE")(input)?; + let unique = |input| -> IResult<&str, bool> { + let (input, _) = multispace1(input)?; + let (input, _) = tag("UNIQUE")(input)?; + Ok((input, true)) + }; + let (input, _) = opt(unique)(input)?; + let (input, _) = multispace1(input)?; + let (input, _) = tag("INDEX")(input)?; + let (input, _) = multispace1(input)?; + let (input, _) = parse_identifier(input)?; + let (input, _) = multispace1(input)?; + let (input, _) = tag("ON")(input)?; + let (input, _) = multispace1(input)?; + let (input, table_name) = parse_table_name(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = char('(')(input)?; + let (input, _) = multispace0(input)?; + let (input, column_name) = parse_identifier(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = char(')')(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = char(';')(input)?; + let operation = Operation::CreateIndex(table_name.to_string(), column_name.to_string()); + Ok((input, operation)) +} + + +#[cfg(test)] +mod tests { + use minisql::operation::Operation; + use crate::index::parse_create_index; + + + #[test] + fn test_create_index() { + let (_, operation) = parse_create_index("CREATE UNIQUE INDEX idxcontactsemail ON \"contacts\" (email);").expect("should parse"); + assert!(matches!(operation, Operation::CreateIndex(_, _))); + match operation { + Operation::CreateIndex(table_name, column_name) => { + assert_eq!(table_name, "contacts"); + assert_eq!(column_name, "email"); + } + _ => {} + } + } + + #[test] + fn test_create_index_with_spaces() { + let (_, operation) = parse_create_index("CREATE UNIQUE INDEX idxcontactsemail ON \"contacts\" ( email ) ;").expect("should parse"); + assert!(matches!(operation, Operation::CreateIndex(_, _))); + match operation { + Operation::CreateIndex(table_name, column_name) => { + assert_eq!(table_name, "contacts"); + assert_eq!(column_name, "email"); + } + _ => {} + } + } +} \ No newline at end of file diff --git a/parser/src/insert.rs b/parser/src/insert.rs new file mode 100644 index 0000000..340e6bf --- /dev/null +++ b/parser/src/insert.rs @@ -0,0 +1,94 @@ +use crate::{literal::parse_db_value, common::{parse_table_name, parse_identifier}}; +use minisql::{operation::Operation, type_system::Value}; +use nom::{ + bytes::complete::tag, + character::complete::{multispace0, multispace1, char}, + combinator::map, + multi::separated_list0, + sequence::terminated, + IResult, +}; + +pub fn parse_insert(input: &str) -> IResult<&str, Operation> { + let (input, _) = tag("INSERT")(input)?; + let (input, _) = multispace1(input)?; + let (input, _) = tag("INTO")(input)?; + let (input, _) = multispace1(input)?; + let (input, table_name) = parse_table_name(input)?; + let (input, _) = multispace1(input)?; + let (input, _) = char('(')(input)?; + let (input, _) = multispace0(input)?; + let (input, column_names) = parse_columns(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = char(')')(input)?; + let (input, _) = multispace1(input)?; + let (input, _) = tag("VALUES")(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = char('(')(input)?; + let (input, _) = multispace0(input)?; + let (input, values) = parse_values(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = char(')')(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = char(';')(input)?; + Ok(( + input, + Operation::Insert(table_name.to_string(), column_names.into_iter().zip(values).collect()), + )) +} + +pub fn parse_columns(input: &str) -> IResult<&str, Vec> { + separated_list0(terminated(char(','), multispace0), map(parse_identifier, |name|name.to_string()))(input) +} + +pub fn parse_values(input: &str) -> IResult<&str, Vec> { + separated_list0(terminated(char(','), multispace0), parse_db_value)(input) +} + + +#[cfg(test)] +mod tests { + use minisql::{operation::Operation, type_system::{IndexableValue, Value}}; + + use super::parse_insert; + + + #[test] + fn test_parse_insert() { + let sql = "INSERT INTO \"MyTable\" (id, data) VALUES(1, \"Text\");"; + let operation = parse_insert(sql).expect("should parse"); + match operation { + ("", Operation::Insert(table_name, insertion_values)) => { + assert_eq!(table_name, "MyTable"); + assert_eq!( + insertion_values, + vec![ + ("id".to_string(), Value::Indexable(IndexableValue::Int(1))), + ("data".to_string(), Value::Indexable(IndexableValue::String("Text".to_string()))) + ]); + } + _ => { + unreachable!() + } + } + } + + #[test] + fn test_parse_insert_with_spaces() { + let sql = "INSERT INTO \"MyTable\" ( id, data ) VALUES ( 1, \"Text\" ) ;"; + let operation = parse_insert(sql).expect("should parse"); + match operation { + ("", Operation::Insert(table_name, insertion_values)) => { + assert_eq!(table_name, "MyTable"); + assert_eq!(insertion_values, + vec![ + ("id".to_string(), Value::Indexable(IndexableValue::Int(1))), + ("data".to_string(), Value::Indexable(IndexableValue::String("Text".to_string()))) + ]); + } + _ => { + unreachable!() + } + } + } +} \ No newline at end of file diff --git a/parser/src/lib.rs b/parser/src/lib.rs new file mode 100644 index 0000000..1a4cb72 --- /dev/null +++ b/parser/src/lib.rs @@ -0,0 +1,16 @@ + +mod literal; +mod select; +mod common; +mod create; +mod insert; +mod delete; +mod index; +mod validation; +mod core; + +pub use core::parse_and_validate; +pub use core::Error; +pub use validation::validate_operation; + +pub use minisql; \ No newline at end of file diff --git a/parser/src/literal.rs b/parser/src/literal.rs new file mode 100644 index 0000000..2552e1e --- /dev/null +++ b/parser/src/literal.rs @@ -0,0 +1,164 @@ +use minisql::type_system::{IndexableValue, Value}; +use nom::{ + branch::alt, + character::complete::{u64, char, digit1, none_of}, + combinator::opt, + multi::many0, + sequence::{delimited, pair, preceded}, + IResult, error::make_error +}; + +pub fn parse_db_value(input: &str) -> IResult<&str, Value> { + alt(( + parse_string, + parse_number, + parse_int, + parse_uuid, + ))(input) +} + +pub fn parse_number(input: &str) -> IResult<&str, Value> { + // Parse the integer part + let (input, (sign, digits)) = pair(opt(char('-')), digit1)(input)?; + + // Parse the fractional part + let (input, frac_part) = opt(pair(char('.'), digit1))(input)?; + + match frac_part { + Some((_fsign, fdigits)) => { + // Combine integer and fractional parts + let combined_parts = format!( + "{}{}", + format!("{}{}", sign.unwrap_or('+'), digits), + format!(".{}", fdigits) + ); + // Parse the combined parts as a floating-point number + let value = combined_parts.parse::() + .map_err(|_| { + nom::Err::Failure(make_error(input, nom::error::ErrorKind::Fail)) + })?; + Ok((input, Value::Number(value))) + } + None => { + let value = format!("{}{}", sign.unwrap_or('+'), digits).parse::() + .map_err(|_| { + nom::Err::Failure(make_error(input, nom::error::ErrorKind::Fail)) + })?; + Ok((input, Value::Indexable(IndexableValue::Int(value)))) + } + } +} + +pub fn parse_int(input: &str) -> IResult<&str, Value> { + u64(input).map(|(input, v)| { + (input, Value::Indexable(IndexableValue::Int(v))) + }) +} + +fn escape_tab(input:&str) -> IResult<&str, char> { + let (input, _) = preceded(char('\\'), char('t'))(input)?; + Ok((input, '\t')) +} + +fn escape_backslash(input:&str) -> IResult<&str, char> { + let (input, _) = preceded(char('\\'), char('\\'))(input)?; + Ok((input, '\\')) +} + +fn escape_newline(input:&str) -> IResult<&str, char> { + let (input, _) = preceded(char('\\'), char('n'))(input)?; + Ok((input, '\n')) +} + +fn escape_carriegereturn(input:&str) -> IResult<&str, char> { + let (input, _) = preceded(char('\\'), char('r'))(input)?; + Ok((input, '\r')) +} + +fn escape_doublequote(input:&str) -> IResult<&str, char> { + preceded(char('\\'), char('"'))(input) +} + +pub fn parse_string(input: &str) -> IResult<&str, Value> { + // Parse the content inside the double quotes + let (input, content) = delimited( + char('"'), + many0(alt(( + escape_backslash, + escape_carriegereturn, + escape_newline, + escape_doublequote, + escape_tab, + none_of(r#"\""#) + ))), + char('"'), + )(input)?; + + // Combine the characters into a string + let value: String = content.into_iter().collect(); + + Ok((input, Value::Indexable(IndexableValue::String(value)))) +} + +fn parse_uuid(input: &str) -> IResult<&str, Value> { + // TODO: make it actually uuid + u64(input).map(|(input, v)| { + (input, Value::Indexable(IndexableValue::Uuid(v))) + }) +} + +#[cfg(test)] +mod tests { + use minisql::type_system::{IndexableValue, Value}; + use crate::literal::{parse_db_value, parse_string}; + + + #[test] + fn test_string_parser() { + assert_eq!(parse_string(r#""simple""#), Ok(("", Value::Indexable(IndexableValue::String(String::from("simple")))))); + assert_eq!(parse_string(r#""\"\t\r\n\\""#), Ok(("", Value::Indexable(IndexableValue::String(String::from("\"\t\r\n\\")))))); + assert_eq!(parse_string(r#""name is \"John\".""#), Ok(("", Value::Indexable(IndexableValue::String(String::from("name is \"John\".")))))); + } + + #[test] + fn test_parse_db_value() { + let (input, value) = parse_db_value("5").expect("should parse"); + assert_eq!(input, ""); + assert_eq!(value, Value::Indexable(IndexableValue::Int(5))); + + let (input, value) = parse_db_value("5.5").expect("should parse"); + assert_eq!(input, ""); + assert_eq!(value, Value::Number(5.5)); + + let (_, _) = parse_db_value("\"STRING\"").expect("should parse"); + let (input, value) = parse_db_value("\"abcdefghkjklmnopqrstuvwxyz!@#$%^&*()_+ \"").expect("should parse"); + assert_eq!(input, ""); + assert_eq!(value, Value::Indexable(IndexableValue::String("abcdefghkjklmnopqrstuvwxyz!@#$%^&*()_+ ".to_string()))); + + } + + + #[test] + fn test_parse_positive_float() { + assert_eq!(parse_db_value("23.213313"), Ok(("", Value::Number(23.213313)))); + assert_eq!(parse_db_value("2241.9734"), Ok(("", Value::Number(2241.9734)))); + } + + #[test] + fn test_parse_negative_float() { + assert_eq!(parse_db_value("-9241.873654"), Ok(("", Value::Number(-9241.873654)))); + assert_eq!(parse_db_value("-62625.0"), Ok(("", Value::Number(-62625.0)))); + } + + #[test] + fn test_parse_float_between_0_and_1() { + assert_eq!(parse_db_value("0.873654"), Ok(("", Value::Number(0.873654)))); + assert_eq!(parse_db_value("0.62625"), Ok(("", Value::Number(0.62625)))); + } + + + #[test] + fn test_parse_int() { + assert_eq!(parse_db_value("5134616"), Ok(("", Value::Indexable(IndexableValue::Int(5134616))))); + } +} \ No newline at end of file diff --git a/parser/src/select.rs b/parser/src/select.rs new file mode 100644 index 0000000..2c6ab71 --- /dev/null +++ b/parser/src/select.rs @@ -0,0 +1,122 @@ +use crate::common::{parse_table_name, parse_column_name, parse_condition}; +use minisql::operation::{ColumnSelection, Operation}; +use nom::{ + branch::alt, + bytes::complete::tag, + character::complete::{multispace0, multispace1, char}, + combinator::map, + error::Error, + multi::separated_list0, + sequence::terminated, + IResult, +}; + +pub fn parse_select(input: &str) -> IResult<&str, Operation> { + let (input, _) = tag("SELECT")(input)?; + let (input, _) = multispace1(input)?; + + let (input, column_selection) = try_parse_column_selection(input)?; + let (input, _) = multispace0(input)?; + let (input, _) = tag("FROM")(input)?; + let (input, _) = multispace1(input)?; + let (input, table_name) = parse_table_name(input)?; + let (input, _) = multispace0(input)?; + let (input, condition) = parse_condition(input)?; + let (input, _) = multispace0(input)?; + // TODO: make it optional? + let (input, _) = tag(";")(input)?; + Ok(( + input, + Operation::Select(table_name.to_string(), column_selection, condition), + )) +} + +pub fn try_parse_column_selection(input: &str) -> IResult<&str, ColumnSelection> { + let all_parser = map(tag::<&str, &str, Error<&str>>("*"), |_| { + ColumnSelection::All + }); + let columns_parser = map( + separated_list0(terminated(char(','), multispace0), parse_column_name), + |names| ColumnSelection::Columns(names), + ); + alt((all_parser, columns_parser))(input) +} + +#[cfg(test)] +mod tests { + use minisql::operation::{ColumnSelection, Operation}; + use crate::{common::{parse_column_name, parse_table_name}, select::parse_select}; + + + #[test] + fn test_parse_select_all() { + let sql = "SELECT * FROM \"MyTable\";"; + let operation = parse_select(sql).expect("should parse"); + match operation { + ("", Operation::Select(table_name, column_selection, maybe_condition)) => { + assert_eq!(table_name, "MyTable"); + assert!(matches!(column_selection, ColumnSelection::All)); + assert!(matches!(maybe_condition, None)); + } + (input, _) => { + println!("Input to be parsed: {}", input); + panic!("expected select operation") + } + } + } + + #[test] + fn test_parse_column_name() { + parse_column_name("1abc").expect_err("variable names should not start with number"); + } + + #[test] + fn test_parse_table_name() { + parse_table_name("\"\"").expect_err("Empty table names are not allowed"); + } + + #[test] + fn test_parse_select_columns() { + let sql = "SELECT name , email FROM \"AddressBook\" ;"; + let operation = parse_select(sql).expect("should parse"); + match operation { + ("", Operation::Select(table_name, column_selection, maybe_condition)) => { + assert_eq!(table_name, "AddressBook"); + assert!(matches!(column_selection, ColumnSelection::Columns(_))); + match column_selection { + ColumnSelection::Columns(column_names) => { + assert_eq!(column_names, vec!["name", "email"]); + } + _ => { + panic!("should select columns") + } + } + assert!(matches!(maybe_condition, None)); + } + (input, _) => { + println!("Input to be parsed: {}", input); + panic!("expected select operation") + } + } + } + + #[test] + fn test_parse_select_where() { + use minisql::operation::Condition; + let sql = "SELECT * FROM \"AddressBook\" WHERE id = 5 ;"; + let operation = parse_select(sql).expect("should parse"); + match operation { + ("", Operation::Select(table_name, column_selection, maybe_condition)) => { + assert_eq!(table_name, "AddressBook"); + assert!(matches!(column_selection, ColumnSelection::All)); + assert!(matches!(maybe_condition, Some(Condition::Eq(_, _)))); + } + (input, _) => { + println!("Input to be parsed: {}", input); + panic!("expected select operation") + } + } + } + // TODO: a test with multiple statements + // TODO: allow underscores in identifiers +} \ No newline at end of file diff --git a/parser/src/validation.rs b/parser/src/validation.rs new file mode 100644 index 0000000..2be6dce --- /dev/null +++ b/parser/src/validation.rs @@ -0,0 +1,206 @@ + +use std::collections::HashSet; + +use minisql::{operation::{ColumnSelection, Condition, InsertionValues, Operation}, schema::TableSchema, type_system::{DbType, IndexableValue, Value}}; + + +#[derive(Debug)] +pub enum ValidationError { + TableDoesNotExist(String), + TableExists(String), + ColumnDoesNotExist(String), + BadColumnPosition(usize), + DuplicateColumn(String), + TypeMismatch, + ValueForRequiredColumnIsMissing(String) +} + +pub fn type_of(value: &Value) -> DbType { + match value { + Value::Indexable(IndexableValue::Int(_)) => DbType::Int, + Value::Indexable(IndexableValue::String(_)) => DbType::String, + Value::Number(_) => DbType::Number, + Value::Indexable(IndexableValue::Uuid(_)) => DbType::Uuid + } +} + +/// Validates the operation based on db_metadata +pub fn validate_operation(operation: &Operation, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> { + match operation { + Operation::Select(table_name, column_selection, condition) => { + validate_select(table_name, column_selection, condition, db_metadata)?; + }, + Operation::Insert(table_name, insertion_values) => { + validate_insert(&table_name, insertion_values, db_metadata)?; + }, + Operation::Delete(table_name, condition) => { + validate_delete(table_name, condition, db_metadata)?; + }, + // Operation::Update(table_name, insertion_values, condition) => { + // validate_update(table_name, insertion_values, db_metadata)?; + // }, + Operation::CreateTable(table_name, schema) => { + validate_create(table_name, schema, db_metadata)?; + }, + Operation::CreateIndex(table_name, column_name) => { + validate_create_index(table_name, column_name, db_metadata)?; + }, + // Operation::DropTable(table_name) => { + // validate_drop(table_name, db_metadata)?; + // } + } + Ok(()) +} + +// pub fn validate_drop(table_name: &str, db_metadata: &Vec<(String, TableSchema)>) -> Result<(), ValidationError> { +// db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) +// .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?; +// Ok(()) +// } + +pub fn validate_create(table_name: &str, schema: &TableSchema, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> { + if db_metadata.iter().find(|(tname, _)| table_name.eq(tname)).is_some() { + return Err(ValidationError::TableExists(table_name.to_string())); + } + let mut column_names = HashSet::new(); + for (name, _) in &schema.column_name_position_mapping { + if column_names.contains(name) { + return Err(ValidationError::DuplicateColumn(name.clone())); + } else { + column_names.insert(name.clone()); + } + } + + // TODO: Ensure it has a primary key?? + Ok(()) +} + +pub fn validate_select(table_name: &str, column_selection: &ColumnSelection, condition: &Option, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> { + let (_, schema) = db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) + .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?; + match column_selection { + ColumnSelection::Columns(columns) => { + columns.iter().find(|c| { + !schema.column_name_position_mapping.contains_left(*c) + }).map_or_else(||Ok(()), |c| Err(ValidationError::ColumnDoesNotExist(c.to_string())))?; + } + _ => {} + } + validate_condition(condition, schema)?; + Ok(()) +} + +// pub fn validate_update(table_name: &str, insertion_values: &InsertionValues, db_metadata: &Vec<(String, TableSchema)>) -> Result<(), ValidationError> { +// let (_, schema) = db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) +// .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?; +// let mut column_names = HashSet::new(); +// // Find duplicate columns +// for (name, _) in insertion_values { +// if column_names.contains(name) { +// return Err(ValidationError::DuplicateColumn(name.clone())); +// } else { +// column_names.insert(name.clone()); +// } +// } +// // Ensure columns exist in schema +// let column_value_type: Vec<_> = insertion_values.iter().map(|(column, value)| { +// (column, value, schema.column_name_position_mapping.iter().find(|(name, _) | { +// (*name).eq(column) +// }).map(|(_, t)| schema.types.get(*t as usize))) +// }).collect(); +// if let Some((name, _, _)) = column_value_type.iter().find(|(_, _, t)| { +// t.is_none() +// }) { +// return Err(ValidationError::ColumnDoesNotExist((*name).clone())); +// } + +// // Check types +// if let Some((_, _, _)) = column_value_type.iter().find(|(_, value, t)| { +// if let Some(Some(column_type)) = t { +// !type_of(value).eq(column_type) +// } else { +// false +// } +// }) { +// // TODO: Add column name information +// return Err(ValidationError::TypeMismatch); +// } +// Ok(()) +// } + +pub fn validate_insert(table_name: &str, insertion_values: &InsertionValues, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> { + let (_, schema) = db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) + .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?; + let inserted_columns: HashSet = HashSet::from_iter(insertion_values.iter().map(|(name, _)| name.clone())); + // TODO: primary key is not required + for (column_name, _) in &schema.column_name_position_mapping { + if !inserted_columns.contains(column_name) { + return Err(ValidationError::ValueForRequiredColumnIsMissing(column_name.clone())) + } + } + // Ensure columns exist in schema + let column_value_type: Vec<_> = insertion_values.iter().map(|(column, value)| { + (column, value, schema.column_name_position_mapping.iter().find(|(name, _) | { + (*name).eq(column) + }).map(|(_, t)| schema.types.get(*t as usize))) + }).collect(); + if let Some((name, _, _)) = column_value_type.iter().find(|(_, _, t)| { + match t { + Some(Some(_)) => false, + _ => true + } + }) { + return Err(ValidationError::ColumnDoesNotExist((*name).clone())); + } + + // Check types + if let Some((_, _, _)) = column_value_type.iter().find(|(_, value, t)| { + if let Some(Some(t)) = t { + !type_of(value).eq(t) + } else { + false + } + }) { + // TODO: Add column name information + return Err(ValidationError::TypeMismatch); + } + Ok(()) +} + +pub fn validate_delete(table_name: &str, condition: &Option, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> { + let (_, schema) = db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) + .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?; + validate_condition(condition, schema)?; + Ok(()) +} + +fn validate_condition(condition: &Option, schema: &TableSchema) -> Result<(), ValidationError> { + match condition { + Some(c) => { + match c { + Condition::Eq(left, right) => { + let position = schema.column_name_position_mapping.get_by_left(left) + .ok_or(ValidationError::ColumnDoesNotExist(left.clone()))?; + let column_type = schema.types.get(*position as usize) + .ok_or(ValidationError::BadColumnPosition(*position))?; + if !column_type.eq(&type_of(right)) { + return Err(ValidationError::TypeMismatch); + } + } + } + } + None => {} + } + Ok(()) +} + +fn validate_create_index(table_name: &str, column_name: &str, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> { + // Ensure table exists + let (_, schema) = db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) + .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?; + // Ensure column exists + if !schema.column_name_position_mapping.contains_left(column_name) { + return Err(ValidationError::ColumnDoesNotExist(column_name.to_string())); + } + Ok(()) +} \ No newline at end of file diff --git a/server/Cargo.toml b/server/Cargo.toml index bca61ec..f542cc7 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -8,4 +8,6 @@ edition = "2021" [dependencies] tokio = { version = "1.35.1", features = ["full"] } anyhow = "1.0.76" -proto = { path = "../proto" } \ No newline at end of file +proto = { path = "../proto" } +minisql = { path = "../minisql" } +parser = { path = "../parser" } diff --git a/server/src/main.rs b/server/src/main.rs index bda6dfd..d256374 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -1,3 +1,5 @@ +use minisql::interpreter::State; +use parser::{parse_and_validate, Error}; use proto::handshake::response::HandshakeResponse; use proto::handshake::server::do_server_handshake; use proto::message::backend::{ @@ -38,6 +40,7 @@ async fn handle_stream(mut stream: TcpStream) -> anyhow::Result<()> { let request = do_server_handshake(&mut writer, &mut reader, response).await?; println!("Handshake complete:\n{request:?}"); + let mut state = State::new(); loop { println!("Waiting for next message"); @@ -50,16 +53,25 @@ async fn handle_stream(mut stream: TcpStream) -> anyhow::Result<()> { } FrontendMessage::Query(data) => { println!("Received Query: {:?}", data); - if data.query.as_str().contains("car") { - println!("Sending error message"); - send_error_response(&mut writer, "Car not found").await?; - } else if data.query.as_str().to_lowercase().contains("select") { - println!("Sending table"); - send_query_response(&mut writer).await?; - } else { - println!("Sending empty query"); - send_empty_query(&mut writer).await?; - } + let metadata = state.metadata(); + match parse_and_validate(data.query.as_str().to_string(), &metadata) { + Ok(operation) => { + match state.interpret(operation) { + Ok(_) => { + send_query_response(&mut writer).await?; + } + Err(err) => { + send_error_response(&mut writer, &format!("error interpreting: {:?}", err)).await?; + } + } + }, + Err(Error::ParsingError(err)) => { + send_error_response(&mut writer, &format!("parsing error: {:?}", err)).await?; + } + Err(Error::ValidationError(v)) => { + send_error_response(&mut writer, &format!("validation error: {:?}", v)).await?; + } + }; send_ready_for_query(&mut writer).await?; } } From 6000b1f2423bfab31dd47478fc7dee6a6c349bca Mon Sep 17 00:00:00 2001 From: Yuriy Dupyn <2153100+omedusyo@users.noreply.github.com> Date: Fri, 26 Jan 2024 19:45:15 +0100 Subject: [PATCH 2/8] Put parsing details into one module --- parser/src/core.rs | 4 ++-- parser/src/lib.rs | 10 ++-------- parser/src/{ => parsing}/common.rs | 6 +++--- parser/src/{ => parsing}/create.rs | 6 +++--- parser/src/{ => parsing}/delete.rs | 6 +++--- parser/src/{ => parsing}/index.rs | 6 +++--- parser/src/{ => parsing}/insert.rs | 4 ++-- parser/src/{ => parsing}/literal.rs | 4 ++-- parser/src/parsing/mod.rs | 7 +++++++ parser/src/{ => parsing}/select.rs | 6 +++--- 10 files changed, 30 insertions(+), 29 deletions(-) rename parser/src/{ => parsing}/common.rs (96%) rename parser/src/{ => parsing}/create.rs (97%) rename parser/src/{ => parsing}/delete.rs (90%) rename parser/src/{ => parsing}/index.rs (95%) rename parser/src/{ => parsing}/insert.rs (98%) rename parser/src/{ => parsing}/literal.rs (98%) create mode 100644 parser/src/parsing/mod.rs rename parser/src/{ => parsing}/select.rs (96%) diff --git a/parser/src/core.rs b/parser/src/core.rs index 778973d..1f97797 100644 --- a/parser/src/core.rs +++ b/parser/src/core.rs @@ -1,7 +1,7 @@ use minisql::{operation::Operation, schema::TableSchema}; use nom::{branch::alt, multi::many0, IResult}; -use crate::{create::parse_create, delete::parse_delete, index::parse_create_index, insert::parse_insert, select::parse_select, validation::{validate_operation, ValidationError}}; +use crate::{parsing::{create::parse_create, delete::parse_delete, index::parse_create_index, insert::parse_insert, select::parse_select}, validation::{validate_operation, ValidationError}}; #[derive(Debug)] pub enum Error { @@ -38,4 +38,4 @@ pub fn parse_and_validate(query: String, db_metadata: &Vec<(String, &TableSchema // #[test] // fn test_select() { // parse_and_validate("SELECT * FROM users;".to_string(), &Vec::new()).unwrap(); -// } \ No newline at end of file +// } diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 1a4cb72..94b121b 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -1,11 +1,5 @@ -mod literal; -mod select; -mod common; -mod create; -mod insert; -mod delete; -mod index; +mod parsing; mod validation; mod core; @@ -13,4 +7,4 @@ pub use core::parse_and_validate; pub use core::Error; pub use validation::validate_operation; -pub use minisql; \ No newline at end of file +pub use minisql; diff --git a/parser/src/common.rs b/parser/src/parsing/common.rs similarity index 96% rename from parser/src/common.rs rename to parser/src/parsing/common.rs index 0cd90c1..23fb866 100644 --- a/parser/src/common.rs +++ b/parser/src/parsing/common.rs @@ -8,7 +8,7 @@ use nom::{ }; use minisql::{operation::Condition, type_system::DbType}; -use crate::literal::parse_db_value; +use super::literal::parse_db_value; pub fn parse_table_name(input: &str) -> IResult<&str, &str> { alt(( @@ -68,7 +68,7 @@ fn parse_equality(input: &str) -> IResult<&str, Condition> { #[cfg(test)] mod tests { use minisql::{operation::Condition, type_system::DbType}; - use crate::common::{parse_db_type, parse_equality}; + use crate::parsing::common::{parse_db_type, parse_equality}; #[test] fn test_parse_equality() { @@ -92,4 +92,4 @@ mod tests { assert!(matches!(parse_db_type("NUMBER").expect("should parse").1, DbType::Number)); assert!(matches!(parse_db_type("Unknown"), Err(_))); } -} \ No newline at end of file +} diff --git a/parser/src/create.rs b/parser/src/parsing/create.rs similarity index 97% rename from parser/src/create.rs rename to parser/src/parsing/create.rs index 4cda32d..94e3538 100644 --- a/parser/src/create.rs +++ b/parser/src/parsing/create.rs @@ -7,7 +7,7 @@ use nom::{ IResult, combinator::opt, }; -use crate::common::{parse_table_name, parse_identifier, parse_db_type}; +use super::common::{parse_table_name, parse_identifier, parse_db_type}; pub fn parse_create(input: &str) -> IResult<&str, Operation> { let (input, _) = tag("CREATE")(input)?; @@ -69,7 +69,7 @@ pub fn parse_column_definition(input: &str) -> IResult<&str, (ColumnName, DbType #[cfg(test)] mod tests { use minisql::operation::Operation; - use crate::create::parse_create; + use crate::parsing::create::parse_create; #[test] fn test_parse_create_no_spaces() { @@ -106,4 +106,4 @@ mod tests { } } -} \ No newline at end of file +} diff --git a/parser/src/delete.rs b/parser/src/parsing/delete.rs similarity index 90% rename from parser/src/delete.rs rename to parser/src/parsing/delete.rs index e1f5a4c..af71cec 100644 --- a/parser/src/delete.rs +++ b/parser/src/parsing/delete.rs @@ -5,7 +5,7 @@ use nom::{ IResult, }; -use crate::common::{parse_table_name, parse_condition}; +use super::common::{parse_table_name, parse_condition}; pub fn parse_delete(input: &str) -> IResult<&str, Operation> { let (input, _) = tag("DELETE")(input)?; @@ -26,7 +26,7 @@ pub fn parse_delete(input: &str) -> IResult<&str, Operation> { #[cfg(test)] mod tests { use minisql::operation::Operation; - use crate::delete::parse_delete; + use crate::parsing::delete::parse_delete; #[test] fn test_parse_delete() { @@ -35,4 +35,4 @@ mod tests { } // TODO: add test with condition -} \ No newline at end of file +} diff --git a/parser/src/index.rs b/parser/src/parsing/index.rs similarity index 95% rename from parser/src/index.rs rename to parser/src/parsing/index.rs index 80f6a96..3130a6b 100644 --- a/parser/src/index.rs +++ b/parser/src/parsing/index.rs @@ -5,7 +5,7 @@ use nom::{ IResult, combinator::opt, }; -use crate::common::{parse_identifier, parse_table_name}; +use super::common::{parse_identifier, parse_table_name}; pub fn parse_create_index(input: &str) -> IResult<&str, Operation> { let (input, _) = tag("CREATE")(input)?; @@ -39,7 +39,7 @@ pub fn parse_create_index(input: &str) -> IResult<&str, Operation> { #[cfg(test)] mod tests { use minisql::operation::Operation; - use crate::index::parse_create_index; + use crate::parsing::index::parse_create_index; #[test] @@ -67,4 +67,4 @@ mod tests { _ => {} } } -} \ No newline at end of file +} diff --git a/parser/src/insert.rs b/parser/src/parsing/insert.rs similarity index 98% rename from parser/src/insert.rs rename to parser/src/parsing/insert.rs index 340e6bf..365be68 100644 --- a/parser/src/insert.rs +++ b/parser/src/parsing/insert.rs @@ -1,4 +1,4 @@ -use crate::{literal::parse_db_value, common::{parse_table_name, parse_identifier}}; +use super::{literal::parse_db_value, common::{parse_table_name, parse_identifier}}; use minisql::{operation::Operation, type_system::Value}; use nom::{ bytes::complete::tag, @@ -91,4 +91,4 @@ mod tests { } } } -} \ No newline at end of file +} diff --git a/parser/src/literal.rs b/parser/src/parsing/literal.rs similarity index 98% rename from parser/src/literal.rs rename to parser/src/parsing/literal.rs index 2552e1e..44e7910 100644 --- a/parser/src/literal.rs +++ b/parser/src/parsing/literal.rs @@ -110,7 +110,7 @@ fn parse_uuid(input: &str) -> IResult<&str, Value> { #[cfg(test)] mod tests { use minisql::type_system::{IndexableValue, Value}; - use crate::literal::{parse_db_value, parse_string}; + use crate::parsing::literal::{parse_db_value, parse_string}; #[test] @@ -161,4 +161,4 @@ mod tests { fn test_parse_int() { assert_eq!(parse_db_value("5134616"), Ok(("", Value::Indexable(IndexableValue::Int(5134616))))); } -} \ No newline at end of file +} diff --git a/parser/src/parsing/mod.rs b/parser/src/parsing/mod.rs new file mode 100644 index 0000000..482deb4 --- /dev/null +++ b/parser/src/parsing/mod.rs @@ -0,0 +1,7 @@ +pub(crate) mod literal; +pub(crate) mod select; +pub(crate) mod common; +pub(crate) mod create; +pub(crate) mod insert; +pub(crate) mod delete; +pub(crate) mod index; diff --git a/parser/src/select.rs b/parser/src/parsing/select.rs similarity index 96% rename from parser/src/select.rs rename to parser/src/parsing/select.rs index 2c6ab71..c3c4292 100644 --- a/parser/src/select.rs +++ b/parser/src/parsing/select.rs @@ -1,4 +1,4 @@ -use crate::common::{parse_table_name, parse_column_name, parse_condition}; +use super::common::{parse_table_name, parse_column_name, parse_condition}; use minisql::operation::{ColumnSelection, Operation}; use nom::{ branch::alt, @@ -45,7 +45,7 @@ pub fn try_parse_column_selection(input: &str) -> IResult<&str, ColumnSelection> #[cfg(test)] mod tests { use minisql::operation::{ColumnSelection, Operation}; - use crate::{common::{parse_column_name, parse_table_name}, select::parse_select}; + use crate::parsing::{common::{parse_column_name, parse_table_name}, select::parse_select}; #[test] @@ -119,4 +119,4 @@ mod tests { } // TODO: a test with multiple statements // TODO: allow underscores in identifiers -} \ No newline at end of file +} From 677fd19bece270ad89513d7862c3aa8501880ffb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20Moravec?= Date: Sat, 27 Jan 2024 16:34:23 +0100 Subject: [PATCH 3/8] fix: NUMBER type name --- parser/src/parsing/common.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parser/src/parsing/common.rs b/parser/src/parsing/common.rs index 23fb866..f71d587 100644 --- a/parser/src/parsing/common.rs +++ b/parser/src/parsing/common.rs @@ -32,12 +32,12 @@ pub fn parse_column_name(input: &str) -> IResult<&str, String> { } pub fn parse_db_type(input: &str) -> IResult<&str, DbType> { - let (input, type_name) = alt((tag("STRING"), tag("INT"), tag("Float"), tag("UUID")))(input)?; + let (input, type_name) = alt((tag("STRING"), tag("INT"), tag("NUMBER"), tag("UUID")))(input)?; let db_type = match type_name { "STRING" => DbType::String, "INT" => DbType::Int, "UUID" => DbType::Uuid, - "Float" => DbType::Number, + "NUMBER" => DbType::Number, _ => return Err(nom::Err::Failure(make_error(input, nom::error::ErrorKind::IsNot))) }; Ok((input, db_type)) From 5ced11c40d0fab7f952a163fb5303ec34cf48e78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20Moravec?= Date: Sat, 27 Jan 2024 16:34:44 +0100 Subject: [PATCH 4/8] feat: integrate thiserror --- parser/src/core.rs | 9 ++++++--- parser/src/validation.rs | 10 +++++++++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/parser/src/core.rs b/parser/src/core.rs index 1f97797..248c5c6 100644 --- a/parser/src/core.rs +++ b/parser/src/core.rs @@ -1,12 +1,15 @@ use minisql::{operation::Operation, schema::TableSchema}; use nom::{branch::alt, multi::many0, IResult}; +use thiserror::Error; use crate::{parsing::{create::parse_create, delete::parse_delete, index::parse_create_index, insert::parse_insert, select::parse_select}, validation::{validate_operation, ValidationError}}; -#[derive(Debug)] +#[derive(Debug, Error)] pub enum Error { + #[error("parsing error: {0}")] ParsingError(String), - ValidationError(ValidationError) + #[error("validation error: {0}")] + ValidationError(#[from] ValidationError) } pub fn parse_statement<'a>(input: &'a str) -> IResult<&str, Operation> { @@ -31,7 +34,7 @@ pub fn parse_and_validate(query: String, db_metadata: &Vec<(String, &TableSchema Error::ParsingError(err.to_string()) })?; - validate_operation(&op, db_metadata).map_err(|err| Error::ValidationError(err))?; + validate_operation(&op, db_metadata)?; Ok(op) } diff --git a/parser/src/validation.rs b/parser/src/validation.rs index 2be6dce..795e04f 100644 --- a/parser/src/validation.rs +++ b/parser/src/validation.rs @@ -1,17 +1,25 @@ use std::collections::HashSet; +use thiserror::Error; use minisql::{operation::{ColumnSelection, Condition, InsertionValues, Operation}, schema::TableSchema, type_system::{DbType, IndexableValue, Value}}; -#[derive(Debug)] +#[derive(Debug, Error)] pub enum ValidationError { + #[error("table {0} does not exist")] TableDoesNotExist(String), + #[error("table {0} already exists")] TableExists(String), + #[error("column {0} does not exist")] ColumnDoesNotExist(String), + #[error("bad column position {0}")] BadColumnPosition(usize), + #[error("duplicate column {0}")] DuplicateColumn(String), + #[error("type mismatch")] TypeMismatch, + #[error("value for required column {0} is missing")] ValueForRequiredColumnIsMissing(String) } From 9999d67b8f98240af2f193d0967a86aa62db3398 Mon Sep 17 00:00:00 2001 From: Yuriy Dupyn <2153100+omedusyo@users.noreply.github.com> Date: Sat, 27 Jan 2024 17:14:11 +0100 Subject: [PATCH 5/8] Cleanup validation.rs --- minisql/src/schema.rs | 19 ++++ parser/src/validation.rs | 210 +++++++++++++++++++++------------------ 2 files changed, 134 insertions(+), 95 deletions(-) diff --git a/minisql/src/schema.rs b/minisql/src/schema.rs index ba9759a..c56b4da 100644 --- a/minisql/src/schema.rs +++ b/minisql/src/schema.rs @@ -36,6 +36,24 @@ impl TableSchema { self.types[column_position] } + pub fn get_columns(&self) -> Vec<&ColumnName> { + self.column_name_position_mapping.iter().map(|(name, _)| name).collect() + } + + pub fn does_column_exist(&self, column_name: &ColumnName) -> bool { + self.column_name_position_mapping.contains_left(column_name) + } + + pub fn get_column_position(&self, column_name: &ColumnName) -> Option { + self.column_name_position_mapping.get_by_left(column_name).copied() + } + + pub fn get_type_at(&self, column_name: &ColumnName) -> Option { + let position = self.get_column_position(column_name)?; + self.types.get(position).copied() + } + + // TODO: Get rid of this after validation is merged fn get_column(&self, column_name: &ColumnName) -> DbResult<(DbType, ColumnPosition)> { match self.column_name_position_mapping.get_by_left(column_name) { Some(column_position) => match self.types.get(*column_position) { @@ -52,6 +70,7 @@ impl TableSchema { } } + // TODO: Get rid of this after validation is merged pub fn column_position_from_column_name( &self, column_name: &ColumnName, diff --git a/parser/src/validation.rs b/parser/src/validation.rs index 2be6dce..d978c0c 100644 --- a/parser/src/validation.rs +++ b/parser/src/validation.rs @@ -1,98 +1,107 @@ use std::collections::HashSet; -use minisql::{operation::{ColumnSelection, Condition, InsertionValues, Operation}, schema::TableSchema, type_system::{DbType, IndexableValue, Value}}; +use minisql::{operation::{ColumnSelection, Condition, InsertionValues, Operation}, schema::{TableSchema, ColumnName, TableName}, type_system::DbType}; #[derive(Debug)] pub enum ValidationError { - TableDoesNotExist(String), - TableExists(String), - ColumnDoesNotExist(String), - BadColumnPosition(usize), + TableDoesNotExist(TableName), + TableAlreadyExists(TableName), + ColumnsDoNotExist(Vec), DuplicateColumn(String), - TypeMismatch, - ValueForRequiredColumnIsMissing(String) + TypeMismatch(TypeMismatch), + RequiredColumnsAreMissing(Vec) } -pub fn type_of(value: &Value) -> DbType { - match value { - Value::Indexable(IndexableValue::Int(_)) => DbType::Int, - Value::Indexable(IndexableValue::String(_)) => DbType::String, - Value::Number(_) => DbType::Number, - Value::Indexable(IndexableValue::Uuid(_)) => DbType::Uuid - } +#[derive(Debug)] +pub struct TypeMismatch { + pub column_name: ColumnName, + pub received_type: DbType, + pub expected_type: DbType, } +pub type DbSchema<'a> = Vec<(TableName, &'a TableSchema)>; + /// Validates the operation based on db_metadata -pub fn validate_operation(operation: &Operation, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> { +pub fn validate_operation(operation: &Operation, db_schema: &DbSchema) -> Result<(), ValidationError> { match operation { Operation::Select(table_name, column_selection, condition) => { - validate_select(table_name, column_selection, condition, db_metadata)?; + validate_select(table_name, column_selection, condition, db_schema)?; }, Operation::Insert(table_name, insertion_values) => { - validate_insert(&table_name, insertion_values, db_metadata)?; + validate_insert(&table_name, insertion_values, db_schema)?; }, Operation::Delete(table_name, condition) => { - validate_delete(table_name, condition, db_metadata)?; + validate_delete(table_name, condition, db_schema)?; }, // Operation::Update(table_name, insertion_values, condition) => { // validate_update(table_name, insertion_values, db_metadata)?; // }, Operation::CreateTable(table_name, schema) => { - validate_create(table_name, schema, db_metadata)?; + validate_create(table_name, schema, db_schema)?; }, Operation::CreateIndex(table_name, column_name) => { - validate_create_index(table_name, column_name, db_metadata)?; + validate_create_index(table_name, column_name, db_schema)?; }, // Operation::DropTable(table_name) => { - // validate_drop(table_name, db_metadata)?; + // validate_drop(table_name, db_schema)?; // } } Ok(()) } +fn validate_table_exists<'a>(db_schema: &DbSchema<'a>, table_name: &'a TableName) -> Result<&'a TableSchema, ValidationError> { + db_schema.iter().find(|(tname, _)| table_name.eq(tname)) + .ok_or(ValidationError::TableDoesNotExist(table_name.to_string())) + .map(|(_, table_schema)| table_schema).copied() +} + + // pub fn validate_drop(table_name: &str, db_metadata: &Vec<(String, TableSchema)>) -> Result<(), ValidationError> { // db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) // .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?; // Ok(()) // } -pub fn validate_create(table_name: &str, schema: &TableSchema, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> { - if db_metadata.iter().find(|(tname, _)| table_name.eq(tname)).is_some() { - return Err(ValidationError::TableExists(table_name.to_string())); - } - let mut column_names = HashSet::new(); - for (name, _) in &schema.column_name_position_mapping { - if column_names.contains(name) { - return Err(ValidationError::DuplicateColumn(name.clone())); - } else { - column_names.insert(name.clone()); - } +pub fn validate_create(table_name: &TableName, schema: &TableSchema, db_schema: &DbSchema) -> Result<(), ValidationError> { + if let Some(_) = get_table_schema(db_schema, table_name) { + return Err(ValidationError::TableAlreadyExists(table_name.to_string())); } + find_first_duplicate(&schema.get_columns()) + .map_or_else( + || Ok(()), + |duplicate_column| Err(ValidationError::DuplicateColumn(duplicate_column.to_string())) + )?; + // TODO: Ensure it has a primary key?? Ok(()) } -pub fn validate_select(table_name: &str, column_selection: &ColumnSelection, condition: &Option, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> { - let (_, schema) = db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) - .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?; +pub fn validate_select(table_name: &TableName, column_selection: &ColumnSelection, condition: &Option, db_schema: &Vec<(TableName, &TableSchema)>) -> Result<(), ValidationError> { + let schema = validate_table_exists(db_schema, table_name)?; match column_selection { ColumnSelection::Columns(columns) => { - columns.iter().find(|c| { - !schema.column_name_position_mapping.contains_left(*c) - }).map_or_else(||Ok(()), |c| Err(ValidationError::ColumnDoesNotExist(c.to_string())))?; + let non_existant_columns: Vec = + columns.iter().filter_map(|column| + if schema.does_column_exist(&column) { + Some(column.clone()) + } else { + None + }).collect(); + if non_existant_columns.len() > 0 { + Err(ValidationError::ColumnsDoNotExist(non_existant_columns)) + } else { + validate_condition(condition, schema) + } } - _ => {} + ColumnSelection::All => Ok(()) } - validate_condition(condition, schema)?; - Ok(()) } // pub fn validate_update(table_name: &str, insertion_values: &InsertionValues, db_metadata: &Vec<(String, TableSchema)>) -> Result<(), ValidationError> { -// let (_, schema) = db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) -// .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?; +// let schema = validate_table_exists(db_schema, table_name)?; // let mut column_names = HashSet::new(); // // Find duplicate columns // for (name, _) in insertion_values { @@ -111,7 +120,7 @@ pub fn validate_select(table_name: &str, column_selection: &ColumnSelection, con // if let Some((name, _, _)) = column_value_type.iter().find(|(_, _, t)| { // t.is_none() // }) { -// return Err(ValidationError::ColumnDoesNotExist((*name).clone())); +// return Err(ValidationError::ColumnsDoNotExist(vec![(*name).clone())]); // } // // Check types @@ -128,63 +137,56 @@ pub fn validate_select(table_name: &str, column_selection: &ColumnSelection, con // Ok(()) // } -pub fn validate_insert(table_name: &str, insertion_values: &InsertionValues, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> { - let (_, schema) = db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) - .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?; - let inserted_columns: HashSet = HashSet::from_iter(insertion_values.iter().map(|(name, _)| name.clone())); - // TODO: primary key is not required - for (column_name, _) in &schema.column_name_position_mapping { - if !inserted_columns.contains(column_name) { - return Err(ValidationError::ValueForRequiredColumnIsMissing(column_name.clone())) - } +pub fn validate_insert(table_name: &TableName, insertion_values: &InsertionValues, db_schema: &DbSchema) -> Result<(), ValidationError> { + let schema = validate_table_exists(db_schema, table_name)?; + + // Check for duplicate columns in insertion_values. + let columns_in_query_vec: Vec<&ColumnName> = insertion_values.iter().map(|(column_name, _)| column_name).collect(); + find_first_duplicate(&columns_in_query_vec) + .map_or_else( + || Ok(()), + |duplicate_column| Err(ValidationError::DuplicateColumn(duplicate_column.to_string())) + )?; + + // Check that the set of columns in the insertion_values is the same as the set of required columns of the table. + let columns_in_query: HashSet<&ColumnName> = HashSet::from_iter(columns_in_query_vec); + let columns_in_schema: HashSet<&ColumnName> = HashSet::from_iter(schema.get_columns()); + let non_existant_columns = Vec::from_iter(columns_in_query.difference(&columns_in_schema)); + if non_existant_columns.len() > 0 { + return Err(ValidationError::ColumnsDoNotExist(non_existant_columns.iter().map(|str| str.to_string()).collect())); } - // Ensure columns exist in schema - let column_value_type: Vec<_> = insertion_values.iter().map(|(column, value)| { - (column, value, schema.column_name_position_mapping.iter().find(|(name, _) | { - (*name).eq(column) - }).map(|(_, t)| schema.types.get(*t as usize))) - }).collect(); - if let Some((name, _, _)) = column_value_type.iter().find(|(_, _, t)| { - match t { - Some(Some(_)) => false, - _ => true - } - }) { - return Err(ValidationError::ColumnDoesNotExist((*name).clone())); + let missing_required_columns = Vec::from_iter(columns_in_schema.difference(&columns_in_query)); + if missing_required_columns.len() > 0 { + return Err(ValidationError::RequiredColumnsAreMissing(missing_required_columns.iter().map(|str| str.to_string()).collect())); } // Check types - if let Some((_, _, _)) = column_value_type.iter().find(|(_, value, t)| { - if let Some(Some(t)) = t { - !type_of(value).eq(t) - } else { - false + for (column_name, value) in insertion_values { + let expected_type = schema.get_type_at(column_name).ok_or(ValidationError::ColumnsDoNotExist(vec![column_name.to_string()]))?; // By the previous validation steps this is never gonna trigger an error. + let value_type = value.to_type(); + if value_type != expected_type { + return Err(ValidationError::TypeMismatch(TypeMismatch { column_name: column_name.to_string(), received_type: value_type, expected_type })); } - }) { - // TODO: Add column name information - return Err(ValidationError::TypeMismatch); } + Ok(()) } -pub fn validate_delete(table_name: &str, condition: &Option, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> { - let (_, schema) = db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) - .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?; +pub fn validate_delete(table_name: &TableName, condition: &Option, db_schema: &DbSchema) -> Result<(), ValidationError> { + let schema = validate_table_exists(db_schema, table_name)?; validate_condition(condition, schema)?; Ok(()) } fn validate_condition(condition: &Option, schema: &TableSchema) -> Result<(), ValidationError> { match condition { - Some(c) => { - match c { - Condition::Eq(left, right) => { - let position = schema.column_name_position_mapping.get_by_left(left) - .ok_or(ValidationError::ColumnDoesNotExist(left.clone()))?; - let column_type = schema.types.get(*position as usize) - .ok_or(ValidationError::BadColumnPosition(*position))?; - if !column_type.eq(&type_of(right)) { - return Err(ValidationError::TypeMismatch); + Some(condition) => { + match condition { + Condition::Eq(column_name, value) => { + let expected_type: DbType = schema.get_type_at(column_name).ok_or(ValidationError::ColumnsDoNotExist(vec![column_name.to_string()]))?; + let value_type: DbType = value.to_type(); + if !expected_type.eq(&value_type) { + return Err(ValidationError::TypeMismatch(TypeMismatch { column_name: column_name.to_string(), received_type: value_type, expected_type })); } } } @@ -194,13 +196,31 @@ fn validate_condition(condition: &Option, schema: &TableSchema) -> Re Ok(()) } -fn validate_create_index(table_name: &str, column_name: &str, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> { - // Ensure table exists - let (_, schema) = db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) - .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?; - // Ensure column exists - if !schema.column_name_position_mapping.contains_left(column_name) { - return Err(ValidationError::ColumnDoesNotExist(column_name.to_string())); +fn validate_create_index(table_name: &TableName, column_name: &ColumnName, db_schema: &DbSchema) -> Result<(), ValidationError> { + let schema = validate_table_exists(db_schema, table_name)?; + if schema.does_column_exist(column_name) { + Ok(()) + } else { + Err(ValidationError::ColumnsDoNotExist(vec![column_name.to_string()])) } - Ok(()) -} \ No newline at end of file +} + +// ===Helpers=== +fn find_first_duplicate(xs: &[A]) -> Option<&A> +where A: Eq + std::hash::Hash +{ + let mut already_seen_elements: HashSet<&A> = HashSet::new(); + for x in xs { + if already_seen_elements.contains(x) { + return Some(x); + } else { + already_seen_elements.insert(&x); + } + } + None +} + +fn get_table_schema<'a>(db_schema: &DbSchema<'a>, table_name: &'a TableName) -> Option<&'a TableSchema> { + let (_, table_schema) = db_schema.iter().find(|(tname, _)| table_name.eq(tname))?; + Some(table_schema) +} From 4e5959a53a5c1852c50a52a799de51e0ba120955 Mon Sep 17 00:00:00 2001 From: Yuriy Dupyn <2153100+omedusyo@users.noreply.github.com> Date: Sat, 27 Jan 2024 17:26:00 +0100 Subject: [PATCH 6/8] Fix formatting of validation errors --- parser/src/validation.rs | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/parser/src/validation.rs b/parser/src/validation.rs index 870b11a..2c20c24 100644 --- a/parser/src/validation.rs +++ b/parser/src/validation.rs @@ -11,29 +11,20 @@ pub enum ValidationError { TableDoesNotExist(TableName), #[error("table {0} already exists")] TableAlreadyExists(TableName), - // TODO - // #[error("columns {0} do not exist")] - #[error("columns do not exist")] + #[error("columns {0:?} do not exist")] ColumnsDoNotExist(Vec), #[error("duplicate column {0}")] DuplicateColumn(ColumnName), - // TODO: You need to actually print the error message - #[error("type mismatch")] - TypeMismatch(TypeMismatch), - // TODO - // #[error("values for required columns {0} are missing")] - #[error("values for required columns are missing")] + #[error("type mismatch at column `{column_name:?}` (expected {expected_type:?}, found {received_type:?})")] + TypeMismatch { + column_name: ColumnName, + received_type: DbType, + expected_type: DbType, + }, + #[error("values for required columns {0:?} are missing")] RequiredColumnsAreMissing(Vec) } -// TODO: Add derive(Error) -#[derive(Debug)] -pub struct TypeMismatch { - pub column_name: ColumnName, - pub received_type: DbType, - pub expected_type: DbType, -} - pub type DbSchema<'a> = Vec<(TableName, &'a TableSchema)>; /// Validates the operation based on db_metadata @@ -178,7 +169,7 @@ pub fn validate_insert(table_name: &TableName, insertion_values: &InsertionValue let expected_type = schema.get_type_at(column_name).ok_or(ValidationError::ColumnsDoNotExist(vec![column_name.to_string()]))?; // By the previous validation steps this is never gonna trigger an error. let value_type = value.to_type(); if value_type != expected_type { - return Err(ValidationError::TypeMismatch(TypeMismatch { column_name: column_name.to_string(), received_type: value_type, expected_type })); + return Err(ValidationError::TypeMismatch { column_name: column_name.to_string(), received_type: value_type, expected_type }); } } @@ -199,7 +190,7 @@ fn validate_condition(condition: &Option, schema: &TableSchema) -> Re let expected_type: DbType = schema.get_type_at(column_name).ok_or(ValidationError::ColumnsDoNotExist(vec![column_name.to_string()]))?; let value_type: DbType = value.to_type(); if !expected_type.eq(&value_type) { - return Err(ValidationError::TypeMismatch(TypeMismatch { column_name: column_name.to_string(), received_type: value_type, expected_type })); + return Err(ValidationError::TypeMismatch { column_name: column_name.to_string(), received_type: value_type, expected_type }); } } } From cf76cc4d10e269408b79f141976e70e6d0ec4e58 Mon Sep 17 00:00:00 2001 From: Yuriy Dupyn <2153100+omedusyo@users.noreply.github.com> Date: Sat, 27 Jan 2024 18:11:12 +0100 Subject: [PATCH 7/8] Restore schema fields to private --- minisql/src/schema.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/minisql/src/schema.rs b/minisql/src/schema.rs index c56b4da..e029606 100644 --- a/minisql/src/schema.rs +++ b/minisql/src/schema.rs @@ -12,8 +12,8 @@ use std::collections::HashMap; pub struct TableSchema { table_name: TableName, // used for descriptive errors primary_key: ColumnPosition, - pub column_name_position_mapping: BiMap, - pub types: Vec, + column_name_position_mapping: BiMap, + types: Vec, } pub type TableName = String; From 464c0b6698215b0c56bd4682fc82ef7eedffca4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20Moravec?= Date: Sat, 27 Jan 2024 18:47:43 +0100 Subject: [PATCH 8/8] fix: parse uuid with 'u' prefix --- parser/src/parsing/literal.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/parser/src/parsing/literal.rs b/parser/src/parsing/literal.rs index 44e7910..13d2b83 100644 --- a/parser/src/parsing/literal.rs +++ b/parser/src/parsing/literal.rs @@ -100,17 +100,17 @@ pub fn parse_string(input: &str) -> IResult<&str, Value> { Ok((input, Value::Indexable(IndexableValue::String(value)))) } -fn parse_uuid(input: &str) -> IResult<&str, Value> { - // TODO: make it actually uuid - u64(input).map(|(input, v)| { +pub fn parse_uuid(input: &str) -> IResult<&str, Value> { + let (input, value) = pair(char('u'), u64)(input).map(|(input, (_, v))| { (input, Value::Indexable(IndexableValue::Uuid(v))) - }) + })?; + Ok((input, value)) } #[cfg(test)] mod tests { use minisql::type_system::{IndexableValue, Value}; - use crate::parsing::literal::{parse_db_value, parse_string}; + use crate::parsing::literal::{parse_db_value, parse_string, parse_uuid}; #[test] @@ -161,4 +161,9 @@ mod tests { fn test_parse_int() { assert_eq!(parse_db_value("5134616"), Ok(("", Value::Indexable(IndexableValue::Int(5134616))))); } + + #[test] + fn test_parse_uuid() { + assert_eq!(parse_uuid("u131515"), Ok(("", Value::Indexable(IndexableValue::Uuid(131515))))) + } }