diff --git a/Cargo.lock b/Cargo.lock index a284cd2..5a3a6e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -77,9 +77,9 @@ version = "0.1.74" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.70", + "quote 1.0.33", + "syn 2.0.41", ] [[package]] @@ -184,9 +184,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" dependencies = [ "heck", - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.70", + "quote 1.0.33", + "syn 2.0.41", ] [[package]] @@ -200,6 +200,8 @@ name = "client" version = "0.1.0" dependencies = [ "anyhow", + "minisql", + "parser", "proto", "tokio", ] @@ -261,6 +263,12 @@ version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "minisql" version = "0.1.0" @@ -290,6 +298,38 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "nom" +version = "4.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6" +dependencies = [ + "memchr", + "version_check", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "nom-peg" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5a3fefa2869e8c4f92ee5135cbeba457eebf1f24e188616bcbd334abb51be6a3" +dependencies = [ + "nom 4.2.3", + "proc-macro2 0.4.30", + "quote 0.6.13", + "syn 0.15.44", +] + [[package]] name = "num_cpus" version = "1.16.0" @@ -332,6 +372,17 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "parser" +version = "0.1.0" +dependencies = [ + "bimap", + "minisql", + "nom 7.1.3", + "nom-peg", + "thiserror", +] + [[package]] name = "pin-project-lite" version = "0.2.13" @@ -344,6 +395,15 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +[[package]] +name = "proc-macro2" +version = "0.4.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" +dependencies = [ + "unicode-xid", +] + [[package]] name = "proc-macro2" version = "1.0.70" @@ -363,13 +423,22 @@ dependencies = [ "tokio", ] +[[package]] +name = "quote" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" +dependencies = [ + "proc-macro2 0.4.30", +] + [[package]] name = "quote" version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ - "proc-macro2", + "proc-macro2 1.0.70", ] [[package]] @@ -438,9 +507,9 @@ version = "1.0.193" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.70", + "quote 1.0.33", + "syn 2.0.41", ] [[package]] @@ -487,14 +556,25 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" 
+[[package]] +name = "syn" +version = "0.15.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" +dependencies = [ + "proc-macro2 0.4.30", + "quote 0.6.13", + "unicode-xid", +] + [[package]] name = "syn" version = "2.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269" dependencies = [ - "proc-macro2", - "quote", + "proc-macro2 1.0.70", + "quote 1.0.33", "unicode-ident", ] @@ -513,9 +593,9 @@ version = "1.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.70", + "quote 1.0.33", + "syn 2.0.41", ] [[package]] @@ -543,9 +623,9 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ - "proc-macro2", - "quote", - "syn", + "proc-macro2 1.0.70", + "quote 1.0.33", + "syn 2.0.41", ] [[package]] @@ -554,12 +634,24 @@ version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +[[package]] +name = "unicode-xid" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" + [[package]] name = "utf8parse" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "version_check" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" + [[package]] name = "virtue" version 
= "0.0.13"
diff --git a/Cargo.toml b/Cargo.toml
index 714e4bf..57d0219 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,5 +4,6 @@ members = [
     "minisql",
     "proto",
     "server",
-    "client"
+    "client",
+    "parser"
 ]
diff --git a/client/Cargo.toml b/client/Cargo.toml
index 9cf09e6..a8012c6 100644
--- a/client/Cargo.toml
+++ b/client/Cargo.toml
@@ -8,4 +8,6 @@ edition = "2021"
 [dependencies]
 tokio = { version = "1.35.1", features = ["full"] }
 anyhow = "1.0.76"
-proto = { path = "../proto" }
\ No newline at end of file
+proto = { path = "../proto" }
+minisql = { path = "../minisql" }
+parser = { path = "../parser" }
diff --git a/client/src/main.rs b/client/src/main.rs
index e77ea5c..fc845e9 100644
--- a/client/src/main.rs
+++ b/client/src/main.rs
@@ -31,7 +31,7 @@ async fn main() -> anyhow::Result<()> {
         query: "SELECT * FROM users;".to_string().into(),
     })).await?;
     writer.flush().await?;
-
+    let mut line = String::new();
     loop {
         let msg: BackendMessage = reader.read_proto().await?;
         match msg {
@@ -46,7 +46,23 @@ async fn main() -> anyhow::Result<()> {
             },
             BackendMessage::ReadyForQuery(data) => {
                 println!("Ready for query: {:?}", data);
-                break;
+                line.clear();
+                // NOTE(review): this is a blocking read inside an async fn.
+                // `read_line` yields Ok(0) at end of input; stop then, because
+                // without a new query the server never sends another message.
+                if std::io::stdin().read_line(&mut line)? == 0 {
+                    break;
+                }
+                // `read_line` keeps the trailing newline, so compare and send
+                // the trimmed text; otherwise "exit" would never match.
+                let query = line.trim();
+                if query == "exit" {
+                    break;
+                }
+                writer.write_proto(FrontendMessage::Query(QueryData {
+                    query: query.to_string().into(),
+                })).await?;
+                writer.flush().await?;
             },
             m => {
                 println!("Unexpected message: {:?}", m);
diff --git a/minisql/src/interpreter.rs b/minisql/src/interpreter.rs
index a6c1235..90e4f02 100644
--- a/minisql/src/interpreter.rs
+++ b/minisql/src/interpreter.rs
@@ -58,6 +58,18 @@ impl State {
         }
     }
 
+    /// Returns each table name paired with a borrow of that table's schema.
+    /// Panics if a mapped position has no entry in `self.tables`.
+    /// TODO: return a reference to avoid allocations
+    pub fn metadata<'a>(&'a self) -> Vec<(String, &'a TableSchema)> {
+        let mut m = Vec::new();
+        for (name, pos) in &self.table_name_position_mapping {
+            let table_schema = self.tables.get(*pos).unwrap().schema();
+            m.push((name.clone(), table_schema));
+        }
+        m
+    }
+
     fn table_from_name<'a>(&'a
self, table_name: &TableName) -> DbResult<&'a Table> {
         match self.table_name_position_mapping.get_by_left(table_name) {
             Some(table_position) => {
@@ -654,4 +664,4 @@ pub fn example() {
         println!("{:?}", response);
         println!();
     }
-}
\ No newline at end of file
+}
diff --git a/minisql/src/schema.rs b/minisql/src/schema.rs
index 4f2bf54..ba9759a 100644
--- a/minisql/src/schema.rs
+++ b/minisql/src/schema.rs
@@ -12,8 +12,10 @@ use std::collections::HashMap;
 pub struct TableSchema {
     table_name: TableName, // used for descriptive errors
     primary_key: ColumnPosition,
-    column_name_position_mapping: BiMap<ColumnName, ColumnPosition>,
-    types: Vec<DbType>,
+    /// Two-way mapping between column names and column positions.
+    pub column_name_position_mapping: BiMap<ColumnName, ColumnPosition>,
+    /// Column types; the parser fills this in column-position order.
+    pub types: Vec<DbType>,
 }
 
 pub type TableName = String;
@@ -113,7 +113,7 @@ impl TableSchema {
         }
     }
 
-    fn number_of_columns(&self) -> usize {
+    pub fn number_of_columns(&self) -> usize {
         self.column_name_position_mapping.len()
     }
 
diff --git a/minisql/src/type_system.rs b/minisql/src/type_system.rs
index 5f1a0ca..8b95d7b 100644
--- a/minisql/src/type_system.rs
+++ b/minisql/src/type_system.rs
@@ -1,7 +1,7 @@
 use crate::error::TypeConversionError;
 
 // ==============Types================
-#[derive(Debug, Clone, Copy)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum DbType {
     String,
     Int,
diff --git a/parser/Cargo.toml b/parser/Cargo.toml
new file mode 100644
index 0000000..ff211a0
--- /dev/null
+++ b/parser/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "parser"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+# NOTE(review): nom-peg appears unused by the parser sources, yet it pulls in nom 4 and syn 0.15 - confirm and drop.
+nom-peg = "0.1.1"
+nom = "7.1.3"
+minisql = { path = "../minisql" }
+bimap = "0.6.3"
+thiserror = "1"
\ No newline at end of file
diff --git a/parser/src/common.rs b/parser/src/common.rs
new file mode 100644
index 0000000..0cd90c1
--- /dev/null
+++ b/parser/src/common.rs
@@ -0,0 +1,95 @@
+use nom::{
+    character::complete::{alphanumeric1, char, multispace0, anychar, multispace1},
+    combinator::peek,
+    error::make_error,
+    sequence::{delimited, terminated},
+    bytes::complete::tag,
+    IResult, branch::alt,
+};
+use minisql::{operation::Condition, type_system::DbType};
+
+use crate::literal::parse_db_value;
+
+/// Parses a table name: either a double-quoted alphanumeric name (the quotes
+/// are not part of the result) or a bare identifier.
+pub fn parse_table_name(input: &str) -> IResult<&str, &str> {
+    alt((
+        delimited(char('"'), alphanumeric1, char('"')),
+        parse_identifier
+    ))(input)
+}
+
+/// Parses a run of alphanumeric characters whose first character is a letter.
+///
+/// Fails with `ErrorKind::Alpha` when the first character is not alphabetic.
+pub fn parse_identifier(input: &str) -> IResult<&str, &str> {
+    // TODO: allow underscores
+    let (_, first) = peek(anychar)(input)?;
+    if first.is_alphabetic() {
+        alphanumeric1(input)
+    } else {
+        Err(nom::Err::Error(make_error(input, nom::error::ErrorKind::Alpha)))
+    }
+}
+
+/// Parses an identifier plus any whitespace after it and returns the name as
+/// an owned `String`.
+pub fn parse_column_name(input: &str) -> IResult<&str, String> {
+    terminated(parse_identifier, multispace0)(input).map(|(rest, name)| (rest, name.to_string()))
+}
+
+/// Parses a column type keyword into a `DbType`.
+///
+/// Accepted keywords: `STRING`, `INT`, `NUMBER`, `UUID`. The mixed-case
+/// spelling `Float` is still accepted as an alias of `NUMBER` so existing
+/// statements keep parsing.
+pub fn parse_db_type(input: &str) -> IResult<&str, DbType> {
+    let (input, type_name) = alt((
+        tag("STRING"),
+        tag("INT"),
+        tag("NUMBER"),
+        tag("Float"),
+        tag("UUID"),
+    ))(input)?;
+    let db_type = match type_name {
+        "STRING" => DbType::String,
+        "INT" => DbType::Int,
+        "UUID" => DbType::Uuid,
+        "NUMBER" | "Float" => DbType::Number,
+        // Cannot happen: `alt` above only yields the tags matched here.
+        _ => return Err(nom::Err::Failure(make_error(input, nom::error::ErrorKind::IsNot)))
+    };
+    Ok((input, db_type))
+}
+
+/// Parses an optional `WHERE <column> = <value>` clause.
+///
+/// Returns `None` without consuming input when the text does not start with
+/// `WHERE`; once `WHERE` has matched, a malformed condition is an error.
+pub fn parse_condition(input: &str) -> IResult<&str, Option<Condition>> {
+    match tag::<&str, &str, nom::error::Error<&str>>("WHERE")(input) {
+        Ok((input, _)) => {
+            let (input, _) = multispace1(input)?;
+            let (input, condition) = parse_equality(input)?;
+            Ok((input, Some(condition)))
+        }
+        Err(_) => {
+            Ok((input, None))
+        }
+    }
+}
+
+/// Parses `<column> = <value>` into `Condition::Eq`, allowing whitespace
+/// around the `=`.
+fn parse_equality(input: &str) -> IResult<&str, Condition> {
+    let (input, column_name) = parse_column_name(input)?;
+    let (input, _) = multispace0(input)?;
+    let (input, _) = char('=')(input)?;
+    let (input, _) = multispace0(input)?;
+    let (input, db_value) = parse_db_value(input)?;
+    Ok((input, Condition::Eq(column_name, db_value)))
+}
+
+#[cfg(test)]
+mod tests {
+    use minisql::{operation::Condition, type_system::DbType};
+    use
crate::common::{parse_db_type, parse_equality};
+
+    #[test]
+    fn test_parse_equality() {
+        use minisql::type_system::{IndexableValue, Value};
+        match parse_equality("id = 1") {
+            Ok(("", Condition::Eq(column_name, value))) => {
+                assert!(column_name.eq("id"));
+                assert_eq!(value, Value::Indexable(IndexableValue::Int(1)))
+            }
+            _ => {
+                panic!("should parse");
+            }
+        }
+    }
+
+    #[test]
+    fn test_parse_db_type() {
+        assert!(matches!(parse_db_type("INT").expect("should parse").1, DbType::Int));
+        assert!(matches!(parse_db_type("STRING").expect("should parse").1, DbType::String));
+        assert!(matches!(parse_db_type("UUID").expect("should parse").1, DbType::Uuid));
+        assert!(matches!(parse_db_type("NUMBER").expect("should parse").1, DbType::Number));
+        assert!(matches!(parse_db_type("Unknown"), Err(_)));
+    }
+}
\ No newline at end of file
diff --git a/parser/src/core.rs b/parser/src/core.rs
new file mode 100644
index 0000000..778973d
--- /dev/null
+++ b/parser/src/core.rs
@@ -0,0 +1,41 @@
+use minisql::{operation::Operation, schema::TableSchema};
+use nom::{branch::alt, multi::many0, IResult};
+
+use crate::{create::parse_create, delete::parse_delete, index::parse_create_index, insert::parse_insert, select::parse_select, validation::{validate_operation, ValidationError}};
+
+/// Failure modes of [`parse_and_validate`].
+#[derive(Debug)]
+pub enum Error {
+    /// The query text is not a supported statement; holds the rendered parser error.
+    ParsingError(String),
+    /// The statement parsed but was rejected by validation.
+    ValidationError(ValidationError)
+}
+
+/// Parses a single statement by trying each statement parser in turn.
+pub fn parse_statement<'a>(input: &'a str) -> IResult<&str, Operation> {
+    alt((
+        parse_insert,
+        parse_create,
+        parse_delete,
+        //parse_drop,
+        parse_select,
+        // parse_update,
+        parse_create_index
+    ))(input)
+}
+
+/// Parses zero or more statements.
+///
+/// Whitespace after each statement is consumed; without that, the whitespace
+/// between two statements stopped parsing after the first one.
+pub fn parse_statements<'a>(input: &'a str) -> IResult<&str, Vec<Operation>> {
+    many0(nom::sequence::terminated(
+        parse_statement,
+        nom::character::complete::multispace0,
+    ))(input)
+}
+
+/// Parses `query` as exactly one statement and validates it against `db_metadata`.
+///
+/// Returns `Error::ParsingError` when the text is not a supported statement or
+/// when anything other than whitespace follows it, and `Error::ValidationError`
+/// when validation rejects the parsed operation.
+pub fn parse_and_validate(query: String, db_metadata: &Vec<(String, &TableSchema)>) -> Result<Operation, Error> {
+    let (rest, op) = parse_statement(query.as_str())
+        .map_err(|err| {
+            Error::ParsingError(err.to_string())
+        })?;
+    // Leftover text used to be dropped silently, so anything after the first
+    // statement (a second statement, stray characters) was ignored.
+    let rest = rest.trim();
+    if !rest.is_empty() {
+        return Err(Error::ParsingError(format!("unexpected input after statement: {}", rest)));
+    }
+
+    validate_operation(&op, db_metadata).map_err(|err|
Error::ValidationError(err))?;
+    Ok(op)
+}
+
+// #[test]
+// fn test_select() {
+//     parse_and_validate("SELECT * FROM users;".to_string(), &Vec::new()).unwrap();
+// }
\ No newline at end of file
diff --git a/parser/src/create.rs b/parser/src/create.rs
new file mode 100644
index 0000000..4cda32d
--- /dev/null
+++ b/parser/src/create.rs
@@ -0,0 +1,109 @@
+use minisql::{operation::Operation, schema::{ColumnName, TableSchema}, type_system::DbType};
+use nom::{
+    bytes::complete::tag,
+    character::complete::{char, multispace0, multispace1},
+    multi::separated_list0,
+    sequence::terminated,
+    IResult, combinator::opt,
+};
+
+use crate::common::{parse_table_name, parse_identifier, parse_db_type};
+
+/// Parses `CREATE TABLE <name> ( <column definitions> ) ;` into
+/// `Operation::CreateTable`.
+///
+/// Column positions follow declaration order. If several columns are marked
+/// `PRIMARY KEY`, the last one wins.
+pub fn parse_create(input: &str) -> IResult<&str, Operation> {
+    let (input, _) = tag("CREATE")(input)?;
+    let (input, _) = multispace1(input)?;
+    let (input, _) = tag("TABLE")(input)?;
+    let (input, _) = multispace1(input)?;
+    let (input, table_name) = parse_table_name(input)?;
+    let (input, _) = multispace0(input)?;
+    let (input, _) = char('(')(input)?;
+    let (input, _) = multispace0(input)?;
+    let (input, column_definitions) = parse_column_definitions(input)?;
+    let mut column_name_position_mapping = Vec::new();
+    let mut types: Vec<DbType> = Vec::new();
+    let mut primary_key = None;
+    // Consume the definitions so names and types are moved, not cloned.
+    for (position, (column_name, db_type, pk)) in column_definitions.into_iter().enumerate() {
+        types.push(db_type);
+        if pk {
+            primary_key = Some(position);
+        }
+        column_name_position_mapping.push((column_name, position));
+    }
+
+    let (input, _) = char(')')(input)?;
+    let (input, _) = multispace0(input)?;
+    let (input, _) = char(';')(input)?;
+    let schema = TableSchema::new(
+        table_name.to_string(),
+        // NOTE(review): with no PRIMARY KEY column this silently picks position 0 - confirm intended.
+        primary_key.unwrap_or_default(),
+        column_name_position_mapping,
+        types
+    );
+    Ok((
+        input,
+        Operation::CreateTable(table_name.to_string(), schema),
+    ))
+}
+
+/// Parses a comma-separated, possibly empty, list of column definitions.
+pub fn parse_column_definitions(input: &str) -> IResult<&str, Vec<(ColumnName, DbType, bool)>> {
separated_list0(terminated(char(','), multispace0), parse_column_definition)(input)
+}
+
+/// Parses the ` PRIMARY KEY` marker; at least one whitespace character must
+/// come before `PRIMARY` and between the two keywords.
+fn parse_primary_key(input: &str) -> IResult<&str, &str> {
+    let (input, _) = multispace1(input)?;
+    let (input, _) = tag("PRIMARY")(input)?;
+    let (input, _) = multispace1(input)?;
+    let (input, _) = tag("KEY")(input)?;
+    Ok((input, "PRIMARY KEY"))
+}
+
+/// Parses `<identifier> <type> [PRIMARY KEY]` and any trailing whitespace.
+///
+/// Returns the column name, its type, and whether `PRIMARY KEY` was present.
+pub fn parse_column_definition(input: &str) -> IResult<&str, (ColumnName, DbType, bool)> {
+    let (input, identifier) = parse_identifier(input)?;
+    let (input, _) = multispace1(input)?;
+    let (input, db_type) = parse_db_type(input)?;
+    // `opt` turns a missing marker into `None` instead of an error.
+    let (input, pk) = opt(parse_primary_key)(input).map(|(input, pk)| (input, pk.is_some()))?;
+    let (input, _) = multispace0(input)?;
+    Ok((input, (identifier.to_string(), db_type, pk)))
+}
+
+#[cfg(test)]
+mod tests {
+    use minisql::operation::Operation;
+    use crate::create::parse_create;
+
+    #[test]
+    fn test_parse_create_no_spaces() {
+        parse_create("CREATE TABLE \"Table1\"(id UUID ,column1 INT);").expect("should parse");
+    }
+
+    #[test]
+    fn test_parse_create_primary_key() {
+        parse_create("CREATE TABLE \"Table1\"(id UUID PRIMARY KEY,column1 INT);").expect("should parse");
+    }
+
+    #[test]
+    fn test_parse_create_no_quotes_table_name() {
+        parse_create("CREATE TABLE Table1(id UUID PRIMARY KEY,column1 INT);").expect("should parse");
+    }
+
+    #[test]
+    fn test_parse_create_primary_key_with_spaces() {
+        parse_create("CREATE TABLE \"Table1\" ( id UUID PRIMARY KEY , column1 INT ) ;").expect("should parse");
+    }
+
+    #[test]
+    fn test_parse_create() {
+        let (_, create) = parse_create("CREATE TABLE \"Table1\"( id UUID , column1 INT );").expect("should parse");
+        assert!(matches!(create, Operation::CreateTable(_ ,_)));
+        match create {
+            Operation::CreateTable(name, schema) => {
+                assert_eq!(name, "Table1");
+                assert_eq!(schema.number_of_columns(), 2);
+                // Positions follow declaration order.
+                assert_eq!(schema.column_position_from_column_name(&"id".to_string()).unwrap(), 0);
assert_eq!(schema.column_position_from_column_name(&"column1".to_string()).unwrap(), 1);
+            }
+            _ => {}
+        }
+
+    }
+}
\ No newline at end of file
diff --git a/parser/src/delete.rs b/parser/src/delete.rs
new file mode 100644
index 0000000..e1f5a4c
--- /dev/null
+++ b/parser/src/delete.rs
@@ -0,0 +1,38 @@
+use minisql::operation::Operation;
+use nom::{
+    bytes::complete::tag,
+    character::complete::{char, multispace0, multispace1},
+    IResult,
+};
+
+use crate::common::{parse_table_name, parse_condition};
+
+/// Parses `DELETE FROM <table> [WHERE <column> = <value>] ;` into
+/// `Operation::Delete`; the condition is `None` when no `WHERE` clause is given.
+pub fn parse_delete(input: &str) -> IResult<&str, Operation> {
+    let (input, _) = tag("DELETE")(input)?;
+    let (input, _) = multispace1(input)?;
+    let (input, _) = tag("FROM")(input)?;
+    let (input, _) = multispace1(input)?;
+    let (input, table_name) = parse_table_name(input)?;
+    let (input, _) = multispace0(input)?;
+    let (input, condition) = parse_condition(input)?;
+    let (input, _) = multispace0(input)?;
+    let (input, _) = char(';')(input)?;
+    Ok((
+        input,
+        Operation::Delete(table_name.to_string(), condition),
+    ))
+}
+
+#[cfg(test)]
+mod tests {
+    use minisql::operation::Operation;
+    use crate::delete::parse_delete;
+
+    #[test]
+    fn test_parse_delete() {
+        let (_, operation) = parse_delete("DELETE FROM \"T1\" WHERE id = 1 ;").expect("should parse");
+        assert!(matches!(operation, Operation::Delete(_, _)))
+    }
+
+// TODO: add a test for DELETE without a WHERE clause (the test above already covers the conditional form)
+}
\ No newline at end of file
diff --git a/parser/src/index.rs b/parser/src/index.rs
new file mode 100644
index 0000000..80f6a96
--- /dev/null
+++ b/parser/src/index.rs
@@ -0,0 +1,70 @@
+use minisql::operation::Operation;
+use nom::{
+    bytes::complete::tag,
+    character::complete::{char, multispace0, multispace1},
+    IResult, combinator::opt,
+};
+
+use crate::common::{parse_identifier, parse_table_name};
+
+/// Parses `CREATE [UNIQUE] INDEX <index name> ON <table> ( <column> ) ;` into
+/// `Operation::CreateIndex(table, column)`.
+pub fn parse_create_index(input: &str) -> IResult<&str, Operation> {
+    let (input, _) = tag("CREATE")(input)?;
+    // Matches the optional ` UNIQUE` keyword, including the whitespace before it.
+    let unique = |input| -> IResult<&str, bool> {
+        let (input, _) = multispace1(input)?;
+        let (input, _) =
tag("UNIQUE")(input)?;
+        Ok((input, true))
+    };
+    // The UNIQUE flag is parsed but its result is discarded here.
+    let (input, _) = opt(unique)(input)?;
+    let (input, _) = multispace1(input)?;
+    let (input, _) = tag("INDEX")(input)?;
+    let (input, _) = multispace1(input)?;
+    // The index name is parsed and discarded; only table and column are kept.
+    let (input, _) = parse_identifier(input)?;
+    let (input, _) = multispace1(input)?;
+    let (input, _) = tag("ON")(input)?;
+    let (input, _) = multispace1(input)?;
+    let (input, table_name) = parse_table_name(input)?;
+    let (input, _) = multispace0(input)?;
+    let (input, _) = char('(')(input)?;
+    let (input, _) = multispace0(input)?;
+    let (input, column_name) = parse_identifier(input)?;
+    let (input, _) = multispace0(input)?;
+    let (input, _) = char(')')(input)?;
+    let (input, _) = multispace0(input)?;
+    let (input, _) = char(';')(input)?;
+    let operation = Operation::CreateIndex(table_name.to_string(), column_name.to_string());
+    Ok((input, operation))
+}
+
+
+#[cfg(test)]
+mod tests {
+    use minisql::operation::Operation;
+    use crate::index::parse_create_index;
+
+
+    #[test]
+    fn test_create_index() {
+        let (_, operation) = parse_create_index("CREATE UNIQUE INDEX idxcontactsemail ON \"contacts\" (email);").expect("should parse");
+        assert!(matches!(operation, Operation::CreateIndex(_, _)));
+        match operation {
+            Operation::CreateIndex(table_name, column_name) => {
+                assert_eq!(table_name, "contacts");
+                assert_eq!(column_name, "email");
+            }
+            _ => {}
+        }
+    }
+
+    #[test]
+    fn test_create_index_with_spaces() {
+        let (_, operation) = parse_create_index("CREATE UNIQUE INDEX idxcontactsemail ON \"contacts\" ( email ) ;").expect("should parse");
+        assert!(matches!(operation, Operation::CreateIndex(_, _)));
+        match operation {
+            Operation::CreateIndex(table_name, column_name) => {
+                assert_eq!(table_name, "contacts");
+                assert_eq!(column_name, "email");
+            }
+            _ => {}
+        }
+    }
+}
\ No newline at end of file
diff --git a/parser/src/insert.rs b/parser/src/insert.rs
new file mode 100644
index 0000000..340e6bf
--- /dev/null
+++ b/parser/src/insert.rs
@@ -0,0 +1,94 @@
+use
crate::{literal::parse_db_value, common::{parse_table_name, parse_identifier}};
+use minisql::{operation::Operation, type_system::Value};
+use nom::{
+    bytes::complete::tag,
+    character::complete::{multispace0, multispace1, char},
+    combinator::map,
+    multi::separated_list0,
+    sequence::terminated,
+    IResult,
+};
+
+/// Parses `INSERT INTO <table> ( <columns> ) VALUES ( <values> ) ;` into
+/// `Operation::Insert`, pairing each column name with the value at the same
+/// position.
+///
+/// Fails with a nom `Failure` (`ErrorKind::Verify`) when the number of columns
+/// differs from the number of values.
+pub fn parse_insert(input: &str) -> IResult<&str, Operation> {
+    let (input, _) = tag("INSERT")(input)?;
+    let (input, _) = multispace1(input)?;
+    let (input, _) = tag("INTO")(input)?;
+    let (input, _) = multispace1(input)?;
+    let (input, table_name) = parse_table_name(input)?;
+    let (input, _) = multispace1(input)?;
+    let (input, _) = char('(')(input)?;
+    let (input, _) = multispace0(input)?;
+    let (input, column_names) = parse_columns(input)?;
+    let (input, _) = multispace0(input)?;
+    let (input, _) = char(')')(input)?;
+    let (input, _) = multispace1(input)?;
+    let (input, _) = tag("VALUES")(input)?;
+    let (input, _) = multispace0(input)?;
+    let (input, _) = char('(')(input)?;
+    let (input, _) = multispace0(input)?;
+    let (input, values) = parse_values(input)?;
+    // `zip` stops at the shorter list, so a count mismatch used to drop the
+    // surplus columns or values without any error.
+    if column_names.len() != values.len() {
+        return Err(nom::Err::Failure(nom::error::make_error(
+            input,
+            nom::error::ErrorKind::Verify,
+        )));
+    }
+    let (input, _) = multispace0(input)?;
+    let (input, _) = char(')')(input)?;
+    let (input, _) = multispace0(input)?;
+    let (input, _) = char(';')(input)?;
+    Ok((
+        input,
+        Operation::Insert(table_name.to_string(), column_names.into_iter().zip(values).collect()),
+    ))
+}
+
+/// Parses a comma-separated, possibly empty, list of column identifiers.
+pub fn parse_columns(input: &str) -> IResult<&str, Vec<String>> {
+    separated_list0(terminated(char(','), multispace0), map(parse_identifier, |name| name.to_string()))(input)
+}
+
+/// Parses a comma-separated, possibly empty, list of literal values.
+pub fn parse_values(input: &str) -> IResult<&str, Vec<Value>> {
+    separated_list0(terminated(char(','), multispace0), parse_db_value)(input)
+}
+
+
+#[cfg(test)]
+mod tests {
+    use minisql::{operation::Operation, type_system::{IndexableValue, Value}};
+
+    use super::parse_insert;
+
+
+    #[test]
+    fn test_parse_insert() {
+        let sql = "INSERT INTO \"MyTable\" (id, data) VALUES(1, \"Text\");";
+        let operation = parse_insert(sql).expect("should parse");
+        match
operation {
+            ("", Operation::Insert(table_name, insertion_values)) => {
+                assert_eq!(table_name, "MyTable");
+                assert_eq!(
+                    insertion_values,
+                    vec![
+                        ("id".to_string(), Value::Indexable(IndexableValue::Int(1))),
+                        ("data".to_string(), Value::Indexable(IndexableValue::String("Text".to_string())))
+                    ]);
+            }
+            _ => {
+                unreachable!()
+            }
+        }
+    }
+
+    #[test]
+    fn test_parse_insert_with_spaces() {
+        let sql = "INSERT INTO \"MyTable\" ( id, data ) VALUES ( 1, \"Text\" ) ;";
+        let operation = parse_insert(sql).expect("should parse");
+        match operation {
+            ("", Operation::Insert(table_name, insertion_values)) => {
+                assert_eq!(table_name, "MyTable");
+                assert_eq!(insertion_values,
+                    vec![
+                        ("id".to_string(), Value::Indexable(IndexableValue::Int(1))),
+                        ("data".to_string(), Value::Indexable(IndexableValue::String("Text".to_string())))
+                    ]);
+            }
+            _ => {
+                unreachable!()
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/parser/src/lib.rs b/parser/src/lib.rs
new file mode 100644
index 0000000..1a4cb72
--- /dev/null
+++ b/parser/src/lib.rs
@@ -0,0 +1,18 @@
+//! SQL text parser and validator that produces `minisql` operations.
+
+mod literal;
+mod select;
+mod common;
+mod create;
+mod insert;
+mod delete;
+mod index;
+mod validation;
+mod core;
+
+// `self::core` avoids the ambiguity (E0659) between this module and the built-in `core` crate.
+pub use self::core::parse_and_validate;
+pub use self::core::Error;
+pub use validation::validate_operation;
+
+pub use minisql;
\ No newline at end of file
diff --git a/parser/src/literal.rs b/parser/src/literal.rs
new file mode 100644
index 0000000..2552e1e
--- /dev/null
+++ b/parser/src/literal.rs
@@ -0,0 +1,164 @@
+use minisql::type_system::{IndexableValue, Value};
+use nom::{
+    branch::alt,
+    character::complete::{u64, char, digit1, none_of},
+    combinator::opt,
+    multi::many0,
+    sequence::{delimited, pair, preceded},
+    IResult, error::make_error
+};
+
+/// Parses a literal value: a double-quoted string or a number.
+///
+/// NOTE(review): `parse_number` already handles every input that starts with
+/// digits (returning an `Int` when there is no fractional part), so the
+/// `parse_int` and `parse_uuid` alternatives look unreachable - confirm.
+pub fn parse_db_value(input: &str) -> IResult<&str, Value> {
+    alt((
+        parse_string,
+        parse_number,
+        parse_int,
+        parse_uuid,
+    ))(input)
+}
+
+/// Parses an optionally negative number: `Value::Number` when a fractional
+/// part is present, otherwise an `Int`.
+pub fn parse_number(input: &str) -> IResult<&str, Value> {
+    // Parse the integer part
+    let (input,
(sign, digits)) = pair(opt(char('-')), digit1)(input)?;
+
+    // Parse the fractional part
+    let (input, frac_part) = opt(pair(char('.'), digit1))(input)?;
+
+    // Sign plus integer digits, e.g. "+12" or "-12"; `str::parse` accepts the leading '+'.
+    let int_text = format!("{}{}", sign.unwrap_or('+'), digits);
+
+    // A conversion failure below is reported as a nom `Failure`, which makes
+    // an enclosing `alt` stop instead of trying further alternatives.
+    match frac_part {
+        Some((_dot, fdigits)) => {
+            // Combine integer and fractional parts
+            let combined_parts = format!("{}.{}", int_text, fdigits);
+            // Parse the combined parts as a floating-point number; the float
+            // type is inferred from the payload of `Value::Number`.
+            let value = combined_parts.parse()
+                .map_err(|_| {
+                    nom::Err::Failure(make_error(input, nom::error::ErrorKind::Fail))
+                })?;
+            Ok((input, Value::Number(value)))
+        }
+        None => {
+            // Integers are unsigned, so a negative literal such as "-5" fails here.
+            let value = int_text.parse::<u64>()
+                .map_err(|_| {
+                    nom::Err::Failure(make_error(input, nom::error::ErrorKind::Fail))
+                })?;
+            Ok((input, Value::Indexable(IndexableValue::Int(value))))
+        }
+    }
+}
+
+/// Parses an unsigned decimal integer into an `Int` value.
+pub fn parse_int(input: &str) -> IResult<&str, Value> {
+    u64(input).map(|(input, v)| {
+        (input, Value::Indexable(IndexableValue::Int(v)))
+    })
+}
+
+/// Matches the two-character escape `\t` and yields a tab.
+fn escape_tab(input:&str) -> IResult<&str, char> {
+    let (input, _) = preceded(char('\\'), char('t'))(input)?;
+    Ok((input, '\t'))
+}
+
+/// Matches the escape `\\` and yields a single backslash.
+fn escape_backslash(input:&str) -> IResult<&str, char> {
+    let (input, _) = preceded(char('\\'), char('\\'))(input)?;
+    Ok((input, '\\'))
+}
+
+/// Matches the escape `\n` and yields a newline.
+fn escape_newline(input:&str) -> IResult<&str, char> {
+    let (input, _) = preceded(char('\\'), char('n'))(input)?;
+    Ok((input, '\n'))
+}
+
+/// Matches the escape `\r` and yields a carriage return.
+fn escape_carriegereturn(input:&str) -> IResult<&str, char> {
+    let (input, _) = preceded(char('\\'), char('r'))(input)?;
+    Ok((input, '\r'))
+}
+
+/// Matches the escape `\"` and yields a double quote.
+fn escape_doublequote(input:&str) -> IResult<&str, char> {
+    preceded(char('\\'), char('"'))(input)
+}
+
+/// Parses a double-quoted string literal, resolving the escapes `\\`, `\r`,
+/// `\n`, `\"` and `\t`, into a `String` value.
+pub fn parse_string(input: &str) -> IResult<&str, Value> {
+    // Parse the content inside the double quotes
+    let (input, content) = delimited(
+        char('"'),
+        many0(alt((
+            escape_backslash,
+            escape_carriegereturn,
+            escape_newline,
+            escape_doublequote,
+            escape_tab,
+            none_of(r#"\""#)
+        ))),
+        char('"'),
+    )(input)?;
+
+    //
Combine the characters into a string
+    let value: String = content.into_iter().collect();
+
+    Ok((input, Value::Indexable(IndexableValue::String(value))))
+}
+
+/// Parses an unsigned decimal integer and wraps it as a `Uuid` value.
+fn parse_uuid(input: &str) -> IResult<&str, Value> {
+    // TODO: make it actually uuid
+    u64(input).map(|(input, v)| {
+        (input, Value::Indexable(IndexableValue::Uuid(v)))
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use minisql::type_system::{IndexableValue, Value};
+    use crate::literal::{parse_db_value, parse_string};
+
+
+    #[test]
+    fn test_string_parser() {
+        assert_eq!(parse_string(r#""simple""#), Ok(("", Value::Indexable(IndexableValue::String(String::from("simple"))))));
+        // Every supported escape sequence in one literal.
+        assert_eq!(parse_string(r#""\"\t\r\n\\""#), Ok(("", Value::Indexable(IndexableValue::String(String::from("\"\t\r\n\\"))))));
+        assert_eq!(parse_string(r#""name is \"John\".""#), Ok(("", Value::Indexable(IndexableValue::String(String::from("name is \"John\"."))))));
+    }
+
+    #[test]
+    fn test_parse_db_value() {
+        let (input, value) = parse_db_value("5").expect("should parse");
+        assert_eq!(input, "");
+        assert_eq!(value, Value::Indexable(IndexableValue::Int(5)));
+
+        let (input, value) = parse_db_value("5.5").expect("should parse");
+        assert_eq!(input, "");
+        assert_eq!(value, Value::Number(5.5));
+
+        let (_, _) = parse_db_value("\"STRING\"").expect("should parse");
+        let (input, value) = parse_db_value("\"abcdefghkjklmnopqrstuvwxyz!@#$%^&*()_+ \"").expect("should parse");
+        assert_eq!(input, "");
+        assert_eq!(value, Value::Indexable(IndexableValue::String("abcdefghkjklmnopqrstuvwxyz!@#$%^&*()_+ ".to_string())));
+
+    }
+
+
+    #[test]
+    fn test_parse_positive_float() {
+        assert_eq!(parse_db_value("23.213313"), Ok(("", Value::Number(23.213313))));
+        assert_eq!(parse_db_value("2241.9734"), Ok(("", Value::Number(2241.9734))));
+    }
+
+    #[test]
+    fn test_parse_negative_float() {
+        assert_eq!(parse_db_value("-9241.873654"), Ok(("", Value::Number(-9241.873654))));
+        assert_eq!(parse_db_value("-62625.0"), Ok(("", Value::Number(-62625.0))));
+    }
+
+
#[test]
+    fn test_parse_float_between_0_and_1() {
+        assert_eq!(parse_db_value("0.873654"), Ok(("", Value::Number(0.873654))));
+        assert_eq!(parse_db_value("0.62625"), Ok(("", Value::Number(0.62625))));
+    }
+
+
+    #[test]
+    fn test_parse_int() {
+        assert_eq!(parse_db_value("5134616"), Ok(("", Value::Indexable(IndexableValue::Int(5134616)))));
+    }
+}
\ No newline at end of file
diff --git a/parser/src/select.rs b/parser/src/select.rs
new file mode 100644
index 0000000..2c6ab71
--- /dev/null
+++ b/parser/src/select.rs
@@ -0,0 +1,122 @@
+use crate::common::{parse_table_name, parse_column_name, parse_condition};
+use minisql::operation::{ColumnSelection, Operation};
+use nom::{
+    branch::alt,
+    bytes::complete::tag,
+    character::complete::{multispace0, multispace1, char},
+    combinator::map,
+    error::Error,
+    multi::separated_list0,
+    sequence::terminated,
+    IResult,
+};
+
+/// Parses `SELECT <* | columns> FROM <table> [WHERE <column> = <value>] ;`
+/// into `Operation::Select`.
+pub fn parse_select(input: &str) -> IResult<&str, Operation> {
+    let (input, _) = tag("SELECT")(input)?;
+    let (input, _) = multispace1(input)?;
+
+    let (input, column_selection) = try_parse_column_selection(input)?;
+    let (input, _) = multispace0(input)?;
+    let (input, _) = tag("FROM")(input)?;
+    let (input, _) = multispace1(input)?;
+    let (input, table_name) = parse_table_name(input)?;
+    let (input, _) = multispace0(input)?;
+    let (input, condition) = parse_condition(input)?;
+    let (input, _) = multispace0(input)?;
+    // TODO: make it optional?
+    let (input, _) = tag(";")(input)?;
+    Ok((
+        input,
+        Operation::Select(table_name.to_string(), column_selection, condition),
+    ))
+}
+
+/// Parses the column part of a SELECT: `*` gives `ColumnSelection::All`,
+/// otherwise a comma-separated list of column names gives
+/// `ColumnSelection::Columns`.
+pub fn try_parse_column_selection(input: &str) -> IResult<&str, ColumnSelection> {
+    let all_parser = map(tag::<&str, &str, Error<&str>>("*"), |_| {
+        ColumnSelection::All
+    });
+    // `separated_list0` also succeeds with an empty list when no column name matches.
+    let columns_parser = map(
+        separated_list0(terminated(char(','), multispace0), parse_column_name),
+        |names| ColumnSelection::Columns(names),
+    );
+    alt((all_parser, columns_parser))(input)
+}
+
+#[cfg(test)]
+mod tests {
+    use minisql::operation::{ColumnSelection, Operation};
+    use crate::{common::{parse_column_name, parse_table_name}, select::parse_select};
+
+
+    #[test]
+    fn test_parse_select_all() {
+        let sql = "SELECT * FROM \"MyTable\";";
+        let operation = parse_select(sql).expect("should parse");
+        match operation {
+            ("", Operation::Select(table_name, column_selection, maybe_condition)) => {
+                assert_eq!(table_name, "MyTable");
+                assert!(matches!(column_selection, ColumnSelection::All));
+                assert!(matches!(maybe_condition, None));
+            }
+            (input, _) => {
+                println!("Input to be parsed: {}", input);
+                panic!("expected select operation")
+            }
+        }
+    }
+
+    #[test]
+    fn test_parse_column_name() {
+        parse_column_name("1abc").expect_err("variable names should not start with number");
+    }
+
+    #[test]
+    fn test_parse_table_name() {
+        parse_table_name("\"\"").expect_err("Empty table names are not allowed");
+    }
+
+    #[test]
+    fn test_parse_select_columns() {
+        let sql = "SELECT name , email FROM \"AddressBook\" ;";
+        let operation = parse_select(sql).expect("should parse");
+        match operation {
+            ("", Operation::Select(table_name, column_selection, maybe_condition)) => {
+                assert_eq!(table_name, "AddressBook");
+                assert!(matches!(column_selection, ColumnSelection::Columns(_)));
+                match column_selection {
+                    ColumnSelection::Columns(column_names) => {
+                        assert_eq!(column_names, vec!["name", "email"]);
+                    }
+                    _ => {
+                        panic!("should select columns")
+                    }
+                }
+
assert!(matches!(maybe_condition, None)); + } + (input, _) => { + println!("Input to be parsed: {}", input); + panic!("expected select operation") + } + } + } + + #[test] + fn test_parse_select_where() { + use minisql::operation::Condition; + let sql = "SELECT * FROM \"AddressBook\" WHERE id = 5 ;"; + let operation = parse_select(sql).expect("should parse"); + match operation { + ("", Operation::Select(table_name, column_selection, maybe_condition)) => { + assert_eq!(table_name, "AddressBook"); + assert!(matches!(column_selection, ColumnSelection::All)); + assert!(matches!(maybe_condition, Some(Condition::Eq(_, _)))); + } + (input, _) => { + println!("Input to be parsed: {}", input); + panic!("expected select operation") + } + } + } + // TODO: a test with multiple statements + // TODO: allow underscores in identifiers +} \ No newline at end of file diff --git a/parser/src/validation.rs b/parser/src/validation.rs new file mode 100644 index 0000000..2be6dce --- /dev/null +++ b/parser/src/validation.rs @@ -0,0 +1,206 @@ + +use std::collections::HashSet; + +use minisql::{operation::{ColumnSelection, Condition, InsertionValues, Operation}, schema::TableSchema, type_system::{DbType, IndexableValue, Value}}; + + +#[derive(Debug)] +pub enum ValidationError { + TableDoesNotExist(String), + TableExists(String), + ColumnDoesNotExist(String), + BadColumnPosition(usize), + DuplicateColumn(String), + TypeMismatch, + ValueForRequiredColumnIsMissing(String) +} + +pub fn type_of(value: &Value) -> DbType { + match value { + Value::Indexable(IndexableValue::Int(_)) => DbType::Int, + Value::Indexable(IndexableValue::String(_)) => DbType::String, + Value::Number(_) => DbType::Number, + Value::Indexable(IndexableValue::Uuid(_)) => DbType::Uuid + } +} + +/// Validates the operation based on db_metadata +pub fn validate_operation(operation: &Operation, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> { + match operation { + Operation::Select(table_name, 
column_selection, condition) => { + validate_select(table_name, column_selection, condition, db_metadata)?; + }, + Operation::Insert(table_name, insertion_values) => { + validate_insert(&table_name, insertion_values, db_metadata)?; + }, + Operation::Delete(table_name, condition) => { + validate_delete(table_name, condition, db_metadata)?; + }, + // Operation::Update(table_name, insertion_values, condition) => { + // validate_update(table_name, insertion_values, db_metadata)?; + // }, + Operation::CreateTable(table_name, schema) => { + validate_create(table_name, schema, db_metadata)?; + }, + Operation::CreateIndex(table_name, column_name) => { + validate_create_index(table_name, column_name, db_metadata)?; + }, + // Operation::DropTable(table_name) => { + // validate_drop(table_name, db_metadata)?; + // } + } + Ok(()) +} + +// pub fn validate_drop(table_name: &str, db_metadata: &Vec<(String, TableSchema)>) -> Result<(), ValidationError> { +// db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) +// .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?; +// Ok(()) +// } + +pub fn validate_create(table_name: &str, schema: &TableSchema, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> { + if db_metadata.iter().find(|(tname, _)| table_name.eq(tname)).is_some() { + return Err(ValidationError::TableExists(table_name.to_string())); + } + let mut column_names = HashSet::new(); + for (name, _) in &schema.column_name_position_mapping { + if column_names.contains(name) { + return Err(ValidationError::DuplicateColumn(name.clone())); + } else { + column_names.insert(name.clone()); + } + } + + // TODO: Ensure it has a primary key?? 
+ Ok(()) +} + +pub fn validate_select(table_name: &str, column_selection: &ColumnSelection, condition: &Option, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> { + let (_, schema) = db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) + .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?; + match column_selection { + ColumnSelection::Columns(columns) => { + columns.iter().find(|c| { + !schema.column_name_position_mapping.contains_left(*c) + }).map_or_else(||Ok(()), |c| Err(ValidationError::ColumnDoesNotExist(c.to_string())))?; + } + _ => {} + } + validate_condition(condition, schema)?; + Ok(()) +} + +// pub fn validate_update(table_name: &str, insertion_values: &InsertionValues, db_metadata: &Vec<(String, TableSchema)>) -> Result<(), ValidationError> { +// let (_, schema) = db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) +// .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?; +// let mut column_names = HashSet::new(); +// // Find duplicate columns +// for (name, _) in insertion_values { +// if column_names.contains(name) { +// return Err(ValidationError::DuplicateColumn(name.clone())); +// } else { +// column_names.insert(name.clone()); +// } +// } +// // Ensure columns exist in schema +// let column_value_type: Vec<_> = insertion_values.iter().map(|(column, value)| { +// (column, value, schema.column_name_position_mapping.iter().find(|(name, _) | { +// (*name).eq(column) +// }).map(|(_, t)| schema.types.get(*t as usize))) +// }).collect(); +// if let Some((name, _, _)) = column_value_type.iter().find(|(_, _, t)| { +// t.is_none() +// }) { +// return Err(ValidationError::ColumnDoesNotExist((*name).clone())); +// } + +// // Check types +// if let Some((_, _, _)) = column_value_type.iter().find(|(_, value, t)| { +// if let Some(Some(column_type)) = t { +// !type_of(value).eq(column_type) +// } else { +// false +// } +// }) { +// // TODO: Add column name information +// return 
Err(ValidationError::TypeMismatch); +// } +// Ok(()) +// } + +pub fn validate_insert(table_name: &str, insertion_values: &InsertionValues, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> { + let (_, schema) = db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) + .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?; + let inserted_columns: HashSet = HashSet::from_iter(insertion_values.iter().map(|(name, _)| name.clone())); + // TODO: primary key is not required + for (column_name, _) in &schema.column_name_position_mapping { + if !inserted_columns.contains(column_name) { + return Err(ValidationError::ValueForRequiredColumnIsMissing(column_name.clone())) + } + } + // Ensure columns exist in schema + let column_value_type: Vec<_> = insertion_values.iter().map(|(column, value)| { + (column, value, schema.column_name_position_mapping.iter().find(|(name, _) | { + (*name).eq(column) + }).map(|(_, t)| schema.types.get(*t as usize))) + }).collect(); + if let Some((name, _, _)) = column_value_type.iter().find(|(_, _, t)| { + match t { + Some(Some(_)) => false, + _ => true + } + }) { + return Err(ValidationError::ColumnDoesNotExist((*name).clone())); + } + + // Check types + if let Some((_, _, _)) = column_value_type.iter().find(|(_, value, t)| { + if let Some(Some(t)) = t { + !type_of(value).eq(t) + } else { + false + } + }) { + // TODO: Add column name information + return Err(ValidationError::TypeMismatch); + } + Ok(()) +} + +pub fn validate_delete(table_name: &str, condition: &Option, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> { + let (_, schema) = db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) + .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?; + validate_condition(condition, schema)?; + Ok(()) +} + +fn validate_condition(condition: &Option, schema: &TableSchema) -> Result<(), ValidationError> { + match condition { + Some(c) => { + match c { + Condition::Eq(left, 
right) => { + let position = schema.column_name_position_mapping.get_by_left(left) + .ok_or(ValidationError::ColumnDoesNotExist(left.clone()))?; + let column_type = schema.types.get(*position as usize) + .ok_or(ValidationError::BadColumnPosition(*position))?; + if !column_type.eq(&type_of(right)) { + return Err(ValidationError::TypeMismatch); + } + } + } + } + None => {} + } + Ok(()) +} + +fn validate_create_index(table_name: &str, column_name: &str, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> { + // Ensure table exists + let (_, schema) = db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) + .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?; + // Ensure column exists + if !schema.column_name_position_mapping.contains_left(column_name) { + return Err(ValidationError::ColumnDoesNotExist(column_name.to_string())); + } + Ok(()) +} \ No newline at end of file diff --git a/server/Cargo.toml b/server/Cargo.toml index 256b592..8d8c52c 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -6,10 +6,10 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +tokio = { version = "1.35.1", features = ["full"] } anyhow = "1.0.76" clap = { version = "4.4.18", features = ["derive"] } -tokio = { version = "1.35.1", features = ["full"] } -minisql = { path = "../minisql" } -proto = { path = "../proto" } async-trait = "0.1.74" rand = "0.8.5" +minisql = { path = "../minisql" } +proto = { path = "../proto" }