Add parsing (incl. validation)

Ensure identifiers start with alphabetical character

Rename parse_variable_name -> parse_column_name

Add DB value parsers and condition parser placeholder

Fix number parser, basic condition parser

Move select parser to select module

Add create statement parser

Move condition parser to common; add delete statement parser

Add drop statement parser

Add insert parser

Add update parser, combine operation parsers into one

Add initial validation, fix compiler warnings

Validation WIP

Allow more spaces in create statement, update TableSchema struct

Add create index parser and validator

Add todo in parse_identifier

Rework the new structure, many other changes
This commit is contained in:
Maxim Svistunov 2024-01-26 18:20:45 +01:00
parent 143dc0e5ce
commit 61c0a34253
20 changed files with 1138 additions and 39 deletions

124
Cargo.lock generated
View file

@ -29,9 +29,9 @@ version = "0.1.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9"
dependencies = [
"proc-macro2",
"quote",
"syn",
"proc-macro2 1.0.70",
"quote 1.0.33",
"syn 2.0.41",
]
[[package]]
@ -112,6 +112,8 @@ name = "client"
version = "0.1.0"
dependencies = [
"anyhow",
"minisql",
"parser",
"proto",
"tokio",
]
@ -150,6 +152,12 @@ version = "2.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "minisql"
version = "0.1.0"
@ -177,6 +185,38 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "nom"
version = "4.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6"
dependencies = [
"memchr",
"version_check",
]
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "nom-peg"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a3fefa2869e8c4f92ee5135cbeba457eebf1f24e188616bcbd334abb51be6a3"
dependencies = [
"nom 4.2.3",
"proc-macro2 0.4.30",
"quote 0.6.13",
"syn 0.15.44",
]
[[package]]
name = "num_cpus"
version = "1.16.0"
@ -219,12 +259,32 @@ dependencies = [
"windows-targets",
]
[[package]]
name = "parser"
version = "0.1.0"
dependencies = [
"bimap",
"minisql",
"nom 7.1.3",
"nom-peg",
"thiserror",
]
[[package]]
name = "pin-project-lite"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58"
[[package]]
name = "proc-macro2"
version = "0.4.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759"
dependencies = [
"unicode-xid",
]
[[package]]
name = "proc-macro2"
version = "1.0.70"
@ -244,13 +304,22 @@ dependencies = [
"tokio",
]
[[package]]
name = "quote"
version = "0.6.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1"
dependencies = [
"proc-macro2 0.4.30",
]
[[package]]
name = "quote"
version = "1.0.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
dependencies = [
"proc-macro2",
"proc-macro2 1.0.70",
]
[[package]]
@ -289,9 +358,9 @@ version = "1.0.193"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
dependencies = [
"proc-macro2",
"quote",
"syn",
"proc-macro2 1.0.70",
"quote 1.0.33",
"syn 2.0.41",
]
[[package]]
@ -299,6 +368,8 @@ name = "server"
version = "0.1.0"
dependencies = [
"anyhow",
"minisql",
"parser",
"proto",
"tokio",
]
@ -328,14 +399,25 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "syn"
version = "0.15.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5"
dependencies = [
"proc-macro2 0.4.30",
"quote 0.6.13",
"unicode-xid",
]
[[package]]
name = "syn"
version = "2.0.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269"
dependencies = [
"proc-macro2",
"quote",
"proc-macro2 1.0.70",
"quote 1.0.33",
"unicode-ident",
]
@ -354,9 +436,9 @@ version = "1.0.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8"
dependencies = [
"proc-macro2",
"quote",
"syn",
"proc-macro2 1.0.70",
"quote 1.0.33",
"syn 2.0.41",
]
[[package]]
@ -384,9 +466,9 @@ version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
dependencies = [
"proc-macro2",
"quote",
"syn",
"proc-macro2 1.0.70",
"quote 1.0.33",
"syn 2.0.41",
]
[[package]]
@ -395,6 +477,18 @@ version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "unicode-xid"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
[[package]]
name = "version_check"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd"
[[package]]
name = "virtue"
version = "0.0.13"

View file

@ -4,5 +4,6 @@ members = [
"minisql",
"proto",
"server",
"client"
"client",
"parser"
]

View file

@ -9,3 +9,5 @@ edition = "2021"
tokio = { version = "1.35.1", features = ["full"] }
anyhow = "1.0.76"
proto = { path = "../proto" }
minisql = { path = "../minisql" }
parser = { path = "../parser" }

View file

@ -31,7 +31,7 @@ async fn main() -> anyhow::Result<()> {
query: "SELECT * FROM users;".to_string().into(),
})).await?;
writer.flush().await?;
let mut line = String::new();
loop {
let msg: BackendMessage = reader.read_proto().await?;
match msg {
@ -46,7 +46,17 @@ async fn main() -> anyhow::Result<()> {
},
BackendMessage::ReadyForQuery(data) => {
println!("Ready for query: {:?}", data);
line.clear();
let res = std::io::stdin().read_line(&mut line);
if let Ok(_) = res {
if line.eq("exit") {
break;
}
writer.write_proto(FrontendMessage::Query(QueryData {
query: line.clone().into(),
})).await?;
writer.flush().await?;
}
},
m => {
println!("Unexpected message: {:?}", m);

View file

@ -20,7 +20,7 @@ pub struct State {
// #[derive(Debug)]
pub enum Response<'a> {
Selected(Box<dyn Iterator<Item=Row> + 'a>),
Selected(Box<dyn Iterator<Item=Row> + 'a + Send>),
Inserted,
Deleted(usize), // how many were deleted
TableCreated,
@ -48,13 +48,23 @@ impl std::fmt::Debug for Response<'_> {
}
impl State {
fn new() -> Self {
pub fn new() -> Self {
Self {
table_name_position_mapping: BiMap::new(),
tables: vec![],
}
}
/// Collects `(table name, schema)` pairs for every entry in the name/position mapping.
///
/// Panics if a mapped position has no corresponding entry in `tables`.
///
/// TODO: return a reference to avoid allocations
pub fn metadata<'a>(&'a self) -> Vec<(String, &'a TableSchema)> {
    self.table_name_position_mapping
        .iter()
        .map(|(name, pos)| {
            let table = self.tables.get(*pos).unwrap();
            (name.clone(), table.schema())
        })
        .collect()
}
fn table_from_name<'a>(&'a self, table_name: &TableName) -> DbResult<&'a Table> {
match self.table_name_position_mapping.get_by_left(table_name) {
Some(table_position) => {
@ -99,7 +109,7 @@ impl State {
let selected_rows = match maybe_condition {
None => {
let x = table.select_all_rows(selected_column_positions);
Box::new(x) as Box<dyn Iterator<Item=Row> + 'a>
Box::new(x) as Box<dyn Iterator<Item=Row> + 'a + Send>
},
Some(Condition::Eq(eq_column_name, value)) => {
@ -112,7 +122,7 @@ impl State {
eq_column_position,
value,
)?;
Box::new(x) as Box<dyn Iterator<Item=Row> + 'a>
Box::new(x) as Box<dyn Iterator<Item=Row> + 'a + Send>
}
};

View file

@ -12,15 +12,15 @@ use std::collections::HashMap;
pub struct TableSchema {
table_name: TableName, // used for descriptive errors
primary_key: ColumnPosition,
column_name_position_mapping: BiMap<ColumnName, ColumnPosition>,
types: Vec<DbType>,
pub column_name_position_mapping: BiMap<ColumnName, ColumnPosition>,
pub types: Vec<DbType>,
}
pub type TableName = String;
pub type ColumnName = String;
impl TableSchema {
pub(crate) fn new(table_name: TableName, primary_key: ColumnPosition, column_name_position_map: Vec<(ColumnName, ColumnPosition)>, types: Vec<DbType>) -> Self {
pub fn new(table_name: TableName, primary_key: ColumnPosition, column_name_position_map: Vec<(ColumnName, ColumnPosition)>, types: Vec<DbType>) -> Self {
let mut column_name_position_mapping: BiMap<ColumnName, ColumnPosition> = BiMap::new();
for (column_name, column_position) in column_name_position_map {
column_name_position_mapping.insert(column_name, column_position);
@ -113,7 +113,7 @@ impl TableSchema {
}
}
fn number_of_columns(&self) -> usize {
pub fn number_of_columns(&self) -> usize {
self.column_name_position_mapping.len()
}

View file

@ -1,5 +1,5 @@
// ==============Types================
#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DbType {
String,
Int,

13
parser/Cargo.toml Normal file
View file

@ -0,0 +1,13 @@
[package]
name = "parser"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
nom-peg = "0.1.1"
nom = "7.1.3"
minisql = { path = "../minisql" }
bimap = "0.6.3"
thiserror = "1"

95
parser/src/common.rs Normal file
View file

@ -0,0 +1,95 @@
use nom::{
character::complete::{alphanumeric1, char, multispace0, anychar, multispace1},
combinator::peek,
error::make_error,
sequence::{delimited, terminated},
bytes::complete::tag,
IResult, branch::alt,
};
use minisql::{operation::Condition, type_system::DbType};
use crate::literal::parse_db_value;
/// Parses a table name, either wrapped in double quotes (`"Name"`) or as a bare identifier.
///
/// The quoted form is tried first; the returned slice never includes the quotes.
pub fn parse_table_name(input: &str) -> IResult<&str, &str> {
    let quoted_name = delimited(char('"'), alphanumeric1, char('"'));
    alt((quoted_name, parse_identifier))(input)
}
pub fn parse_identifier(input: &str) -> IResult<&str, &str> {
// TODO: allow underscores
let (_, first) = peek(anychar)(input)?;
if first.is_alphabetic() {
alphanumeric1(input)
} else {
Err(nom::Err::Error(make_error(input, nom::error::ErrorKind::Alpha)))
}
}
/// Parses a column name and swallows any whitespace that follows it.
///
/// Returns the name as an owned `String`.
pub fn parse_column_name(input: &str) -> IResult<&str, String> {
    let (rest, name) = terminated(parse_identifier, multispace0)(input)?;
    Ok((rest, name.to_string()))
}
/// Parses a column type keyword into a [`DbType`].
///
/// Accepted keywords (case-sensitive): `STRING`, `INT`, `UUID` and `NUMBER`.
/// The legacy spelling `Float` is still accepted as an alias for `NUMBER` so that
/// statements written against the earlier parser keep working.
///
/// Returns a recoverable `nom::Err::Error` when none of the keywords match.
pub fn parse_db_type(input: &str) -> IResult<&str, DbType> {
    // Each keyword is mapped straight to its type, so no catch-all arm is needed.
    alt((
        nom::combinator::value(DbType::String, tag("STRING")),
        nom::combinator::value(DbType::Int, tag("INT")),
        nom::combinator::value(DbType::Uuid, tag("UUID")),
        nom::combinator::value(DbType::Number, tag("NUMBER")),
        // Legacy alias kept for backward compatibility.
        nom::combinator::value(DbType::Number, tag("Float")),
    ))(input)
}
/// Parses an optional `WHERE <column> = <value>` clause.
///
/// Returns `None` (consuming nothing) when the input does not start with `WHERE`.
/// Once `WHERE` has been seen, a malformed condition is reported as an error.
pub fn parse_condition(input: &str) -> IResult<&str, Option<Condition>> {
    let keyword: IResult<&str, &str> = tag("WHERE")(input);
    match keyword {
        Err(_) => Ok((input, None)),
        Ok((rest, _)) => {
            let (rest, _) = multispace1(rest)?;
            let (rest, condition) = parse_equality(rest)?;
            Ok((rest, Some(condition)))
        }
    }
}
fn parse_equality(input: &str) -> IResult<&str, Condition> {
let (input, column_name) = parse_column_name(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char('=')(input)?;
let (input, _) = multispace0(input)?;
let (input, db_value) = parse_db_value(input)?;
Ok((input, Condition::Eq(column_name, db_value)))
}
#[cfg(test)]
mod tests {
    use minisql::{operation::Condition, type_system::DbType};
    use crate::common::{parse_db_type, parse_equality};

    /// An equality condition yields the column name and the parsed value.
    #[test]
    fn test_parse_equality() {
        use minisql::type_system::{IndexableValue, Value};

        let parsed = parse_equality("id = 1");
        if let Ok(("", Condition::Eq(column_name, value))) = parsed {
            assert!(column_name.eq("id"));
            assert_eq!(value, Value::Indexable(IndexableValue::Int(1)))
        } else {
            panic!("should parse");
        }
    }

    /// Every supported type keyword maps to its `DbType`; unknown keywords are rejected.
    #[test]
    fn test_parse_db_type() {
        let (_, int_type) = parse_db_type("INT").expect("should parse");
        assert!(matches!(int_type, DbType::Int));

        let (_, string_type) = parse_db_type("STRING").expect("should parse");
        assert!(matches!(string_type, DbType::String));

        let (_, uuid_type) = parse_db_type("UUID").expect("should parse");
        assert!(matches!(uuid_type, DbType::Uuid));

        let (_, number_type) = parse_db_type("NUMBER").expect("should parse");
        assert!(matches!(number_type, DbType::Number));

        assert!(parse_db_type("Unknown").is_err());
    }
}

41
parser/src/core.rs Normal file
View file

@ -0,0 +1,41 @@
use minisql::{operation::Operation, schema::TableSchema};
use nom::{branch::alt, multi::many0, IResult};
use crate::{create::parse_create, delete::parse_delete, index::parse_create_index, insert::parse_insert, select::parse_select, validation::{validate_operation, ValidationError}};
/// Error returned by `parse_and_validate`.
#[derive(Debug)]
pub enum Error {
    /// The query text could not be parsed; holds the parser error rendered as a string.
    ParsingError(String),
    /// The query parsed but `validate_operation` rejected it.
    ValidationError(ValidationError)
}
/// Parses one SQL statement into an [`Operation`].
///
/// The statement parsers are tried in the listed order and the first success wins.
pub fn parse_statement<'a>(input: &'a str) -> IResult<&str, Operation> {
    let mut any_statement = alt((
        parse_insert,
        parse_create,
        parse_delete,
        //parse_drop,
        parse_select,
        // parse_update,
        parse_create_index,
    ));
    any_statement(input)
}
/// Parses zero or more back-to-back statements, stopping at the first one that fails.
pub fn parse_statements<'a>(input: &'a str) -> IResult<&str, Vec<Operation>> {
    let mut statement_list = many0(parse_statement);
    statement_list(input)
}
pub fn parse_and_validate(query: String, db_metadata: &Vec<(String, &TableSchema)>) -> Result<Operation, Error> {
let (_, op) = parse_statement(query.as_str())
.map_err(|err| {
Error::ParsingError(err.to_string())
})?;
validate_operation(&op, db_metadata).map_err(|err| Error::ValidationError(err))?;
Ok(op)
}
// #[test]
// fn test_select() {
// parse_and_validate("SELECT * FROM users;".to_string(), &Vec::new()).unwrap();
// }

109
parser/src/create.rs Normal file
View file

@ -0,0 +1,109 @@
use minisql::{operation::Operation, schema::{ColumnName, TableSchema}, type_system::DbType};
use nom::{
bytes::complete::tag,
character::complete::{char, multispace0, multispace1},
multi::separated_list0,
sequence::terminated,
IResult, combinator::opt,
};
use crate::common::{parse_table_name, parse_identifier, parse_db_type};
pub fn parse_create(input: &str) -> IResult<&str, Operation> {
let (input, _) = tag("CREATE")(input)?;
let (input, _) = multispace1(input)?;
let (input, _) = tag("TABLE")(input)?;
let (input, _) = multispace1(input)?;
let (input, table_name) = parse_table_name(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char('(')(input)?;
let (input, _) = multispace0(input)?;
let (input, column_definitions) = parse_column_definitions(input)?;
let mut column_name_position_mapping = Vec::new();
let mut types: Vec<DbType> = Vec::new();
let mut primary_key = None;
for (position, (column_name, db_type, pk)) in column_definitions.iter().enumerate() {
types.push(db_type.clone());
if *pk {
primary_key = Some(position);
}
column_name_position_mapping.push((column_name.clone(), position));
}
let (input, _) = char(')')(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char(';')(input)?;
let schema = TableSchema::new(
table_name.to_string(),
primary_key.unwrap_or_default(),
column_name_position_mapping,
types
);
Ok((
input,
Operation::CreateTable(table_name.to_string(), schema),
))
}
/// Parses a comma-separated (possibly empty) list of column definitions.
pub fn parse_column_definitions(input: &str) -> IResult<&str, Vec<(ColumnName, DbType, bool)>> {
    let comma = terminated(char(','), multispace0);
    separated_list0(comma, parse_column_definition)(input)
}
fn parse_primary_key(input: &str) -> IResult<&str, &str> {
let (input, _) = multispace1(input)?;
let (input, _) = tag("PRIMARY")(input)?;
let (input, _) = multispace1(input)?;
let (input, _) = tag("KEY")(input)?;
Ok((input, "PRIMARY KEY"))
}
pub fn parse_column_definition(input: &str) -> IResult<&str, (ColumnName, DbType, bool)> {
let (input, identifier) = parse_identifier(input)?;
let (input, _) = multispace1(input)?;
let (input, db_type) = parse_db_type(input)?;
let (input, pk) = opt(parse_primary_key)(input).map(|(input, pk)| (input, pk.is_some()))?;
let (input, _) = multispace0(input)?;
Ok((input, (identifier.to_string(), db_type, pk)))
}
#[cfg(test)]
mod tests {
    use minisql::operation::Operation;
    use crate::create::parse_create;

    #[test]
    fn test_parse_create_no_spaces() {
        let sql = "CREATE TABLE \"Table1\"(id UUID ,column1 INT);";
        parse_create(sql).expect("should parse");
    }

    #[test]
    fn test_parse_create_primary_key() {
        let sql = "CREATE TABLE \"Table1\"(id UUID PRIMARY KEY,column1 INT);";
        parse_create(sql).expect("should parse");
    }

    #[test]
    fn test_parse_create_no_quotes_table_name() {
        let sql = "CREATE TABLE Table1(id UUID PRIMARY KEY,column1 INT);";
        parse_create(sql).expect("should parse");
    }

    #[test]
    fn test_parse_create_primary_key_with_spaces() {
        let sql = "CREATE TABLE \"Table1\" ( id UUID PRIMARY KEY , column1 INT ) ;";
        parse_create(sql).expect("should parse");
    }

    /// The parsed schema keeps the table name and numbers columns in declaration order.
    #[test]
    fn test_parse_create() {
        let sql = "CREATE TABLE \"Table1\"( id UUID , column1 INT );";
        let (_, create) = parse_create(sql).expect("should parse");
        assert!(matches!(create, Operation::CreateTable(_ ,_)));
        if let Operation::CreateTable(name, schema) = create {
            assert_eq!(name, "Table1");
            assert_eq!(schema.number_of_columns(), 2);
            assert_eq!(schema.column_position_from_column_name(&"id".to_string()).unwrap(), 0);
            assert_eq!(schema.column_position_from_column_name(&"column1".to_string()).unwrap(), 1);
        }
    }
}

38
parser/src/delete.rs Normal file
View file

@ -0,0 +1,38 @@
use minisql::operation::Operation;
use nom::{
bytes::complete::tag,
character::complete::{char, multispace0, multispace1},
IResult,
};
use crate::common::{parse_table_name, parse_condition};
pub fn parse_delete(input: &str) -> IResult<&str, Operation> {
let (input, _) = tag("DELETE")(input)?;
let (input, _) = multispace1(input)?;
let (input, _) = tag("FROM")(input)?;
let (input, _) = multispace1(input)?;
let (input, table_name) = parse_table_name(input)?;
let (input, _) = multispace0(input)?;
let (input, condition) = parse_condition(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char(';')(input)?;
Ok((
input,
Operation::Delete(table_name.to_string(), condition),
))
}
#[cfg(test)]
mod tests {
    use minisql::operation::Operation;
    use crate::delete::parse_delete;

    /// A DELETE with a WHERE clause parses into `Operation::Delete`.
    #[test]
    fn test_parse_delete() {
        let parsed = parse_delete("DELETE FROM \"T1\" WHERE id = 1 ;");
        let (_, operation) = parsed.expect("should parse");
        assert!(matches!(operation, Operation::Delete(_, _)))
    }
    // TODO: also cover DELETE without a WHERE clause, and assert on the parsed condition
}

70
parser/src/index.rs Normal file
View file

@ -0,0 +1,70 @@
use minisql::operation::Operation;
use nom::{
bytes::complete::tag,
character::complete::{char, multispace0, multispace1},
IResult, combinator::opt,
};
use crate::common::{parse_identifier, parse_table_name};
pub fn parse_create_index(input: &str) -> IResult<&str, Operation> {
let (input, _) = tag("CREATE")(input)?;
let unique = |input| -> IResult<&str, bool> {
let (input, _) = multispace1(input)?;
let (input, _) = tag("UNIQUE")(input)?;
Ok((input, true))
};
let (input, _) = opt(unique)(input)?;
let (input, _) = multispace1(input)?;
let (input, _) = tag("INDEX")(input)?;
let (input, _) = multispace1(input)?;
let (input, _) = parse_identifier(input)?;
let (input, _) = multispace1(input)?;
let (input, _) = tag("ON")(input)?;
let (input, _) = multispace1(input)?;
let (input, table_name) = parse_table_name(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char('(')(input)?;
let (input, _) = multispace0(input)?;
let (input, column_name) = parse_identifier(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char(')')(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char(';')(input)?;
let operation = Operation::CreateIndex(table_name.to_string(), column_name.to_string());
Ok((input, operation))
}
#[cfg(test)]
mod tests {
    use minisql::operation::Operation;
    use crate::index::parse_create_index;

    #[test]
    fn test_create_index() {
        let sql = "CREATE UNIQUE INDEX idxcontactsemail ON \"contacts\" (email);";
        let (_, operation) = parse_create_index(sql).expect("should parse");
        assert!(matches!(operation, Operation::CreateIndex(_, _)));
        if let Operation::CreateIndex(table_name, column_name) = operation {
            assert_eq!(table_name, "contacts");
            assert_eq!(column_name, "email");
        }
    }

    #[test]
    fn test_create_index_with_spaces() {
        let sql = "CREATE UNIQUE INDEX idxcontactsemail ON \"contacts\" ( email ) ;";
        let (_, operation) = parse_create_index(sql).expect("should parse");
        assert!(matches!(operation, Operation::CreateIndex(_, _)));
        if let Operation::CreateIndex(table_name, column_name) = operation {
            assert_eq!(table_name, "contacts");
            assert_eq!(column_name, "email");
        }
    }
}

94
parser/src/insert.rs Normal file
View file

@ -0,0 +1,94 @@
use crate::{literal::parse_db_value, common::{parse_table_name, parse_identifier}};
use minisql::{operation::Operation, type_system::Value};
use nom::{
bytes::complete::tag,
character::complete::{multispace0, multispace1, char},
combinator::map,
multi::separated_list0,
sequence::terminated,
IResult,
};
pub fn parse_insert(input: &str) -> IResult<&str, Operation> {
let (input, _) = tag("INSERT")(input)?;
let (input, _) = multispace1(input)?;
let (input, _) = tag("INTO")(input)?;
let (input, _) = multispace1(input)?;
let (input, table_name) = parse_table_name(input)?;
let (input, _) = multispace1(input)?;
let (input, _) = char('(')(input)?;
let (input, _) = multispace0(input)?;
let (input, column_names) = parse_columns(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char(')')(input)?;
let (input, _) = multispace1(input)?;
let (input, _) = tag("VALUES")(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char('(')(input)?;
let (input, _) = multispace0(input)?;
let (input, values) = parse_values(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char(')')(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char(';')(input)?;
Ok((
input,
Operation::Insert(table_name.to_string(), column_names.into_iter().zip(values).collect()),
))
}
/// Parses a comma-separated (possibly empty) list of column identifiers into owned names.
pub fn parse_columns(input: &str) -> IResult<&str, Vec<String>> {
    let comma = terminated(char(','), multispace0);
    let owned_identifier = map(parse_identifier, String::from);
    separated_list0(comma, owned_identifier)(input)
}
/// Parses a comma-separated (possibly empty) list of literal values.
pub fn parse_values(input: &str) -> IResult<&str, Vec<Value>> {
    let comma = terminated(char(','), multispace0);
    separated_list0(comma, parse_db_value)(input)
}
#[cfg(test)]
mod tests {
    use minisql::{operation::Operation, type_system::{IndexableValue, Value}};
    use super::parse_insert;

    #[test]
    fn test_parse_insert() {
        let sql = "INSERT INTO \"MyTable\" (id, data) VALUES(1, \"Text\");";
        let (remaining, operation) = parse_insert(sql).expect("should parse");
        match operation {
            Operation::Insert(table_name, insertion_values) if remaining.is_empty() => {
                assert_eq!(table_name, "MyTable");
                let expected = vec![
                    ("id".to_string(), Value::Indexable(IndexableValue::Int(1))),
                    ("data".to_string(), Value::Indexable(IndexableValue::String("Text".to_string()))),
                ];
                assert_eq!(insertion_values, expected);
            }
            _ => {
                unreachable!()
            }
        }
    }

    #[test]
    fn test_parse_insert_with_spaces() {
        let sql = "INSERT INTO \"MyTable\" ( id, data ) VALUES ( 1, \"Text\" ) ;";
        let (remaining, operation) = parse_insert(sql).expect("should parse");
        match operation {
            Operation::Insert(table_name, insertion_values) if remaining.is_empty() => {
                assert_eq!(table_name, "MyTable");
                let expected = vec![
                    ("id".to_string(), Value::Indexable(IndexableValue::Int(1))),
                    ("data".to_string(), Value::Indexable(IndexableValue::String("Text".to_string()))),
                ];
                assert_eq!(insertion_values, expected);
            }
            _ => {
                unreachable!()
            }
        }
    }
}

16
parser/src/lib.rs Normal file
View file

@ -0,0 +1,16 @@
mod literal;
mod select;
mod common;
mod create;
mod insert;
mod delete;
mod index;
mod validation;
mod core;
pub use core::parse_and_validate;
pub use core::Error;
pub use validation::validate_operation;
pub use minisql;

164
parser/src/literal.rs Normal file
View file

@ -0,0 +1,164 @@
use minisql::type_system::{IndexableValue, Value};
use nom::{
branch::alt,
character::complete::{u64, char, digit1, none_of},
combinator::opt,
multi::many0,
sequence::{delimited, pair, preceded},
IResult, error::make_error
};
/// Parses any literal value, trying string, number, integer and uuid parsers in that order.
pub fn parse_db_value(input: &str) -> IResult<&str, Value> {
    let mut literal = alt((parse_string, parse_number, parse_int, parse_uuid));
    literal(input)
}
pub fn parse_number(input: &str) -> IResult<&str, Value> {
// Parse the integer part
let (input, (sign, digits)) = pair(opt(char('-')), digit1)(input)?;
// Parse the fractional part
let (input, frac_part) = opt(pair(char('.'), digit1))(input)?;
match frac_part {
Some((_fsign, fdigits)) => {
// Combine integer and fractional parts
let combined_parts = format!(
"{}{}",
format!("{}{}", sign.unwrap_or('+'), digits),
format!(".{}", fdigits)
);
// Parse the combined parts as a floating-point number
let value = combined_parts.parse::<f64>()
.map_err(|_| {
nom::Err::Failure(make_error(input, nom::error::ErrorKind::Fail))
})?;
Ok((input, Value::Number(value)))
}
None => {
let value = format!("{}{}", sign.unwrap_or('+'), digits).parse::<u64>()
.map_err(|_| {
nom::Err::Failure(make_error(input, nom::error::ErrorKind::Fail))
})?;
Ok((input, Value::Indexable(IndexableValue::Int(value))))
}
}
}
/// Parses an unsigned decimal integer into an indexable `Int` value.
pub fn parse_int(input: &str) -> IResult<&str, Value> {
    let (rest, number) = u64(input)?;
    Ok((rest, Value::Indexable(IndexableValue::Int(number))))
}
/// Parses the two-character escape `\t` and yields a tab character.
fn escape_tab(input:&str) -> IResult<&str, char> {
    preceded(char('\\'), char('t'))(input).map(|(rest, _)| (rest, '\t'))
}
/// Parses the two-character escape `\\` and yields a single backslash.
fn escape_backslash(input:&str) -> IResult<&str, char> {
    preceded(char('\\'), char('\\'))(input).map(|(rest, _)| (rest, '\\'))
}
/// Parses the two-character escape `\n` and yields a newline character.
fn escape_newline(input:&str) -> IResult<&str, char> {
    preceded(char('\\'), char('n'))(input).map(|(rest, _)| (rest, '\n'))
}
/// Parses the two-character escape `\r` and yields a carriage-return character.
fn escape_carriegereturn(input:&str) -> IResult<&str, char> {
    preceded(char('\\'), char('r'))(input).map(|(rest, _)| (rest, '\r'))
}
/// Parses the two-character escape `\"` and yields a double-quote character.
fn escape_doublequote(input:&str) -> IResult<&str, char> {
    let (rest, quote) = preceded(char('\\'), char('"'))(input)?;
    Ok((rest, quote))
}
/// Parses a double-quoted string literal into an indexable `String` value.
///
/// Supported escapes: `\\`, `\r`, `\n`, `\"` and `\t`. Any other character except a
/// bare backslash or double quote is taken literally.
pub fn parse_string(input: &str) -> IResult<&str, Value> {
    // One character of string content: a recognised escape or a plain character.
    let string_char = alt((
        escape_backslash,
        escape_carriegereturn,
        escape_newline,
        escape_doublequote,
        escape_tab,
        none_of(r#"\""#),
    ));
    let (rest, chars) = delimited(char('"'), many0(string_char), char('"'))(input)?;

    let text: String = chars.into_iter().collect();
    Ok((rest, Value::Indexable(IndexableValue::String(text))))
}
/// Parses an unsigned decimal integer into an indexable `Uuid` value.
fn parse_uuid(input: &str) -> IResult<&str, Value> {
    // TODO: make it actually uuid
    let (rest, raw) = u64(input)?;
    Ok((rest, Value::Indexable(IndexableValue::Uuid(raw))))
}
#[cfg(test)]
mod tests {
    use minisql::type_system::{IndexableValue, Value};
    use crate::literal::{parse_db_value, parse_string};

    /// Expected value for a successfully parsed string literal.
    fn string_value(text: &str) -> Value {
        Value::Indexable(IndexableValue::String(String::from(text)))
    }

    /// Expected value for a successfully parsed integer literal.
    fn int_value(number: u64) -> Value {
        Value::Indexable(IndexableValue::Int(number))
    }

    #[test]
    fn test_string_parser() {
        assert_eq!(parse_string(r#""simple""#), Ok(("", string_value("simple"))));
        assert_eq!(parse_string(r#""\"\t\r\n\\""#), Ok(("", string_value("\"\t\r\n\\"))));
        assert_eq!(parse_string(r#""name is \"John\".""#), Ok(("", string_value("name is \"John\"."))));
    }

    #[test]
    fn test_parse_db_value() {
        let (rest, value) = parse_db_value("5").expect("should parse");
        assert_eq!(rest, "");
        assert_eq!(value, int_value(5));

        let (rest, value) = parse_db_value("5.5").expect("should parse");
        assert_eq!(rest, "");
        assert_eq!(value, Value::Number(5.5));

        let (_, _) = parse_db_value("\"STRING\"").expect("should parse");

        let (rest, value) = parse_db_value("\"abcdefghkjklmnopqrstuvwxyz!@#$%^&*()_+ \"").expect("should parse");
        assert_eq!(rest, "");
        assert_eq!(value, string_value("abcdefghkjklmnopqrstuvwxyz!@#$%^&*()_+ "));
    }

    #[test]
    fn test_parse_positive_float() {
        assert_eq!(parse_db_value("23.213313"), Ok(("", Value::Number(23.213313))));
        assert_eq!(parse_db_value("2241.9734"), Ok(("", Value::Number(2241.9734))));
    }

    #[test]
    fn test_parse_negative_float() {
        assert_eq!(parse_db_value("-9241.873654"), Ok(("", Value::Number(-9241.873654))));
        assert_eq!(parse_db_value("-62625.0"), Ok(("", Value::Number(-62625.0))));
    }

    #[test]
    fn test_parse_float_between_0_and_1() {
        assert_eq!(parse_db_value("0.873654"), Ok(("", Value::Number(0.873654))));
        assert_eq!(parse_db_value("0.62625"), Ok(("", Value::Number(0.62625))));
    }

    #[test]
    fn test_parse_int() {
        assert_eq!(parse_db_value("5134616"), Ok(("", int_value(5134616))));
    }
}

122
parser/src/select.rs Normal file
View file

@ -0,0 +1,122 @@
use crate::common::{parse_table_name, parse_column_name, parse_condition};
use minisql::operation::{ColumnSelection, Operation};
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{multispace0, multispace1, char},
combinator::map,
error::Error,
multi::separated_list0,
sequence::terminated,
IResult,
};
pub fn parse_select(input: &str) -> IResult<&str, Operation> {
let (input, _) = tag("SELECT")(input)?;
let (input, _) = multispace1(input)?;
let (input, column_selection) = try_parse_column_selection(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = tag("FROM")(input)?;
let (input, _) = multispace1(input)?;
let (input, table_name) = parse_table_name(input)?;
let (input, _) = multispace0(input)?;
let (input, condition) = parse_condition(input)?;
let (input, _) = multispace0(input)?;
// TODO: make it optional?
let (input, _) = tag(";")(input)?;
Ok((
input,
Operation::Select(table_name.to_string(), column_selection, condition),
))
}
pub fn try_parse_column_selection(input: &str) -> IResult<&str, ColumnSelection> {
let all_parser = map(tag::<&str, &str, Error<&str>>("*"), |_| {
ColumnSelection::All
});
let columns_parser = map(
separated_list0(terminated(char(','), multispace0), parse_column_name),
|names| ColumnSelection::Columns(names),
);
alt((all_parser, columns_parser))(input)
}
#[cfg(test)]
mod tests {
    use minisql::operation::{ColumnSelection, Operation};
    use crate::{common::{parse_column_name, parse_table_name}, select::parse_select};

    #[test]
    fn test_parse_select_all() {
        let sql = "SELECT * FROM \"MyTable\";";
        let (remaining, operation) = parse_select(sql).expect("should parse");
        match operation {
            Operation::Select(table_name, column_selection, maybe_condition) if remaining.is_empty() => {
                assert_eq!(table_name, "MyTable");
                assert!(matches!(column_selection, ColumnSelection::All));
                assert!(maybe_condition.is_none());
            }
            _ => {
                println!("Input to be parsed: {}", remaining);
                panic!("expected select operation")
            }
        }
    }

    #[test]
    fn test_parse_column_name() {
        parse_column_name("1abc").expect_err("variable names should not start with number");
    }

    #[test]
    fn test_parse_table_name() {
        parse_table_name("\"\"").expect_err("Empty table names are not allowed");
    }

    #[test]
    fn test_parse_select_columns() {
        let sql = "SELECT name , email FROM \"AddressBook\" ;";
        let (remaining, operation) = parse_select(sql).expect("should parse");
        match operation {
            Operation::Select(table_name, column_selection, maybe_condition) if remaining.is_empty() => {
                assert_eq!(table_name, "AddressBook");
                assert!(matches!(column_selection, ColumnSelection::Columns(_)));
                if let ColumnSelection::Columns(column_names) = column_selection {
                    assert_eq!(column_names, vec!["name", "email"]);
                } else {
                    panic!("should select columns")
                }
                assert!(maybe_condition.is_none());
            }
            _ => {
                println!("Input to be parsed: {}", remaining);
                panic!("expected select operation")
            }
        }
    }

    #[test]
    fn test_parse_select_where() {
        use minisql::operation::Condition;

        let sql = "SELECT * FROM \"AddressBook\" WHERE id = 5 ;";
        let (remaining, operation) = parse_select(sql).expect("should parse");
        match operation {
            Operation::Select(table_name, column_selection, maybe_condition) if remaining.is_empty() => {
                assert_eq!(table_name, "AddressBook");
                assert!(matches!(column_selection, ColumnSelection::All));
                assert!(matches!(maybe_condition, Some(Condition::Eq(_, _))));
            }
            _ => {
                println!("Input to be parsed: {}", remaining);
                panic!("expected select operation")
            }
        }
    }

    // TODO: a test with multiple statements
    // TODO: allow underscores in identifiers
}

206
parser/src/validation.rs Normal file
View file

@ -0,0 +1,206 @@
use std::collections::HashSet;
use minisql::{operation::{ColumnSelection, Condition, InsertionValues, Operation}, schema::TableSchema, type_system::{DbType, IndexableValue, Value}};
/// Reasons an operation can be rejected when it is validated against the database metadata.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationError {
    /// The referenced table is not present in the database metadata.
    TableDoesNotExist(String),
    /// A table with this name is already present in the database metadata.
    TableExists(String),
    /// The named column is not part of the table's schema.
    ColumnDoesNotExist(String),
    /// A column maps to this position, but the schema holds no type at that position.
    BadColumnPosition(usize),
    /// The named column occurs more than once.
    DuplicateColumn(String),
    /// A value's type differs from the type of the column it is used with.
    TypeMismatch,
    /// No value was supplied for the named schema column.
    ValueForRequiredColumnIsMissing(String),
}

impl std::fmt::Display for ValidationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::TableDoesNotExist(table) => write!(f, "table \"{}\" does not exist", table),
            Self::TableExists(table) => write!(f, "table \"{}\" already exists", table),
            Self::ColumnDoesNotExist(column) => write!(f, "column \"{}\" does not exist", column),
            Self::BadColumnPosition(position) => {
                write!(f, "no column type stored at position {}", position)
            }
            Self::DuplicateColumn(column) => write!(f, "duplicate column \"{}\"", column),
            Self::TypeMismatch => write!(f, "type mismatch"),
            Self::ValueForRequiredColumnIsMissing(column) => {
                write!(f, "value for required column \"{}\" is missing", column)
            }
        }
    }
}

// Lets callers propagate validation failures with `?` into `Box<dyn Error>`-style error types.
impl std::error::Error for ValidationError {}
/// Returns the [`DbType`] that corresponds to the variant of `value`.
pub fn type_of(value: &Value) -> DbType {
    match value {
        Value::Number(_) => DbType::Number,
        // Indexable values carry their own variant, which decides the type.
        Value::Indexable(indexable) => match indexable {
            IndexableValue::Int(_) => DbType::Int,
            IndexableValue::String(_) => DbType::String,
            IndexableValue::Uuid(_) => DbType::Uuid,
        },
    }
}
/// Validates `operation` against `db_metadata` by dispatching to the validator
/// for its variant.
///
/// `db_metadata` is a list of `(table name, table schema)` pairs.
///
/// # Errors
///
/// Returns the [`ValidationError`] produced by the variant-specific validator.
pub fn validate_operation(operation: &Operation, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> {
    match operation {
        Operation::Select(table, selection, filter) => {
            validate_select(table, selection, filter, db_metadata)
        }
        Operation::Insert(table, values) => validate_insert(table, values, db_metadata),
        Operation::Delete(table, filter) => validate_delete(table, filter, db_metadata),
        // Operation::Update(table_name, insertion_values, condition) => {
        // validate_update(table_name, insertion_values, db_metadata)?;
        // },
        Operation::CreateTable(table, schema) => validate_create(table, schema, db_metadata),
        Operation::CreateIndex(table, column) => validate_create_index(table, column, db_metadata),
        // Operation::DropTable(table_name) => {
        // validate_drop(table_name, db_metadata)?;
        // }
    }
}
// pub fn validate_drop(table_name: &str, db_metadata: &Vec<(String, TableSchema)>) -> Result<(), ValidationError> {
// db_metadata.iter().find(|(tname, _)| table_name.eq(tname))
// .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?;
// Ok(())
// }
pub fn validate_create(table_name: &str, schema: &TableSchema, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> {
if db_metadata.iter().find(|(tname, _)| table_name.eq(tname)).is_some() {
return Err(ValidationError::TableExists(table_name.to_string()));
}
let mut column_names = HashSet::new();
for (name, _) in &schema.column_name_position_mapping {
if column_names.contains(name) {
return Err(ValidationError::DuplicateColumn(name.clone()));
} else {
column_names.insert(name.clone());
}
}
// TODO: Ensure it has a primary key??
Ok(())
}
/// Validates a select on `table_name`.
///
/// # Errors
///
/// * [`ValidationError::TableDoesNotExist`] if `db_metadata` has no entry for `table_name`.
/// * [`ValidationError::ColumnDoesNotExist`] for the first explicitly selected column
///   that is not in the table's schema.
/// * Any error from validating `condition` against the schema.
pub fn validate_select(table_name: &str, column_selection: &ColumnSelection, condition: &Option<Condition>, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> {
    let schema = match db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) {
        Some((_, schema)) => schema,
        None => return Err(ValidationError::TableDoesNotExist(table_name.to_string())),
    };
    // Only an explicit column list needs checking; other selections name no columns.
    if let ColumnSelection::Columns(columns) = column_selection {
        for column in columns.iter() {
            if !schema.column_name_position_mapping.contains_left(column) {
                return Err(ValidationError::ColumnDoesNotExist(column.to_string()));
            }
        }
    }
    validate_condition(condition, schema)
}
// pub fn validate_update(table_name: &str, insertion_values: &InsertionValues, db_metadata: &Vec<(String, TableSchema)>) -> Result<(), ValidationError> {
// let (_, schema) = db_metadata.iter().find(|(tname, _)| table_name.eq(tname))
// .ok_or(ValidationError::TableDoesNotExist(table_name.to_string()))?;
// let mut column_names = HashSet::new();
// // Find duplicate columns
// for (name, _) in insertion_values {
// if column_names.contains(name) {
// return Err(ValidationError::DuplicateColumn(name.clone()));
// } else {
// column_names.insert(name.clone());
// }
// }
// // Ensure columns exist in schema
// let column_value_type: Vec<_> = insertion_values.iter().map(|(column, value)| {
// (column, value, schema.column_name_position_mapping.iter().find(|(name, _) | {
// (*name).eq(column)
// }).map(|(_, t)| schema.types.get(*t as usize)))
// }).collect();
// if let Some((name, _, _)) = column_value_type.iter().find(|(_, _, t)| {
// t.is_none()
// }) {
// return Err(ValidationError::ColumnDoesNotExist((*name).clone()));
// }
// // Check types
// if let Some((_, _, _)) = column_value_type.iter().find(|(_, value, t)| {
// if let Some(Some(column_type)) = t {
// !type_of(value).eq(column_type)
// } else {
// false
// }
// }) {
// // TODO: Add column name information
// return Err(ValidationError::TypeMismatch);
// }
// Ok(())
// }
/// Validates an insert of `insertion_values` into `table_name`.
///
/// Checks run in this order, and the first failure is returned:
///
/// 1. the table must be listed in `db_metadata`;
/// 2. every column of the table's schema must have a value in `insertion_values`;
/// 3. every inserted column must resolve to a type in the schema;
/// 4. every inserted value must have the type of its column.
///
/// # Errors
///
/// * [`ValidationError::TableDoesNotExist`] if `db_metadata` has no entry for `table_name`.
/// * [`ValidationError::ValueForRequiredColumnIsMissing`] if a schema column has no value.
/// * [`ValidationError::ColumnDoesNotExist`] if an inserted column is not in the schema.
/// * [`ValidationError::BadColumnPosition`] if an inserted column is in the schema but the
///   schema stores no type at its position (same reporting as for conditions).
/// * [`ValidationError::TypeMismatch`] if a value's type differs from its column's type.
pub fn validate_insert(table_name: &str, insertion_values: &InsertionValues, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> {
    let (_, schema) = db_metadata.iter().find(|(tname, _)| table_name.eq(tname))
        .ok_or_else(|| ValidationError::TableDoesNotExist(table_name.to_string()))?;

    // TODO: primary key is not required
    let inserted_columns: HashSet<&str> = insertion_values.iter().map(|(name, _)| name.as_str()).collect();
    for (column_name, _) in &schema.column_name_position_mapping {
        if !inserted_columns.contains(column_name.as_str()) {
            return Err(ValidationError::ValueForRequiredColumnIsMissing(column_name.clone()));
        }
    }
    // NOTE(review): a column listed twice in `insertion_values` is not rejected here;
    // confirm whether that should yield `DuplicateColumn`.

    // Resolve the declared type of every inserted column before comparing any types,
    // so that an unknown column is reported ahead of a type mismatch.
    let mut typed_values = Vec::new();
    for (column, value) in insertion_values.iter() {
        let position = schema.column_name_position_mapping.get_by_left(column)
            .ok_or_else(|| ValidationError::ColumnDoesNotExist(column.clone()))?;
        let column_type = schema.types.get(*position)
            .ok_or(ValidationError::BadColumnPosition(*position))?;
        typed_values.push((value, column_type));
    }

    // TODO: Add column name information
    if typed_values.into_iter().any(|(value, column_type)| type_of(value) != *column_type) {
        return Err(ValidationError::TypeMismatch);
    }
    Ok(())
}
/// Validates a delete on `table_name`.
///
/// # Errors
///
/// * [`ValidationError::TableDoesNotExist`] if `db_metadata` has no entry for `table_name`.
/// * Any error from validating `condition` against the table's schema.
pub fn validate_delete(table_name: &str, condition: &Option<Condition>, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> {
    match db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) {
        Some((_, schema)) => validate_condition(condition, schema),
        None => Err(ValidationError::TableDoesNotExist(table_name.to_string())),
    }
}
/// Validates an optional condition against `schema`; an absent condition is always valid.
///
/// # Errors
///
/// For an equality condition:
///
/// * [`ValidationError::ColumnDoesNotExist`] if the compared column is not in the schema.
/// * [`ValidationError::BadColumnPosition`] if the schema stores no type at the column's position.
/// * [`ValidationError::TypeMismatch`] if the compared value's type differs from the column's type.
fn validate_condition(condition: &Option<Condition>, schema: &TableSchema) -> Result<(), ValidationError> {
    match condition {
        None => Ok(()),
        Some(Condition::Eq(left, right)) => {
            let position = match schema.column_name_position_mapping.get_by_left(left) {
                Some(position) => *position,
                None => return Err(ValidationError::ColumnDoesNotExist(left.clone())),
            };
            let column_type = match schema.types.get(position) {
                Some(column_type) => column_type,
                None => return Err(ValidationError::BadColumnPosition(position)),
            };
            if *column_type == type_of(right) {
                Ok(())
            } else {
                Err(ValidationError::TypeMismatch)
            }
        }
    }
}
/// Validates the creation of an index on `column_name` of `table_name`.
///
/// # Errors
///
/// * [`ValidationError::TableDoesNotExist`] if `db_metadata` has no entry for `table_name`.
/// * [`ValidationError::ColumnDoesNotExist`] if the table's schema has no such column.
fn validate_create_index(table_name: &str, column_name: &str, db_metadata: &Vec<(String, &TableSchema)>) -> Result<(), ValidationError> {
    // The table has to exist before its columns can be inspected.
    let schema = match db_metadata.iter().find(|(tname, _)| table_name.eq(tname)) {
        Some((_, schema)) => schema,
        None => return Err(ValidationError::TableDoesNotExist(table_name.to_string())),
    };
    if schema.column_name_position_mapping.contains_left(column_name) {
        Ok(())
    } else {
        Err(ValidationError::ColumnDoesNotExist(column_name.to_string()))
    }
}

View file

@ -9,3 +9,5 @@ edition = "2021"
tokio = { version = "1.35.1", features = ["full"] }
anyhow = "1.0.76"
proto = { path = "../proto" }
minisql = { path = "../minisql" }
parser = { path = "../parser" }

View file

@ -1,3 +1,5 @@
use minisql::interpreter::State;
use parser::{parse_and_validate, Error};
use proto::handshake::response::HandshakeResponse;
use proto::handshake::server::do_server_handshake;
use proto::message::backend::{
@ -38,6 +40,7 @@ async fn handle_stream(mut stream: TcpStream) -> anyhow::Result<()> {
let request = do_server_handshake(&mut writer, &mut reader, response).await?;
println!("Handshake complete:\n{request:?}");
let mut state = State::new();
loop {
println!("Waiting for next message");
@ -50,16 +53,25 @@ async fn handle_stream(mut stream: TcpStream) -> anyhow::Result<()> {
}
FrontendMessage::Query(data) => {
println!("Received Query: {:?}", data);
if data.query.as_str().contains("car") {
println!("Sending error message");
send_error_response(&mut writer, "Car not found").await?;
} else if data.query.as_str().to_lowercase().contains("select") {
println!("Sending table");
let metadata = state.metadata();
match parse_and_validate(data.query.as_str().to_string(), &metadata) {
Ok(operation) => {
match state.interpret(operation) {
Ok(_) => {
send_query_response(&mut writer).await?;
} else {
println!("Sending empty query");
send_empty_query(&mut writer).await?;
}
Err(err) => {
send_error_response(&mut writer, &format!("error interpreting: {:?}", err)).await?;
}
}
},
Err(Error::ParsingError(err)) => {
send_error_response(&mut writer, &format!("parsing error: {:?}", err)).await?;
}
Err(Error::ValidationError(v)) => {
send_error_response(&mut writer, &format!("validation error: {:?}", v)).await?;
}
};
send_ready_for_query(&mut writer).await?;
}
}