use std::collections::{BTreeMap, HashSet};

use thiserror::Error;

use crate::syntax;
use crate::syntax::{ColumnSchema, RawQuerySyntax, RawTableSchema};
use minisql::operation;
use minisql::{
    interpreter::DbSchema,
    operation::Operation,
    schema::{Column, ColumnName, TableName, TablePosition, TableSchema},
    type_system::DbType,
    type_system::Value,
};

/// Reasons a raw query can be rejected while it is checked against the database schema.
///
/// The `Display` text of every variant is generated by `thiserror` from its `#[error]`
/// attribute.
#[derive(Debug, Error)]
pub enum ValidationError {
    /// The query refers to a table that is not present in the schema.
    #[error("table {0} does not exist")]
    TableDoesNotExist(TableName),
    /// A table with the given name is already present in the schema.
    #[error("table {0} already exists")]
    TableAlreadyExists(TableName),
    /// The listed columns are not part of the targeted table.
    #[error("columns {0:?} do not exist")]
    ColumnsDoNotExist(Vec<ColumnName>),
    /// The same column name occurs more than once.
    #[error("duplicate column {0}")]
    DuplicateColumn(ColumnName),
    /// The table definition marks no column as primary key.
    #[error("primary key missing in table {0}")]
    PrimaryKeyMissing(TableName),
    /// The table definition marks more than one column as primary key.
    #[error("multiple primary keys found in table {0}")]
    MultiplePrimaryKeysFound(TableName),
    /// An index was requested on a column whose type cannot be indexed.
    ///
    /// The fields are `(table, column)`, in that order.
    #[error("attempt to index non-indexable column {1} in table {0}")]
    AttemptToIndexNonIndexableColumn(TableName, ColumnName),
    /// The type found for a column differs from the type that was expected there.
    #[error("type mismatch at column `{column_name:?}` (expected {expected_type:?}, found {received_type:?})")]
    TypeMismatch {
        column_name: ColumnName,
        received_type: DbType,
        expected_type: DbType,
    },
    /// The listed columns of the table received no value.
    #[error("values for required columns {0:?} are missing")]
    RequiredColumnsAreMissing(Vec<ColumnName>),
}

/// Validates and converts the raw syntax into a proper interpreter operation based on db schema.
pub fn validate_operation( syntax: RawQuerySyntax, db_schema: &DbSchema, ) -> Result { match syntax { RawQuerySyntax::Select(table_name, column_selection, condition) => { validate_select(table_name, column_selection, condition, db_schema) } RawQuerySyntax::Insert(table_name, insertion_values) => { validate_insert(table_name, insertion_values, db_schema) } RawQuerySyntax::Delete(table_name, condition) => { validate_delete(table_name, condition, db_schema) } RawQuerySyntax::CreateTable(schema) => validate_create_table(schema, db_schema), RawQuerySyntax::CreateIndex(table_name, column_name) => { validate_create_index(table_name, column_name, db_schema) } } } fn validate_table_exists<'a>( db_schema: &DbSchema<'a>, table_name: &'a TableName, ) -> Result<(TablePosition, &'a TableSchema), ValidationError> { db_schema .iter() .find(|(tname, _, _)| table_name.eq(tname)) .ok_or(ValidationError::TableDoesNotExist(table_name.to_string())) .map(|(_, table_position, table_schema)| (*table_position, *table_schema)) } fn validate_create_table( raw_table_schema: RawTableSchema, db_schema: &DbSchema, ) -> Result { let table_name: &TableName = &raw_table_schema.table_name; if get_table_schema(db_schema, table_name).is_some() { return Err(ValidationError::TableAlreadyExists(table_name.to_string())); } let table_schema: TableSchema = validate_table_schema(raw_table_schema)?; Ok(Operation::CreateTable(table_schema)) } fn validate_table_schema(raw_table_schema: RawTableSchema) -> Result { // check for duplicate columns find_first_duplicate(&raw_table_schema.get_columns()).map_or_else( || Ok(()), |duplicate_column| { Err(ValidationError::DuplicateColumn( duplicate_column.to_string(), )) }, )?; let mut primary_keys: Vec<(ColumnName, DbType)> = vec![]; let mut columns: Vec = vec![]; let mut types: Vec = vec![]; for ColumnSchema { column_name, type_, is_primary, } in raw_table_schema.columns { if is_primary { primary_keys.push((column_name.clone(), type_)) } columns.push(column_name); 
types.push(type_); } // Ensure it has exactly one primary key that has correct type. let number_of_primary_keys = primary_keys.len(); if number_of_primary_keys == 0 { Err(ValidationError::PrimaryKeyMissing( raw_table_schema.table_name.clone(), )) } else if number_of_primary_keys > 1 { Err(ValidationError::MultiplePrimaryKeysFound( raw_table_schema.table_name.clone(), )) } else { let (primary_column_name, primary_key_type) = primary_keys[0].clone(); if primary_key_type == DbType::Uuid { Ok(TableSchema::new( raw_table_schema.table_name, primary_column_name, columns, types, )) } else { Err(ValidationError::TypeMismatch { column_name: raw_table_schema.table_name.clone(), received_type: primary_key_type, expected_type: DbType::Uuid, }) } } } fn validate_select( table_name: TableName, column_selection: syntax::ColumnSelection, condition: Option, db_schema: &DbSchema, ) -> Result { let (table_position, schema) = validate_table_exists(db_schema, &table_name)?; match column_selection { syntax::ColumnSelection::Columns(columns) => { let non_existant_columns: Vec = columns .iter() .filter_map(|column| { if schema.does_column_exist(column) { None } else { Some(column.clone()) } }) .collect(); if non_existant_columns.is_empty() { let selection: operation::ColumnSelection = columns .iter() .filter_map(|column_name| schema.get_column(column_name)) .collect(); let validated_condition = validate_condition(condition, schema)?; Ok(Operation::Select( table_position, selection, validated_condition, )) } else { Err(ValidationError::ColumnsDoNotExist(non_existant_columns)) } } syntax::ColumnSelection::All => { let validated_condition = validate_condition(condition, schema)?; Ok(Operation::Select( table_position, schema.all_selection(), validated_condition, )) } } } fn validate_insert( table_name: TableName, insertion_values: syntax::InsertionValues, db_schema: &DbSchema, ) -> Result { let (table_position, schema) = validate_table_exists(db_schema, &table_name)?; // Check for duplicate 
columns in insertion_values. let columns_in_query_vec: Vec<&ColumnName> = insertion_values .iter() .map(|(column_name, _)| column_name) .collect(); find_first_duplicate(&columns_in_query_vec).map_or_else( || Ok(()), |duplicate_column| { Err(ValidationError::DuplicateColumn( duplicate_column.to_string(), )) }, )?; // Check that the set of columns in the insertion_values is the same as the set of required columns of the table. let columns_in_query: HashSet<&ColumnName> = HashSet::from_iter(columns_in_query_vec); let columns_in_schema: HashSet<&ColumnName> = HashSet::from_iter(schema.get_columns()); let non_existant_columns = Vec::from_iter(columns_in_query.difference(&columns_in_schema)); if !non_existant_columns.is_empty() { return Err(ValidationError::ColumnsDoNotExist( non_existant_columns .iter() .map(|column_name| column_name.to_string()) .collect(), )); } let missing_required_columns = Vec::from_iter(columns_in_schema.difference(&columns_in_query)); if !missing_required_columns.is_empty() { return Err(ValidationError::RequiredColumnsAreMissing( missing_required_columns .iter() .map(|str| str.to_string()) .collect(), )); } // Check types and prepare for creation of InsertionValues for the interpreter let mut values_map: BTreeMap = BTreeMap::new(); // The reason for using BTreeMap // instead of HashMap is that we need // to get the values in a vector // sorted by the key. for (column_name, value) in insertion_values { let (column, expected_type) = schema .get_typed_column(&column_name) .ok_or(ValidationError::ColumnsDoNotExist(vec![ column_name.to_string() ]))?; // By the previous validation steps this is never gonna trigger an error. let value_type = value.to_type(); if value_type != expected_type { return Err(ValidationError::TypeMismatch { column_name: column_name.to_string(), received_type: value_type, expected_type, }); } values_map.insert(column, value); } // WARNING: If you use `values_map: HashMap<_,_>`, this is not gonna sort values by key. 
let values: operation::InsertionValues = values_map.into_values().collect(); // Note that one of the values is id. Ok(Operation::Insert(table_position, values)) } fn validate_delete( table_name: TableName, condition: Option, db_schema: &DbSchema, ) -> Result { let (table_position, schema) = validate_table_exists(db_schema, &table_name)?; let validated_condition = validate_condition(condition, schema)?; Ok(Operation::Delete(table_position, validated_condition)) } fn validate_condition( condition: Option, schema: &TableSchema, ) -> Result, ValidationError> { match condition { Some(condition) => match condition { syntax::Condition::Eq(column_name, value) => { let (column, expected_type) = schema.get_typed_column(&column_name).ok_or( ValidationError::ColumnsDoNotExist(vec![column_name.to_string()]), )?; let value_type: DbType = value.to_type(); if expected_type.eq(&value_type) { Ok(Some(operation::Condition::Eq(column, value))) } else { Err(ValidationError::TypeMismatch { column_name: column_name.to_string(), received_type: value_type, expected_type, }) } } }, None => Ok(None), } } fn validate_create_index( table_name: TableName, column_name: ColumnName, db_schema: &DbSchema, ) -> Result { let (table_position, schema) = validate_table_exists(db_schema, &table_name)?; schema.get_typed_column(&column_name).map_or_else( || { Err(ValidationError::ColumnsDoNotExist(vec![ column_name.to_string() ])) }, |(column, type_)| { if type_.is_indexable() { Ok(Operation::CreateIndex(table_position, column)) } else { Err(ValidationError::AttemptToIndexNonIndexableColumn( column_name.clone(), table_name, )) } }, ) } // ===Helpers=== fn find_first_duplicate(ts: &[T]) -> Option<&T> where T: Eq + std::hash::Hash, { let mut already_seen_elements: HashSet<&T> = HashSet::new(); for t in ts { if already_seen_elements.contains(t) { return Some(t); } else { already_seen_elements.insert(t); } } None } fn get_table_schema<'a>( db_schema: &DbSchema<'a>, table_name: &'a TableName, ) -> Option<&'a 
TableSchema> { let (_, _, table_schema) = db_schema .iter() .find(|(tname, _, _)| table_name.eq(tname))?; Some(table_schema) } #[cfg(test)] mod tests { use super::*; use crate::syntax::{ColumnSchema, ColumnSelection, Condition, RawQuerySyntax, RawTableSchema}; use minisql::operation; use minisql::operation::Operation; use minisql::schema::TableSchema; use minisql::type_system::{IndexableValue, Value}; use Condition::*; use IndexableValue::*; use RawQuerySyntax::*; use Value::*; fn users_schema() -> TableSchema { TableSchema::new( "users".to_string(), "id".to_string(), vec!["id".to_string(), "name".to_string(), "age".to_string()], vec![DbType::Uuid, DbType::String, DbType::Int], ) } fn raw_users_schema() -> RawTableSchema { RawTableSchema { table_name: "users".to_string(), columns: vec![ ColumnSchema { column_name: "id".to_string(), type_: DbType::Uuid, is_primary: true, }, ColumnSchema { column_name: "name".to_string(), type_: DbType::String, is_primary: false, }, ColumnSchema { column_name: "age".to_string(), type_: DbType::Int, is_primary: false, }, ], } } fn db_schema(users_schema: &TableSchema) -> DbSchema { vec![("users".to_string(), 0, users_schema)] } fn empty_db_schema() -> DbSchema<'static> { vec![] } #[test] fn test_create_basic() { let db_schema: DbSchema = empty_db_schema(); let syntax: RawQuerySyntax = CreateTable(raw_users_schema()); let result = validate_operation(syntax, &db_schema); assert!(matches!(result, Ok(Operation::CreateTable(_)))); let Ok(Operation::CreateTable(schema)) = result else { panic!() }; assert!(schema.table_name() == "users"); } #[test] fn test_create_duplicates_in_schema() { let raw_users_schema = RawTableSchema { table_name: "users".to_string(), columns: vec![ ColumnSchema { column_name: "id".to_string(), type_: DbType::Uuid, is_primary: true, }, ColumnSchema { column_name: "name".to_string(), type_: DbType::String, is_primary: false, }, ColumnSchema { column_name: "name".to_string(), type_: DbType::Number, is_primary: false, 
}, ], }; let db_schema: DbSchema = empty_db_schema(); let syntax: RawQuerySyntax = CreateTable(raw_users_schema); let result = validate_operation(syntax, &db_schema); println!("{:?}", result); assert!(matches!(result, Err(ValidationError::DuplicateColumn(_)))); } #[test] fn test_create_primary_key_is_uuid() { let raw_users_schema = RawTableSchema { table_name: "users".to_string(), columns: vec![ ColumnSchema { column_name: "id".to_string(), type_: DbType::Int, is_primary: true, }, ColumnSchema { column_name: "name".to_string(), type_: DbType::String, is_primary: false, }, ColumnSchema { column_name: "age".to_string(), type_: DbType::Int, is_primary: false, }, ], }; let db_schema: DbSchema = empty_db_schema(); let syntax: RawQuerySyntax = CreateTable(raw_users_schema); let result = validate_operation(syntax, &db_schema); assert!(matches!(result, Err(ValidationError::TypeMismatch { .. }))); } #[test] fn test_create_multiple_primary_keys() { let raw_users_schema = RawTableSchema { table_name: "users".to_string(), columns: vec![ ColumnSchema { column_name: "id".to_string(), type_: DbType::Int, is_primary: true, }, ColumnSchema { column_name: "name".to_string(), type_: DbType::String, is_primary: true, }, ColumnSchema { column_name: "age".to_string(), type_: DbType::Int, is_primary: false, }, ], }; let db_schema: DbSchema = empty_db_schema(); let syntax: RawQuerySyntax = CreateTable(raw_users_schema); let result = validate_operation(syntax, &db_schema); assert!(matches!( result, Err(ValidationError::MultiplePrimaryKeysFound(_)) )); } #[test] fn test_create_already_exists() { let users_schema: TableSchema = users_schema(); let db_schema: DbSchema = db_schema(&users_schema); let syntax: RawQuerySyntax = CreateTable(raw_users_schema()); let result = validate_operation(syntax, &db_schema); assert!(matches!( result, Err(ValidationError::TableAlreadyExists(_)) )); } // ====Select==== #[test] fn test_select_basic() { let users_schema: TableSchema = users_schema(); let 
db_schema: DbSchema = db_schema(&users_schema); let users_position = 0; let id = 0; let name = 1; let age = 2; let syntax: RawQuerySyntax = Select("users".to_string(), ColumnSelection::All, None); let result = validate_operation(syntax, &db_schema); assert!(matches!(result, Ok(Operation::Select(_, _, _)))); let Ok(Operation::Select(table_position, column_selection, condition)) = result else { panic!() }; assert!(table_position == users_position); assert!(condition == None); assert!(column_selection == vec![id, name, age]); } #[test] fn test_select_non_existent_table() { let users_schema: TableSchema = users_schema(); let db_schema: DbSchema = db_schema(&users_schema); let syntax: RawQuerySyntax = Select("does_not_exist".to_string(), ColumnSelection::All, None); let result = validate_operation(syntax, &db_schema); assert!(matches!(result, Err(ValidationError::TableDoesNotExist(_)))); } #[test] fn test_select_eq() { let users_schema: TableSchema = users_schema(); let db_schema: DbSchema = db_schema(&users_schema); let users_position = 0; let id = 0; let name = 1; let age = 2; let syntax: RawQuerySyntax = Select( "users".to_string(), ColumnSelection::All, Some(Eq("age".to_string(), Indexable(Int(25)))), ); let result = validate_operation(syntax, &db_schema); assert!(matches!(result, Ok(Operation::Select(_, _, _)))); let Ok(Operation::Select(table_position, column_selection, condition)) = result else { panic!() }; assert!(table_position == users_position); assert!(column_selection == vec![id, name, age]); assert!(condition == Some(operation::Condition::Eq(age, Indexable(Int(25))))); } #[test] fn test_select_eq_columns_selection() { let users_schema: TableSchema = users_schema(); let db_schema: DbSchema = db_schema(&users_schema); let users_position = 0; let name = 1; let age = 2; let syntax: RawQuerySyntax = Select( "users".to_string(), ColumnSelection::Columns(vec![ "age".to_string(), "name".to_string(), "age".to_string(), ]), None, ); let result = 
validate_operation(syntax, &db_schema); assert!(matches!(result, Ok(Operation::Select(_, _, _)))); let Ok(Operation::Select(table_position, column_selection, condition)) = result else { panic!() }; assert!(table_position == users_position); assert!(column_selection == vec![age, name, age]); assert!(condition == None); } #[test] fn test_select_eq_columns_selection_nonexistent_column_selected() { let users_schema: TableSchema = users_schema(); let db_schema: DbSchema = db_schema(&users_schema); let syntax: RawQuerySyntax = Select( "users".to_string(), ColumnSelection::Columns(vec!["age".to_string(), "does_not_exist".to_string()]), None, ); let result = validate_operation(syntax, &db_schema); assert!(matches!(result, Err(ValidationError::ColumnsDoNotExist(_)))); } #[test] fn test_select_eq_non_existent_column() { let users_schema: TableSchema = users_schema(); let db_schema: DbSchema = db_schema(&users_schema); let syntax: RawQuerySyntax = Select( "users".to_string(), ColumnSelection::All, Some(Eq("does_not_exist".to_string(), Indexable(Int(25)))), ); let result = validate_operation(syntax, &db_schema); assert!(matches!(result, Err(ValidationError::ColumnsDoNotExist(_)))); } #[test] fn test_select_eq_type_error() { let users_schema: TableSchema = users_schema(); let db_schema: DbSchema = db_schema(&users_schema); let syntax: RawQuerySyntax = Select( "users".to_string(), ColumnSelection::All, Some(Eq("age".to_string(), Indexable(String("25".to_string())))), ); let result = validate_operation(syntax, &db_schema); assert!(matches!(result, Err(ValidationError::TypeMismatch { .. 
}))); } // ====Insert==== #[test] fn test_insert() { let users_schema: TableSchema = users_schema(); let db_schema: DbSchema = db_schema(&users_schema); let users_position = 0; let syntax: RawQuerySyntax = Insert( "users".to_string(), vec![ ("name".to_string(), Indexable(String("Alice".to_string()))), ("id".to_string(), Indexable(Uuid(0))), ("age".to_string(), Indexable(Int(25))), ], ); let result = validate_operation(syntax, &db_schema); assert!(matches!(result, Ok(Operation::Insert(_, _)))); let Ok(Operation::Insert(table_position, values)) = result else { panic!() }; assert!(table_position == users_position); // Recall the order is // let id = 0; // let name = 1; // let age = 2; assert!( values == vec![ Indexable(Uuid(0)), Indexable(String("Alice".to_string())), Indexable(Int(25)) ] ); } #[test] fn test_insert_non_existent_column() { let users_schema: TableSchema = users_schema(); let db_schema: DbSchema = db_schema(&users_schema); let syntax: RawQuerySyntax = Insert( "users".to_string(), vec![ ("name".to_string(), Indexable(String("Alice".to_string()))), ("id".to_string(), Indexable(Uuid(0))), ("age".to_string(), Indexable(Int(25))), ("does_not_exist".to_string(), Indexable(Int(25))), ], ); let result = validate_operation(syntax, &db_schema); assert!(matches!(result, Err(ValidationError::ColumnsDoNotExist(_)))); } #[test] fn test_insert_ill_typed_column() { let users_schema: TableSchema = users_schema(); let db_schema: DbSchema = db_schema(&users_schema); let syntax: RawQuerySyntax = Insert( "users".to_string(), vec![ ("name".to_string(), Indexable(String("Alice".to_string()))), ("id".to_string(), Indexable(Uuid(0))), ("age".to_string(), Number(25.0)), ], ); let result = validate_operation(syntax, &db_schema); assert!(matches!(result, Err(ValidationError::TypeMismatch { .. 
}))); } // ====Delete==== #[test] fn test_delete_all() { let users_schema: TableSchema = users_schema(); let db_schema: DbSchema = db_schema(&users_schema); let users_position = 0; let syntax: RawQuerySyntax = Delete("users".to_string(), None); let result = validate_operation(syntax, &db_schema); assert!(matches!(result, Ok(Operation::Delete(_, None)))); let Ok(Operation::Delete(table_position, _)) = result else { panic!() }; assert!(table_position == users_position); } #[test] fn test_delete_eq() { let users_schema: TableSchema = users_schema(); let db_schema: DbSchema = db_schema(&users_schema); let users_position = 0; let age = 2; let syntax: RawQuerySyntax = Delete( "users".to_string(), Some(Eq("age".to_string(), Indexable(Int(25)))), ); let result = validate_operation(syntax, &db_schema); assert!(matches!( result, Ok(Operation::Delete(_, Some(operation::Condition::Eq(_, _)))) )); let Ok(Operation::Delete(table_position, Some(operation::Condition::Eq(column, value)))) = result else { panic!() }; assert!(table_position == users_position); assert!(column == age); assert!(value == Indexable(Int(25))); } // ====CreateIndex==== #[test] fn test_create_index() { let users_schema: TableSchema = users_schema(); let db_schema: DbSchema = db_schema(&users_schema); let users_position = 0; let age = 2; let syntax: RawQuerySyntax = CreateIndex("users".to_string(), "age".to_string()); let result = validate_operation(syntax, &db_schema); assert!(matches!(result, Ok(Operation::CreateIndex(_, _)))); let Ok(Operation::CreateIndex(table_position, column)) = result else { panic!() }; assert!(table_position == users_position); assert!(column == age); } #[test] fn test_create_index_nonexistent_column() { let users_schema: TableSchema = users_schema(); let db_schema: DbSchema = db_schema(&users_schema); let syntax: RawQuerySyntax = CreateIndex("users".to_string(), "does_not_exist".to_string()); let result = validate_operation(syntax, &db_schema); assert!(matches!(result, 
Err(ValidationError::ColumnsDoNotExist(_)))); } }