minisql/parser/src/validation.rs
Yuriy Dupyn 11dc992476 Cleanup
2024-01-27 22:46:19 +01:00

193 lines
8.9 KiB
Rust

use std::collections::HashSet;
use std::collections::HashMap;
use thiserror::Error;
use crate::syntax;
use crate::syntax::RawQuerySyntax;
use minisql::operation;
use minisql::{operation::Operation, type_system::Value, schema::{TableSchema, ColumnName, TableName}, type_system::DbType, interpreter::{TablePosition, DbSchema}};
/// Reasons why raw query syntax can be rejected when it is checked against the database schema.
#[derive(Debug, Error)]
pub enum ValidationError {
/// The query refers to a table that was not found in the schema.
#[error("table {0} does not exist")]
TableDoesNotExist(TableName),
/// A table with this name is already present in the schema (reported when creating a table).
#[error("table {0} already exists")]
TableAlreadyExists(TableName),
/// The listed columns are referenced by the query but are not part of the table.
#[error("columns {0:?} do not exist")]
ColumnsDoNotExist(Vec<ColumnName>),
/// The same column name appears more than once (in a table definition or in an insertion).
#[error("duplicate column {0}")]
DuplicateColumn(ColumnName),
/// A value supplied by the query has a different type than the column it is used with.
#[error("type mismatch at column `{column_name:?}` (expected {expected_type:?}, found {received_type:?})")]
TypeMismatch {
// Column the offending value was supplied for.
column_name: ColumnName,
// Type of the value found in the query.
received_type: DbType,
// Type the table schema declares for the column.
expected_type: DbType,
},
/// An insertion did not provide values for the listed table columns.
#[error("values for required columns {0:?} are missing")]
RequiredColumnsAreMissing(Vec<ColumnName>)
}
/// Checks raw query syntax against `db_schema` and turns it into an interpreter [`Operation`].
///
/// Every kind of query is handed to its dedicated validator; whatever that
/// validator returns (the operation or a [`ValidationError`]) is passed
/// through unchanged.
pub fn validate_operation(query: RawQuerySyntax, db_schema: &DbSchema) -> Result<Operation, ValidationError> {
    match query {
        RawQuerySyntax::Select(name, selection, filter) => validate_select(name, selection, filter, db_schema),
        RawQuerySyntax::Insert(name, values) => validate_insert(name, values, db_schema),
        RawQuerySyntax::Delete(name, filter) => validate_delete(name, filter, db_schema),
        RawQuerySyntax::CreateTable(name, new_schema) => validate_create(name, new_schema, db_schema),
        RawQuerySyntax::CreateIndex(name, column) => validate_create_index(name, column, db_schema),
    }
}
/// Looks up `table_name` in `db_schema`.
///
/// Returns the position and schema of the first entry whose name equals
/// `table_name`, or [`ValidationError::TableDoesNotExist`] when there is no
/// such entry.
fn validate_table_exists<'a>(db_schema: &DbSchema<'a>, table_name: &'a TableName) -> Result<(TablePosition, &'a TableSchema), ValidationError> {
    match db_schema.iter().find(|(tname, _, _)| table_name.eq(tname)) {
        Some((_, table_position, table_schema)) => Ok((*table_position, *table_schema)),
        None => Err(ValidationError::TableDoesNotExist(table_name.to_string())),
    }
}
/// Validates a `CREATE TABLE` query.
///
/// # Errors
///
/// * [`ValidationError::TableAlreadyExists`] when `db_schema` already contains a table named `table_name`.
/// * [`ValidationError::DuplicateColumn`] when `table_schema` lists the same column twice.
pub fn validate_create(table_name: TableName, table_schema: TableSchema, db_schema: &DbSchema) -> Result<Operation, ValidationError> {
    let name_is_taken = get_table_schema(db_schema, &table_name).is_some();
    if name_is_taken {
        return Err(ValidationError::TableAlreadyExists(table_name.to_string()));
    }

    if let Some(repeated) = find_first_duplicate(&table_schema.get_columns()) {
        return Err(ValidationError::DuplicateColumn(repeated.to_string()));
    }

    // TODO: Ensure it has a primary key??
    Ok(Operation::CreateTable(table_name, table_schema))
}
/// Validates a `SELECT` query and resolves it into [`Operation::Select`].
///
/// For an explicit column list, every requested column must exist in the
/// table; the names are then resolved to column positions. For a
/// select-all, the table's full selection is used. In both cases the
/// optional condition is validated against the table schema.
///
/// # Errors
///
/// * [`ValidationError::TableDoesNotExist`] when the table is unknown.
/// * [`ValidationError::ColumnsDoNotExist`] listing every requested column the table does not have.
/// * Any error reported by `validate_condition` for the condition.
pub fn validate_select(table_name: TableName, column_selection: syntax::ColumnSelection, condition: Option<syntax::Condition>, db_schema: &DbSchema) -> Result<Operation, ValidationError> {
    let (table_position, schema) = validate_table_exists(db_schema, &table_name)?;

    match column_selection {
        syntax::ColumnSelection::Columns(columns) => {
            // Keep only the requested columns that are NOT part of the table.
            // (The predicate used to be inverted, which rejected valid queries
            // and accepted queries that only named unknown columns.)
            let non_existent_columns: Vec<ColumnName> = columns
                .iter()
                .filter(|column| !schema.does_column_exist(column))
                .cloned()
                .collect();
            if !non_existent_columns.is_empty() {
                return Err(ValidationError::ColumnsDoNotExist(non_existent_columns));
            }

            // All names are known at this point, so every lookup yields a position.
            let selection: operation::ColumnSelection = columns
                .iter()
                .filter_map(|column_name| schema.get_column_position(column_name))
                .collect();
            let validated_condition = validate_condition(condition, schema)?;
            Ok(Operation::Select(table_position, selection, validated_condition))
        }
        syntax::ColumnSelection::All => {
            let validated_condition = validate_condition(condition, schema)?;
            Ok(Operation::Select(table_position, schema.all_selection(), validated_condition))
        }
    }
}
pub fn validate_insert(table_name: TableName, insertion_values: syntax::InsertionValues, db_schema: &DbSchema) -> Result<Operation, ValidationError> {
let (table_position, schema) = validate_table_exists(db_schema, &table_name)?;
// Check for duplicate columns in insertion_values.
let columns_in_query_vec: Vec<&ColumnName> = insertion_values.iter().map(|(column_name, _)| column_name).collect();
find_first_duplicate(&columns_in_query_vec)
.map_or_else(
|| Ok(()),
|duplicate_column| Err(ValidationError::DuplicateColumn(duplicate_column.to_string()))
)?;
// Check that the set of columns in the insertion_values is the same as the set of required columns of the table.
let columns_in_query: HashSet<&ColumnName> = HashSet::from_iter(columns_in_query_vec);
let columns_in_schema: HashSet<&ColumnName> = HashSet::from_iter(schema.get_columns());
let non_existant_columns = Vec::from_iter(columns_in_query.difference(&columns_in_schema));
if non_existant_columns.len() > 0 {
return Err(ValidationError::ColumnsDoNotExist(non_existant_columns.iter().map(|column_name| column_name.to_string()).collect()));
}
let missing_required_columns = Vec::from_iter(columns_in_schema.difference(&columns_in_query));
if missing_required_columns.len() > 0 {
return Err(ValidationError::RequiredColumnsAreMissing(missing_required_columns.iter().map(|str| str.to_string()).collect()));
}
// Check types and prepare for creation of InsertionValues for the interpreter
let mut values_map: HashMap<_, Value> = HashMap::new();
for (column_name, value) in insertion_values {
let (column, expected_type) = schema.get_column(&column_name).ok_or(ValidationError::ColumnsDoNotExist(vec![column_name.to_string()]))?; // By the previous validation steps this is never gonna trigger an error.
let value_type = value.to_type();
if value_type != expected_type {
return Err(ValidationError::TypeMismatch { column_name: column_name.to_string(), received_type: value_type, expected_type });
}
values_map.insert(column, value);
}
// These are values ordered by the column position
let values: operation::InsertionValues = values_map.into_values().collect();
Ok(Operation::Insert(table_position, values))
}
/// Validates a `DELETE` query and resolves it into [`Operation::Delete`].
///
/// Fails with [`ValidationError::TableDoesNotExist`] when the table is
/// unknown, or with whatever error `validate_condition` reports for the
/// optional condition.
pub fn validate_delete(table_name: TableName, condition: Option<syntax::Condition>, db_schema: &DbSchema) -> Result<Operation, ValidationError> {
    let (table_position, schema) = validate_table_exists(db_schema, &table_name)?;
    validate_condition(condition, schema)
        .map(|checked_condition| Operation::Delete(table_position, checked_condition))
}
/// Validates an optional query condition against the table `schema`.
///
/// `None` stays `None`. An equality condition is accepted when its column
/// exists in the schema and the compared value has the column's type; the
/// column name is then replaced by what `schema.get_column` returns for it.
///
/// # Errors
///
/// * [`ValidationError::ColumnsDoNotExist`] when the column is not part of the schema.
/// * [`ValidationError::TypeMismatch`] when the value's type differs from the column's type.
fn validate_condition(condition: Option<syntax::Condition>, schema: &TableSchema) -> Result<Option<operation::Condition>, ValidationError> {
    match condition {
        None => Ok(None),
        Some(syntax::Condition::Eq(column_name, value)) => {
            let (column, expected_type) = schema
                .get_column(&column_name)
                .ok_or_else(|| ValidationError::ColumnsDoNotExist(vec![column_name.to_string()]))?;

            let value_type: DbType = value.to_type();
            if !expected_type.eq(&value_type) {
                return Err(ValidationError::TypeMismatch { column_name: column_name.to_string(), received_type: value_type, expected_type });
            }

            Ok(Some(operation::Condition::Eq(column, value)))
        }
    }
}
/// Validates a `CREATE INDEX` query and resolves it into [`Operation::CreateIndex`].
///
/// Fails with [`ValidationError::TableDoesNotExist`] when the table is
/// unknown and with [`ValidationError::ColumnsDoNotExist`] when the table
/// has no column named `column_name`.
fn validate_create_index(table_name: TableName, column_name: ColumnName, db_schema: &DbSchema) -> Result<Operation, ValidationError> {
    // TODO: You should disallow indexing of Number columns.
    let (table_position, schema) = validate_table_exists(db_schema, &table_name)?;

    match schema.get_column_position(&column_name) {
        Some(column) => Ok(Operation::CreateIndex(table_position, column)),
        None => Err(ValidationError::ColumnsDoNotExist(vec![column_name.to_string()])),
    }
}
// ===Helpers===
/// Returns the first element of `ts` that equals an element occurring earlier in the slice.
///
/// The returned reference points at the repeated occurrence (not at the
/// element it repeats). Returns `None` when all elements are distinct.
fn find_first_duplicate<T>(ts: &[T]) -> Option<&T>
where
    T: Eq + std::hash::Hash,
{
    let mut seen: HashSet<&T> = HashSet::with_capacity(ts.len());
    // `insert` reports `false` exactly when the element was already present,
    // so the first rejected insertion marks the first duplicate.
    ts.iter().find(|candidate| !seen.insert(*candidate))
}
/// Returns the schema of the first table in `db_schema` named `table_name`, if there is one.
fn get_table_schema<'a>(db_schema: &DbSchema<'a>, table_name: &'a TableName) -> Option<&'a TableSchema> {
    db_schema
        .iter()
        .find(|(tname, _, _)| table_name.eq(tname))
        .map(|(_, _, table_schema)| *table_schema)
}