From 20615508a221800d99ba853ff0e02c071de8ce85 Mon Sep 17 00:00:00 2001 From: Yuriy Dupyn <2153100+omedusyo@users.noreply.github.com> Date: Mon, 11 Dec 2023 17:51:17 +0100 Subject: [PATCH] First attempt at SELECT --- src/main.rs | 217 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 161 insertions(+), 56 deletions(-) diff --git a/src/main.rs b/src/main.rs index 31b1c7b..da02d7b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, HashMap}; +use std::collections::{BTreeMap, HashMap, HashSet}; // ==============SQL operations================ // TODO: Note that every operation has a table name. @@ -45,15 +45,23 @@ type UUID = u64; // TODO: What about nulls? I would rather not have that as in SQL, it sucks. // I would rather have non-nullable values by default, // and something like an explicit Option type for nulls. +#[derive(Debug, Clone, PartialEq)] enum DbValue { + Number(f64), // TODO: Can't put floats as keys in maps, since they don't implement Eq. What to + // do? + Indexable(IndexableDbValue), +} + +#[derive(Debug, Ord, Eq, Clone, PartialOrd, PartialEq)] +enum IndexableDbValue { String(String), Int(u64), - Number(f64), UUID(UUID), // TODO: what bout null? } // TODO: Can this be autogenerated from the values? +#[derive(Debug, Clone, Copy)] enum DbType { String, Int, @@ -65,10 +73,13 @@ impl DbValue { // TODO: Can this be autogenerated? fn to_type(self) -> DbType { match self { - Self::String(_) => DbType::String, - Self::Int(_) => DbType::Int, Self::Number(_) => DbType::Number, - Self::UUID(_) => DbType::UUID, + Self::Indexable(val) => + match val { + IndexableDbValue::String(_) => DbType::String, + IndexableDbValue::Int(_) => DbType::Int, + IndexableDbValue::UUID(_) => DbType::UUID, + } } } } @@ -90,13 +101,17 @@ struct Table { // TODO: Is this really indexed by DbValues? // Maybe we should have a separate index type for each type of value we're indexing over +// TODO: I should have a set of UUID, not just a single UUID, e.g. +// a user table can have multiple different users with the same name. struct ColumnIndex { - index: BTreeMap + index: BTreeMap> } // Note that it is nice to split metadata from the data because // then you can give the metadata to the parser without giving it the data. struct TableSchema { + table_name: TableName, // used for descriptive errors + primary_key: ColumnPosition, columns: HashMap } @@ -104,13 +119,12 @@ struct TableSchema { fn column_position(table_meta: TableSchema, column_name: ColumnName) -> Option { todo!() } - // Use `TablePosition` as index type Tables = Vec; type ColumnName = String; -type ColumnPosition = u32; +type ColumnPosition = usize; // Use `ColumnPosition` as index type Row = Vec; @@ -123,6 +137,11 @@ type Rows = // interface // insert(id, value) +fn select_columns(row: &Row, columns: &Vec) -> Row { + // row.column_position + todo!() +} + // ==============Interpreter================ struct State { table_positions: HashMap, @@ -134,7 +153,7 @@ impl State { todo!() } - fn attach_table(&mut self, table: Table) { + fn attach_table(&mut self, table_name: TableName, table: Table) { todo!() } } @@ -155,104 +174,190 @@ fn get_table<'tables_life: 'table_life, 'table_life>(tables: &'tables_life Table // because you'll be forced to put an iterator/slice into the Response data-structure. // Alternative is to pass a row-consumer to the functionas that knows how to communicate with // the client, but the details of communication are hidden behind an interface -fn interpret(table_name: TableName, operation: Operation, state: &mut State, consumer: impl SqlConsumer) -> () { +fn interpret(table_name: TableName, operation: Operation, state: &mut State, consumer: impl SqlConsumer) -> DbResult { // TODO: lock stuff use Operation::*; match operation { Select(table_name, column_selection, maybe_condition) => { let table: &Table = todo!(); - table.select_where(column_selection, maybe_condition, consumer) + Ok(Response::Selected(table.select_where(column_selection, maybe_condition)?)) }, Insert(table_name, values) => { let table: &mut Table = todo!(); - table.insert(values, consumer) + table.insert(values); + todo!() }, Delete(table_name, maybe_condition) => { let table: &mut Table = todo!(); - table.delete_where(maybe_condition, consumer) + table.delete_where(maybe_condition); + todo!() }, CreateTable(table_name, table_schema) => { - let table = Table::new(table_name, table_schema); - state.attach_table(table); + let table = Table::new(table_schema); + state.attach_table(table_name, table); todo!() }, CreateIndex(table_name, column_name) => { let table: &mut Table = todo!(); + let column_position: ColumnPosition = todo!(); - let index: ColumnIndex = ColumnIndex::new(table, column_name); - table.attach_index(index); - }, // TODO: Is this sufficient? - // + let index: ColumnIndex = ColumnIndex::new(); + table.attach_index(column_position, index); + todo!() + }, } } impl ColumnIndex { - fn new(table: &Table, column_name: ColumnName) -> ColumnIndex { - todo!() + fn new() -> Self { + Self { index: BTreeMap::new() } } } +impl TableSchema { + fn get_column(&self, column_name: &ColumnName) -> DbResult<(DbType, ColumnPosition)> { + match self.columns.get(column_name) { + Some((type_, column_position)) => Ok((*type_, *column_position)), + None => Err(Error::ColumnDoesNotExist(self.table_name.clone(), column_name.clone())) + } + } + + fn is_primary(&self, column_position: ColumnPosition) -> bool { + self.primary_key == column_position + } + + fn column_positions_from_column_names(&self, column_names: &[ColumnName]) -> DbResult> { + let mut positions: Vec = Vec::with_capacity(column_names.len()); + for column_name in column_names { + let (_, column_position) = self.get_column(column_name)?; + positions.push(column_position) + } + Ok(positions) + } + + fn column_positions_from_column_selection(&self, column_selection: &ColumnSelection) -> DbResult> { + match column_selection { + ColumnSelection::All => { + let mut column_positions: Vec = self.columns.values().map(|(_, column_position)| *column_position).collect(); + column_positions.sort(); + Ok(column_positions) + }, + + ColumnSelection::Columns(column_names) => { + self.column_positions_from_column_names(column_names) + }, + } + } + +} impl Table { - fn new(table_name: TableName, table_schema: TableSchema) -> Table { - todo!() + fn new(table_schema: TableSchema) -> Self { + Self { + schema: table_schema, + rows: BTreeMap::new(), + indexes: HashMap::new(), + } } - fn attach_index(&mut self, column_index: ColumnIndex) { - todo!() + fn attach_index(&mut self, column_position: ColumnPosition, column_index: ColumnIndex) { + self.indexes.insert(column_position, column_index); } - fn select_where(&self, column_selection: ColumnSelection, maybe_condition: Option, consumer: impl SqlConsumer) { + fn get_row_by_id(&self, id: UUID) -> Option { + self.rows.get(&id).cloned() + } + + fn get_rows_by_ids(&self, ids: HashSet) -> Vec { + ids.into_iter() + .filter_map(|id| self.get_row_by_id(id)) + .collect() + } + + fn get_rows_by_value(&self, column_position: ColumnPosition, value: &DbValue) -> Vec { + // brute-force search + self.rows.values() + .filter_map(|row| if row.get(column_position) == Some(value) { Some(row.clone()) } else { None }) + .collect() + } + + fn select_where(&self, column_selection: ColumnSelection, maybe_condition: Option) -> DbResult> { + let selected_column_positions = self.schema.column_positions_from_column_selection(&column_selection)?; match maybe_condition { - None => { - // .iter() will give us an iterator over all the rows + None => + Ok(self.rows.values().map(|row| select_columns(row, &selected_column_positions)).collect()), - // two choices - // 1. optimized version - // self.iter_with_columns(column_selection).for_each(|row| { - // consumer.send(row) - // }); - // 2. - // self.iter() - // .map(|row| row.select_columns(column_selection)) - // .for_each(|reduced_row| { - // consumer.send(row) - // }); - todo!() - }, - Some(Condition::Eq(column_name, value)) => { - // is column_name primary key? then it is easy - // self.get(id) - // is column_name indexed? Then get the index, and then it is not easy, because you - // may get a set of ids. - // what if it is not primary nor indexed? then you need to brute force your way - // through the whole table? - todo!() + Some(Condition::Eq(eq_column_name, value)) => { + let (type_, eq_column_position) = self.schema.get_column(&eq_column_name)?; + if self.schema.is_primary(eq_column_position) { + match value { + DbValue::Indexable(IndexableDbValue::UUID(uuid)) => { + match self.get_row_by_id(uuid) { + Some(row) => Ok(vec![select_columns(&row, &selected_column_positions)]), + None => Ok(vec![]), + } + }, + _ => Err(Error::ValueDoesNotMatchExpectedType(self.schema.table_name.clone(), eq_column_name.clone(), type_, value.clone())) + } + } else { + match value { + DbValue::Indexable(value) => { + match self.indexes.get(&eq_column_position) { + Some(column_index) => { + let ids = column_index.get(value); + Ok(self.get_rows_by_ids(ids).iter().map(|row| select_columns(row, &selected_column_positions)).collect()) + }, + None => { + Ok(self.get_rows_by_value(eq_column_position, &DbValue::Indexable(value)).iter().map(|row| select_columns(row, &selected_column_positions)).collect()) + } + } + }, + _ => { + Ok(self.get_rows_by_value(eq_column_position, &value).iter().map(|row| select_columns(row, &selected_column_positions)).collect()) + } + } + } } } } - fn insert(&mut self, values: InsertionValues, consumer: impl SqlConsumer) { + fn insert(&mut self, values: InsertionValues) { // 1. You need to update indices // 2. you simply insert the data todo!() } - fn delete_where(&mut self, maybe_condition: Option, consumer: impl SqlConsumer) { + fn delete_where(&mut self, maybe_condition: Option) { // kinda similar to select with respect to the conditions // update index todo!() } } -// enum Response { -// Selected(impl Iter), // TODO: How to do this? Some reference to an iterator somehow... slice..? -// Inserted(???), -// Deleted(usize), // how many were deleted -// } +impl ColumnIndex { + fn get(&self, value: IndexableDbValue) -> HashSet { + match self.index.get(&value) { + Some(set) => set.clone(), + None => HashSet::new(), + } + } +} + +enum Response { + Selected(Vec), + Inserted(), + Deleted(usize), // how many were deleted +} + +type DbResult = Result; + +enum Error { + ColumnDoesNotExist(TableName, ColumnName), + ValueDoesNotMatchExpectedType(TableName, ColumnName, DbType, DbValue) +} fn main() { println!("Hello, world!");