Merge remote-tracking branch 'origin/main' into interpreter

2023-12-28 09:37:48 +01:00 · 2023-12-28 09:37:48 +01:00 · bd3dbe2365
commit bd3dbe2365
parent 77f4ae514e bf885670c6
3 changed files with 14 additions and 9 deletions
--- a/minisql/Cargo.toml
+++ b/minisql/Cargo.toml
@ -0,0 +1,9 @@
+[package]
+name = "minisql"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+bimap = "0.6.3"
--- a/minisql/src/main.rs
+++ b/minisql/src/main.rs
@ -0,0 +1,563 @@
+use std::collections::{BTreeMap, HashMap, HashSet};
+use bimap::BiMap;
+
+// ==============SQL operations================
+// TODO: Note that every operation has a table name.
+//       Perhaps consider factoring the table name out
+//       and think of the operations as operating on a unique table.
+/// A single SQL-like statement handed to `interpret`.
+///
+/// Every variant carries the name of the table it targets as its first field.
+enum Operation {
+    /// Read rows: target table, columns to return, optional filter.
+    Select(TableName, ColumnSelection, Option<Condition>),
+    /// Add one row, given as `(column name, value)` pairs.
+    Insert(TableName, InsertionValues),
+    /// Remove the rows matching the filter; with no filter every row is removed.
+    Delete(TableName, Option<Condition>),
+    // Update(...),
+    /// Register a new table with the given schema.
+    CreateTable(TableName, TableSchema),
+    /// Attach an index over one column of a table.
+    CreateIndex(TableName, ColumnName), // TODO: Is this sufficient?
+    // DropTable(TableName),
+}
+
+/// The `(column name, value)` pairs supplied by an insert, in whatever order the caller chose.
+/// `TableSchema::row_from_insertion_values` rearranges them by column position.
+type InsertionValues = Vec<(ColumnName, DbValue)>;
+
+/// Which columns a select should return.
+enum ColumnSelection {
+    /// Every column of the table, ordered by column position.
+    All,
+    /// Only the named columns, in the order given.
+    Columns(Vec<ColumnName>),
+}
+
+/// Row filter used by select and delete. Only equality is implemented so far;
+/// the commented-out variants sketch planned extensions.
+enum Condition {
+    // And(Box<Condition>, Box<Condition>),
+    // Or(Box<Condition>, Box<Condition>),
+    // Not(Box<Condition>),
+
+    /// Matches rows whose value in the named column equals the given value.
+    Eq(ColumnName, DbValue),
+    // LessOrEqual(ColumnName, DbValue),
+    // Less(ColumnName, DbValue),
+
+    // StringCondition(StringCondition),
+}
+
+// enum StringCondition {
+//     Prefix(ColumnName, String),
+//     Substring(ColumnName, String),
+// }
+
+
+// ==============Values and Types================
+/// Row identifier: the key of `Rows` and the element type of index buckets.
+/// Note that it is a plain `u64`.
+type UUID = u64;
+
+// TODO: What about nulls? I would rather not have that as in SQL, it sucks.
+//       I would rather have non-nullable values by default,
+//       and something like an explicit Option type for nulls.
+/// A single cell value stored in a row.
+#[derive(Debug, Clone, PartialEq)]
+enum DbValue {
+    /// Floating-point number; kept outside `IndexableDbValue` because `f64` is not `Eq`/`Ord`.
+    Number(f64), // TODO: Can't put floats as keys in maps, since they don't implement Eq. What to
+                 // do?
+    /// Any value that may be used as a key of a `ColumnIndex`.
+    Indexable(IndexableDbValue),
+}
+
+/// The subset of values that are totally ordered and can therefore key a `ColumnIndex`.
+///
+/// The derived `Ord` compares by variant order first, then by payload.
+#[derive(Debug, Ord, Eq, Clone, PartialOrd, PartialEq)]
+enum IndexableDbValue {
+    String(String),
+    Int(u64),
+    UUID(UUID),
+    // TODO: what about null?
+}
+
+// TODO: Can this be autogenerated from the values?
+/// Type tag of a column; one tag per kind of value a `DbValue` can hold.
+#[derive(Debug, Clone, Copy)]
+enum DbType {
+    String,
+    Int,
+    Number,
+    UUID,
+}
+
+impl DbValue {
+    /// Reports the `DbType` tag that corresponds to this value's variant.
+    ///
+    /// Takes `self` by value, so the `DbValue` is consumed by the call.
+    // TODO: Can this be autogenerated?
+    fn to_type(self) -> DbType {
+        // One flat match over both enum layers keeps the value/type pairing in a single table.
+        match self {
+            Self::Indexable(IndexableDbValue::String(_)) => DbType::String,
+            Self::Indexable(IndexableDbValue::Int(_)) => DbType::Int,
+            Self::Indexable(IndexableDbValue::UUID(_)) => DbType::UUID,
+            Self::Number(_) => DbType::Number,
+        }
+    }
+}
+
+
+// ==============Tables================
+// table-metadata and data
+
+/// Name under which a table is registered in `State`.
+type TableName = String;
+/// Index of a table inside `State::tables`.
+type TablePosition = usize;
+
+/// One table: its schema, its rows and any secondary indexes attached to it.
+struct Table {
+    // Column layout and primary-key position of this table.
+    schema: TableSchema,
+    rows: Rows, // TODO: Consider wrapping this in a lock. Also consider if we need to have the
+                // same lock for both rows and indexes
+    // Secondary indexes, keyed by the position of the column they cover.
+    indexes:
+        HashMap<ColumnPosition, ColumnIndex> // TODO: Consider generalizing `ColumnPosition` to something that would also apply to a pair of `ColumnNames` etc
+}
+
+// TODO: Is this really indexed by DbValues?
+//       Maybe we should have a separate index type for each type of value we're indexing over
+// NOTE: Each value maps to a set of UUIDs rather than a single UUID, because e.g.
+//       a user table can have multiple different users with the same name.
+/// Secondary index over one column: maps an indexable value to the ids of the rows holding it.
+struct ColumnIndex {
+    index: BTreeMap<IndexableDbValue, HashSet<UUID>>
+}
+
+// Note that it is nice to split metadata from the data because
+// then you can give the metadata to the parser without giving it the data.
+/// Metadata of a table, kept separately from its rows.
+struct TableSchema {
+    table_name: TableName, // used for descriptive errors
+    // Position of the primary-key column; the UUID stored there becomes the row's key.
+    primary_key: ColumnPosition,
+    // Two-way lookup between column names and column positions.
+    column_name_position_mapping: BiMap<ColumnName, ColumnPosition>,
+    // Declared type of each column, indexed by `ColumnPosition`.
+    types: Vec<DbType>,
+}
+
+// Use `TablePosition` as index
+/// All tables of a database, addressed by `TablePosition`.
+type Tables = Vec<Table>;
+
+
+/// Name of a column as written in queries.
+type ColumnName = String;
+/// Index of a column inside a `Row` and inside `TableSchema::types`.
+type ColumnPosition = usize;
+
+// Use `ColumnPosition` as index
+/// The cell values of one row, ordered by column position.
+type Row = Vec<DbValue>;
+
+/// Row storage of a table: rows keyed (and therefore iterated) by their `UUID`.
+type Rows =
+    // TODO: This should be some sort of an interface to a dictionary
+    // s.t. in the background it may modify stuff in memory or talk to the disk
+    BTreeMap<UUID, Row>;
+
+    // Sketch of the planned storage interface:
+    // insert(id, value)
+
+/// Projects `row` onto `columns`, cloning the cells in the order the positions are listed.
+///
+/// A position may appear more than once; the cell is then repeated in the output.
+///
+/// # Panics
+/// Panics if a position is not a valid index into `row`. Positions are expected to come
+/// from the schema of the table the row belongs to.
+fn select_columns(row: &Row, columns: &[ColumnPosition]) -> Row {
+    columns
+        .iter()
+        .map(|&column_position| row[column_position].clone())
+        .collect()
+}
+
+// ==============Interpreter================
+/// The whole database as seen by the interpreter.
+struct State {
+    // Two-way lookup between table names and their slot in `tables`.
+    table_name_position_mapping: BiMap<TableName, TablePosition>,
+    // Table storage, indexed by `TablePosition`.
+    tables: Vec<Table>,
+}
+
+impl State {
+    /// Resolves `table_name` to a shared reference to its table.
+    ///
+    /// Returns `Error::TableDoesNotExist` when the name is not registered.
+    fn table_from_name<'b: 'a, 'a>(&'b self, table_name: &TableName) -> DbResult<&'a Table> {
+        let table_position = *self
+            .table_name_position_mapping
+            .get_by_left(table_name)
+            .ok_or_else(|| Error::TableDoesNotExist(table_name.clone()))?;
+        Ok(&self.tables[table_position])
+    }
+
+    /// Resolves `table_name` to an exclusive reference to its table.
+    ///
+    /// Returns `Error::TableDoesNotExist` when the name is not registered.
+    fn table_from_name_mut<'b: 'a, 'a>(&'b mut self, table_name: &TableName) -> DbResult<&'a mut Table> {
+        // The position is copied out first so the lookup borrow ends before `tables` is
+        // borrowed mutably.
+        let table_position = *self
+            .table_name_position_mapping
+            .get_by_left(table_name)
+            .ok_or_else(|| Error::TableDoesNotExist(table_name.clone()))?;
+        Ok(&mut self.tables[table_position])
+    }
+
+    /// Stores `table` in the next free slot and registers `table_name` for that slot.
+    ///
+    /// NOTE(review): `BiMap::insert` presumably overwrites an existing mapping for the same
+    /// name, which would leave the previous table in `tables` unreachable by name - confirm.
+    fn attach_table(&mut self, table_name: TableName, table: Table) {
+        // The next free slot is exactly the current length of the vector.
+        let slot: TablePosition = self.tables.len();
+        self.tables.push(table);
+        self.table_name_position_mapping.insert(table_name, slot);
+    }
+}
+
+// TODO: Give a better name to something that you can respond to with rows
+/// Placeholder for the receiver of query results; it has no methods yet.
+trait SqlConsumer {
+    // TODO: 
+}
+
+// TODO: Decide if we want for this to return a response (but then you have to deal with lifetimes,
+//       because you'll be forced to put an iterator/slice into the Response data-structure.
+//       Alternative is to pass a row-consumer to the function that knows how to communicate with
+//       the client, but the details of communication are hidden behind an interface
+/// Executes one `Operation` against `state` and reports the outcome.
+///
+/// * `table_name` - currently unused; every `Operation` variant carries its own table name
+///   and that is the one acted upon.
+/// * `operation` - the statement to run.
+/// * `state` - all tables; mutated by inserts, deletes and table/index creation.
+/// * `consumer` - currently unused (see the TODO above).
+///
+/// # Errors
+/// Propagates any `Error` raised while resolving the table or column, or by the table
+/// operation itself.
+fn interpret(table_name: TableName, operation: Operation, state: &mut State, consumer: impl SqlConsumer) -> DbResult<Response>  {
+    // TODO: lock stuff
+    use Operation::*;
+
+    match operation {
+        Select(table_name, column_selection, maybe_condition) => {
+            let table: &Table = state.table_from_name(&table_name)?;
+            Ok(Response::Selected(table.select_where(column_selection, maybe_condition)?))
+        },
+        Insert(table_name, values) => {
+            let table: &mut Table = state.table_from_name_mut(&table_name)?;
+
+            table.insert(values)?;
+            Ok(Response::Inserted)
+        },
+        Delete(table_name, maybe_condition) => {
+            let table: &mut Table = state.table_from_name_mut(&table_name)?;
+
+            let rows_affected = table.delete_where(maybe_condition)?;
+            Ok(Response::Deleted(rows_affected))
+        },
+        CreateTable(table_name, table_schema) => {
+            let table = Table::new(table_schema);
+            state.attach_table(table_name, table);
+            Ok(Response::TableCreated)
+        },
+        CreateIndex(table_name, column_name) => {
+            let table: &mut Table = state.table_from_name_mut(&table_name)?;
+            let column_position: ColumnPosition = table.schema.column_position_from_column_name(&column_name)?;
+
+            // Build the index from the rows that already exist, so that lookups through the
+            // index see the same data a full scan would. Non-indexable cells
+            // (`DbValue::Number`) are skipped, mirroring what `Table::insert` does.
+            let mut index: BTreeMap<IndexableDbValue, HashSet<UUID>> = BTreeMap::new();
+            for (id, row) in &table.rows {
+                if let Some(DbValue::Indexable(value)) = row.get(column_position) {
+                    index.entry(value.clone()).or_default().insert(*id);
+                }
+            }
+
+            table.attach_index(column_position, ColumnIndex { index });
+            Ok(Response::IndexCreated)
+        },
+    }
+}
+
+impl ColumnIndex {
+    /// Creates the index for a table whose current rows are `rows`.
+    ///
+    /// An empty table yields an empty index.
+    ///
+    /// # Panics
+    /// Panics (via `todo!`) when `rows` is non-empty: indexing existing rows requires knowing
+    /// which column is being indexed, and this constructor is not given that information.
+    fn new(rows: &Rows) -> Self {
+        if !rows.is_empty() {
+            // TODO: Take into account already existing rows
+            todo!("building a ColumnIndex over existing rows requires the column position");
+        }
+        Self { index: BTreeMap::new() }
+    }
+}
+
+impl TableSchema {
+    /// Resolves a column name to its declared type and position.
+    ///
+    /// # Errors
+    /// * `ColumnDoesNotExist` if the name is not part of the schema.
+    /// * `MissingTypeAnnotationOfColumn` if `types` has no entry for the column's position.
+    fn get_column(&self, column_name: &ColumnName) -> DbResult<(DbType, ColumnPosition)> {
+        let column_position = *self
+            .column_name_position_mapping
+            .get_by_left(column_name)
+            .ok_or_else(|| Error::ColumnDoesNotExist(self.table_name.clone(), column_name.clone()))?;
+        let type_ = *self
+            .types
+            .get(column_position)
+            .ok_or_else(|| Error::MissingTypeAnnotationOfColumn(self.table_name.clone(), column_position))?;
+        Ok((type_, column_position))
+    }
+
+    /// Resolves a column name to its position; same errors as `get_column`.
+    fn column_position_from_column_name(&self, column_name: &ColumnName) -> DbResult<ColumnPosition> {
+        self.get_column(column_name).map(|(_, column_position)| column_position)
+    }
+
+    /// Tells whether `column_position` is the primary-key column.
+    fn is_primary(&self, column_position: ColumnPosition) -> bool {
+        self.primary_key == column_position
+    }
+
+    /// Resolves several column names, preserving their order; fails on the first unknown name.
+    fn column_positions_from_column_names(&self, column_names: &[ColumnName]) -> DbResult<Vec<ColumnPosition>> {
+        column_names
+            .iter()
+            .map(|column_name| self.column_position_from_column_name(column_name))
+            .collect()
+    }
+
+    /// Resolves a position back to the column's name.
+    ///
+    /// # Errors
+    /// `ColumnPositionDoesNotExist` if no column is registered at that position.
+    fn column_name_from_column_position(&self, column_position: ColumnPosition) -> DbResult<ColumnName> {
+        self.column_name_position_mapping
+            .get_by_right(&column_position)
+            .cloned()
+            .ok_or_else(|| Error::ColumnPositionDoesNotExist(self.table_name.clone(), column_position))
+    }
+
+    /// Turns a column selection into concrete positions.
+    ///
+    /// `All` yields every position in ascending order; `Columns` keeps the caller's order.
+    fn column_positions_from_column_selection(&self, column_selection: &ColumnSelection) -> DbResult<Vec<ColumnPosition>> {
+        match column_selection {
+            ColumnSelection::All => {
+                let mut column_positions: Vec<ColumnPosition> = self
+                    .column_name_position_mapping
+                    .iter()
+                    .map(|(_, column_position)| *column_position)
+                    .collect();
+                // The mapping has no defined iteration order, so sort to get a stable layout.
+                column_positions.sort_unstable();
+                Ok(column_positions)
+            },
+
+            ColumnSelection::Columns(column_names) => {
+                self.column_positions_from_column_names(column_names)
+            },
+        }
+    }
+
+    /// Number of columns registered in the schema.
+    fn number_of_columns(&self) -> usize {
+        self.column_name_position_mapping.len()
+    }
+
+    /// Arranges insertion values into a row ordered by column position and extracts the
+    /// row id from the primary-key column.
+    ///
+    /// # Errors
+    /// * `MismatchBetweenInsertValuesAndColumns` if the number of values differs from the
+    ///   number of columns.
+    /// * `MissingColumnInInsertValues` if some column has no value (which is also what a
+    ///   duplicated column name leads to).
+    /// * `ColumnPositionDoesNotExist` if a position below the column count, or the
+    ///   primary-key position itself, is not present in the schema.
+    /// * `ValueDoesNotMatchExpectedType` if the primary-key value is not a UUID.
+    fn row_from_insertion_values(&self, insertion_values: InsertionValues) -> DbResult<(UUID, Row)> {
+        // TODO: There should be proper validation of the insertion_values.
+        //       And it shouldn't really be done here.
+        //
+        //       In the below we don't check for duplicate column names
+        //
+        let number_of_columns = self.number_of_columns();
+        if number_of_columns != insertion_values.len() {
+            return Err(Error::MismatchBetweenInsertValuesAndColumns(self.table_name.clone(), insertion_values))
+        }
+
+        // The values are cloned into the lookup because `insertion_values` must stay intact
+        // for error reporting; each value is then moved (not cloned again) into the row.
+        let mut values: HashMap<ColumnName, DbValue> = insertion_values.iter().cloned().collect();
+
+        let mut row: Row = Vec::with_capacity(number_of_columns);
+        for column_position in 0..number_of_columns {
+            let column_name: ColumnName = self.column_name_from_column_position(column_position)?;
+            match values.remove(&column_name) {
+                Some(db_value) => row.push(db_value),
+                None => {
+                    return Err(Error::MissingColumnInInsertValues(self.table_name.clone(), column_name, insertion_values))
+                }
+            }
+        }
+
+        // The primary-key value is caller-supplied, so a wrong type is an ordinary error
+        // and must not abort the process.
+        let id: UUID = match row.get(self.primary_key) {
+            Some(DbValue::Indexable(IndexableDbValue::UUID(id))) => *id,
+            Some(other) => {
+                return Err(Error::ValueDoesNotMatchExpectedType(
+                    self.table_name.clone(),
+                    self.column_name_from_column_position(self.primary_key)?,
+                    DbType::UUID,
+                    other.clone(),
+                ))
+            },
+            None => {
+                return Err(Error::ColumnPositionDoesNotExist(self.table_name.clone(), self.primary_key))
+            },
+        };
+
+        Ok((id, row))
+    }
+
+}
+
+impl Table {
+    /// Creates an empty table (no rows, no indexes) with the given schema.
+    fn new(table_schema: TableSchema) -> Self {
+        Self {
+            schema: table_schema,
+            rows: BTreeMap::new(),
+            indexes: HashMap::new(),
+        }
+    }
+
+    /// Registers `column_index` for `column_position`, replacing any index already there.
+    fn attach_index(&mut self, column_position: ColumnPosition, column_index: ColumnIndex) {
+        self.indexes.insert(column_position, column_index);
+    }
+
+    /// Returns a copy of the row stored under `id`, if any.
+    fn get_row_by_id(&self, id: UUID) -> Option<Row> {
+        self.rows.get(&id).cloned()
+    }
+
+    /// Returns copies of the rows stored under `ids`; unknown ids are skipped.
+    fn get_rows_by_ids(&self, ids: HashSet<UUID>) -> Vec<Row> {
+        ids.into_iter()
+            .filter_map(|id| self.get_row_by_id(id))
+            .collect()
+    }
+
+    /// Returns copies of all rows whose cell at `column_position` equals `value` (full scan).
+    fn get_rows_by_value(&self, column_position: ColumnPosition, value: &DbValue) -> Vec<Row> {
+        // brute-force search
+        self.rows.values()
+            .filter(|row| row.get(column_position) == Some(value))
+            .cloned()
+            .collect()
+    }
+
+    /// Removes the row stored under `id` and its entries in every index.
+    ///
+    /// Returns the number of rows removed: 1 if the row existed, 0 otherwise.
+    fn delete_row_by_id(&mut self, id: UUID) -> usize {
+        match self.rows.remove(&id) {
+            None => 0,
+            Some(row) => {
+                // Every index must be visited: the count depends only on the row itself,
+                // never on whether some index happened to contain it.
+                for (column_position, column_index) in &mut self.indexes {
+                    if let Some(DbValue::Indexable(value)) = row.get(*column_position) {
+                        column_index.remove(value, id);
+                    }
+                }
+                1
+            }
+        }
+    }
+
+    /// Removes the rows stored under `ids`; returns how many actually existed.
+    fn delete_rows_by_ids(&mut self, ids: HashSet<UUID>) -> usize {
+        ids.into_iter().map(|id| self.delete_row_by_id(id)).sum()
+    }
+
+    /// Removes all rows whose cell at `column_position` equals `value` (full scan);
+    /// returns how many were removed.
+    fn delete_rows_by_value(&mut self, column_position: ColumnPosition, value: &DbValue) -> usize {
+        let matched_ids: HashSet<UUID> = self.rows.iter()
+            .filter(|(_, row)| row.get(column_position) == Some(value))
+            .map(|(id, _)| *id)
+            .collect();
+        self.delete_rows_by_ids(matched_ids)
+    }
+
+    /// Returns the selected columns of every row matching `maybe_condition`
+    /// (all rows when it is `None`).
+    ///
+    /// Lookup strategy for `Eq`: primary key -> direct lookup by id; indexed column with an
+    /// indexable value -> index lookup; anything else -> full scan.
+    ///
+    /// # Errors
+    /// * Errors from resolving the selected columns or the condition column.
+    /// * `ValueDoesNotMatchExpectedType` when the primary key is compared to a non-UUID value.
+    fn select_where(&self, column_selection: ColumnSelection, maybe_condition: Option<Condition>) -> DbResult<Vec<Row>> {
+        let selected_column_positions = self.schema.column_positions_from_column_selection(&column_selection)?;
+        let project = |row: &Row| select_columns(row, &selected_column_positions);
+
+        let (eq_column_name, value) = match maybe_condition {
+            None => return Ok(self.rows.values().map(project).collect()),
+            Some(Condition::Eq(eq_column_name, value)) => (eq_column_name, value),
+        };
+        let (type_, eq_column_position) = self.schema.get_column(&eq_column_name)?;
+
+        if self.schema.is_primary(eq_column_position) {
+            return match value {
+                DbValue::Indexable(IndexableDbValue::UUID(uuid)) =>
+                    Ok(self.rows.get(&uuid).map(project).into_iter().collect()),
+                other =>
+                    Err(Error::ValueDoesNotMatchExpectedType(self.schema.table_name.clone(), eq_column_name, type_, other)),
+            };
+        }
+
+        let matching_rows: Vec<Row> = match value {
+            DbValue::Indexable(indexable) => match self.indexes.get(&eq_column_position) {
+                Some(column_index) => self.get_rows_by_ids(column_index.get(indexable)),
+                None => self.get_rows_by_value(eq_column_position, &DbValue::Indexable(indexable)),
+            },
+            // Non-indexable values can never be found through an index.
+            other => self.get_rows_by_value(eq_column_position, &other),
+        };
+        Ok(matching_rows.iter().map(project).collect())
+    }
+
+    /// Inserts one row and records it in every attached index.
+    ///
+    /// # Errors
+    /// * Errors from `TableSchema::row_from_insertion_values`.
+    /// * `AttemptingToInsertAlreadyPresentId` if a row with the same id exists.
+    /// * `ColumnPositionDoesNotExist` if an index refers to a column the row does not have.
+    ///
+    /// On error neither the rows nor the indexes are modified.
+    fn insert(&mut self, values: InsertionValues) -> DbResult<()> {
+        let (id, row) = self.schema.row_from_insertion_values(values)?;
+
+        if self.rows.contains_key(&id) {
+            return Err(Error::AttemptingToInsertAlreadyPresentId(self.schema.table_name.clone(), id))
+        }
+
+        // Validate every indexed position before touching any index, so that a failure
+        // cannot leave the indexes partially updated.
+        if let Some(&missing_position) = self.indexes.keys().find(|&&column_position| column_position >= row.len()) {
+            return Err(Error::ColumnPositionDoesNotExist(self.schema.table_name.clone(), missing_position))
+        }
+
+        for (column_position, column_index) in &mut self.indexes {
+            if let Some(DbValue::Indexable(value)) = row.get(*column_position) {
+                column_index.add(value.clone(), id);
+            }
+        }
+
+        let _ = self.rows.insert(id, row);
+        Ok(())
+    }
+
+    /// Deletes every row matching `maybe_condition` (all rows when it is `None`) and keeps
+    /// the indexes in sync. Returns the number of rows deleted.
+    ///
+    /// Uses the same lookup strategy as `select_where`.
+    ///
+    /// # Errors
+    /// * Errors from resolving the condition column.
+    /// * `ValueDoesNotMatchExpectedType` when the primary key is compared to a non-UUID value.
+    fn delete_where(&mut self, maybe_condition: Option<Condition>) -> DbResult<usize> {
+        let (eq_column_name, value) = match maybe_condition {
+            None => {
+                // delete all
+                let number_of_rows = self.rows.len();
+                self.rows.clear();
+                // The indexes stay attached; only their contents are emptied, so rows
+                // inserted afterwards keep being indexed.
+                for column_index in self.indexes.values_mut() {
+                    column_index.index.clear();
+                }
+                return Ok(number_of_rows)
+            },
+            Some(Condition::Eq(eq_column_name, value)) => (eq_column_name, value),
+        };
+        let (type_, eq_column_position) = self.schema.get_column(&eq_column_name)?;
+
+        if self.schema.is_primary(eq_column_position) {
+            return match value {
+                DbValue::Indexable(IndexableDbValue::UUID(uuid)) =>
+                    Ok(self.delete_row_by_id(uuid)),
+                other =>
+                    Err(Error::ValueDoesNotMatchExpectedType(self.schema.table_name.clone(), eq_column_name, type_, other)),
+            };
+        }
+
+        let rows_deleted = match value {
+            DbValue::Indexable(indexable) => match self.indexes.get(&eq_column_position) {
+                Some(column_index) => {
+                    // Copy the ids out first; deleting mutates the very index being read.
+                    let ids = column_index.get(indexable);
+                    self.delete_rows_by_ids(ids)
+                },
+                None => self.delete_rows_by_value(eq_column_position, &DbValue::Indexable(indexable)),
+            },
+            other => self.delete_rows_by_value(eq_column_position, &other),
+        };
+        Ok(rows_deleted)
+    }
+}
+
+impl ColumnIndex {
+    /// Returns a copy of the ids recorded for `value`; empty when the value is not indexed.
+    fn get(&self, value: IndexableDbValue) -> HashSet<UUID> {
+        self.index.get(&value).cloned().unwrap_or_default()
+    }
+
+    /// Records that the row `id` holds `value`. Adding the same pair twice is a no-op.
+    fn add(&mut self, value: IndexableDbValue, id: UUID) {
+        self.index.entry(value).or_default().insert(id);
+    }
+
+    /// Forgets that the row `id_to_be_removed` holds `value`.
+    ///
+    /// Returns `true` if the pair was recorded. A bucket that becomes empty is dropped from
+    /// the map, so values that are no longer present do not accumulate as empty entries.
+    fn remove(&mut self, value: &IndexableDbValue, id_to_be_removed: UUID) -> bool {
+        let (was_present, bucket_is_empty) = match self.index.get_mut(value) {
+            Some(ids) => (ids.remove(&id_to_be_removed), ids.is_empty()),
+            None => return false,
+        };
+        if bucket_is_empty {
+            self.index.remove(value);
+        }
+        was_present
+    }
+}
+
+/// Successful outcome of `interpret`, one variant per kind of operation.
+enum Response {
+    /// The projected rows that matched a select.
+    Selected(Vec<Row>),
+    Inserted,
+    Deleted(usize), // how many were deleted
+    TableCreated,
+    IndexCreated,
+} 
+
+/// Result type of every fallible database operation in this file.
+type DbResult<A> = Result<A, Error>;
+
+/// Everything that can go wrong while interpreting an operation.
+///
+/// `Debug` is derived so that a `DbResult` can be `unwrap`ped, `expect`ed and printed
+/// with `{:?}`; every payload type already implements it.
+#[derive(Debug)]
+enum Error {
+    /// No table is registered under this name.
+    TableDoesNotExist(TableName),
+    /// The table has no column with this name.
+    ColumnDoesNotExist(TableName, ColumnName),
+    /// The table has no column at this position.
+    ColumnPositionDoesNotExist(TableName, ColumnPosition),
+    /// The value supplied for the column is not of the column's expected type.
+    ValueDoesNotMatchExpectedType(TableName, ColumnName, DbType, DbValue),
+    /// A row with this id is already stored in the table.
+    AttemptingToInsertAlreadyPresentId(TableName, UUID),
+    /// The schema declares no type for the column at this position.
+    MissingTypeAnnotationOfColumn(TableName, ColumnPosition),
+    /// The insert did not provide a value for the named column.
+    MissingColumnInInsertValues(TableName, ColumnName, InsertionValues),
+    /// The insert provided a different number of values than the table has columns.
+    MismatchBetweenInsertValuesAndColumns(TableName, InsertionValues),
+}
+
+/// Program entry point; for now it only prints a greeting.
+fn main() {
+    let greeting = "Hello, world!";
+    println!("{}", greeting);
+}