diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8cf2bff --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +tmp_repl.txt diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..e87788c --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "minisql" +version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..df68143 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "minisql" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/DESIGN.md b/DESIGN.md index 78c71a7..449f983 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -69,11 +69,41 @@ which will store the database as a file `path/to/db/my-db.db` and open a TCP ser how will the parsing output look like? Consider something like ``` +// TODO: Parser has access to all table metadata + // Could also be called `SQLAbstractSyntaxTree` -enum Operations { - Select(Vector, TableName), - Update(...) +enum Operation { + Select(TableName, ColumnSelection, Option), + Insert(TableName, Vec<(ColumnName, DbValue)>), // String because we don't yet know which type of value this is for sure + Delete(TableName, Option), + // Update(...), } + +enum ColumnSelection { + All, + Columns(Vec), +} + +enum Condition = { + // And(Condition, Condition), + // Or(Condition, Condition), + // Not(Condition), + + Eq(ColumnName, DbValue) + // LessOrEqual(ColumnName, DbValue) + // Less(ColumnName, DbValue) + + // StringCondition(StringCondition) +} + +enum StringCondition { + Prefix(ColumnName, String) + Substring(ColumnName, String) +} + + + +INSERT 123 ``` * We also have to write an interpreter for these operations. How will the db-state be represented in memory? For example how can we implement a table? 
@@ -82,11 +112,46 @@ enum Operations { enum DbValue { DbString(String), DbNumber(Float), - DbByte(u8), DbUUID(u32) } +// We also need a type of db-types +enum DbType { + TString, + TNumber, + TId, +} + +value_to_type(db_val: DbValue) -> DbType + + +// table-metadata and data + +type TableName = String + +// Note that it is nice to split metadata from the data because +// then you can give the metadata to the parser without giving it the data. +struct TableMetaData { + name: TableName, // TODO: Is this really necessary? probably not + columns: Vec<(ColumnName, DbType, ColumnPosition)> +} + +fn column_position(TableMetaData, ColumnName) -> ColumnPosition + +struct Table { + meta: TableMetaData, + rows: Rows // defined below + indexes: + BTree // TODO: Consider generalizing ColumnName to something that would also apply to a pair of ColumnNames etc +} + +type Tables = HashMap + +// We also need a function that for a given value computes its type (for validation) + + type ColumnName = String +type ColumnPosition = u32 // The below type is a type of a table row type Row = HashMap @@ -94,6 +159,9 @@ type Row = HashMap // Or you know... some appropriate Dictionary Type HashMap::make![("id", 1), ("name", "Alice"), ("salary", 20.0)] : Row +type Rows = + BTree + // possible optimization: have a mapping // column names ~> indexes // so that we could represent rows as @@ -112,6 +180,47 @@ e.g. Row ~> vec![DbUUID 1, DbSTring "Alice"] Vec> ``` +* Interpreter +``` +trait SqlConsumer { + // TODO: + ??? +} + +fn interpret(operation: Operation, tables: &mut Tables, consumer: T) -> () { + // TODO: lock stuff + match operation { + Select(table_name, column_selection, maybe_condition) => { + let table: Table = ... + // TODO: Wrap this into a response + select(table, column_selection, maybe_condition, consumer) + }, + Insert(table_name, Vec<(ColumnName, DbValue)>) => { + insert(table, ???) + } + Delete(table_name, maybe_condition) => { + + } + } +} + + response = interpret(...) 
+ knows_how_to_respond(response, client) + + +enum Response { + Selected(impl Iter) // TODO: How to do this? Some reference to an iterator somehow... slice..? + Inserted(???), + Deleted(usize), // how many were deleted +} + +fn select(table: Table, ColumnName + + + +``` + + * TODO: Consider streaming the response to the client and not just dumping 10K rows at once. diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..ba61402 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,223 @@ +use std::collections::{BTreeMap, HashMap}; + +// ==============SQL operations================ +// TODO: Note that every operation has a table name. +// Perhaps consider factoring the table name out +// and think of the operations as operating on a unique table. +enum Operation { + Select(TableName, ColumnSelection, Option), + Insert(TableName, InsertionValues), + Delete(TableName, Option), + // Update(...), + // + CreateTable(TableName, TableSchema), + CreateIndex(TableName, ColumnName), // TODO: Is this sufficient? + // DropTable(TableName), +} + +type InsertionValues = Vec<(ColumnName, DbValue)>; + +enum ColumnSelection { + All, + Columns(Vec), +} + +enum Condition { + // And(Box, Box), + // Or(Box, Box), + // Not(Box), + + Eq(ColumnName, DbValue), + // LessOrEqual(ColumnName, DbValue), + // Less(ColumnName, DbValue), + + // StringCondition(StringCondition), +} + +// enum StringCondition { +// Prefix(ColumnName, String), +// Substring(ColumnName, String), +// } + + +// ==============Values and Types================ +type UUID = u64; + +// TODO: What about nulls? I would rather not have that as in SQL, it sucks. +// I would rather have non-nullable values by default, +// and something like an explicit Option type for nulls. +enum DbValue { + String(String), + Int(u64), + Number(f64), + UUID(UUID), +} + +// TODO: Can this be autogenerated from the values? +enum DbType { + String, + Int, + Number, + UUID, +} + +impl DbValue { + // TODO: Can this be autogenerated? 
+ fn to_type(self) -> DbType { + match self { + Self::String(_) => DbType::String, + Self::Int(_) => DbType::Int, + Self::Number(_) => DbType::Number, + Self::UUID(_) => DbType::UUID, + } + } +} + + +// ==============Tables================ +// table-metadata and data + +type TableName = String; +type TablePosition = u32; + +struct Table { + schema: TableSchema, + rows: Rows, + indexes: + HashMap // TODO: Consider generalizing `ColumnPosition` to something that would also apply to a pair of `ColumnNames` etc +} + +// TODO: Is this really indexed by DbValues? +// Maybe we should have a separate index type for each type of value we're indexing over +struct ColumnIndex { + index: BTreeMap +} + +// Note that it is nice to split metadata from the data because +// then you can give the metadata to the parser without giving it the data. +struct TableSchema { + columns: HashMap +} + +// TODO +fn column_position(table_meta: TableSchema, column_name: ColumnName) -> Option { + todo!() +} + +// Use `TablePosition` as index +type Tables = Vec; + + +type ColumnName = String; +type ColumnPosition = u32; + +// Use `ColumnPosition` as index +type Row = Vec; + +type Rows = + BTreeMap; + +// ==============Interpreter================ +struct State { + table_positions: HashMap, + tables: Vec
, +} + +impl State { + fn table_from_name<'b: 'a, 'a>(&'b self, table_name: TableName) -> Option<&'a Table> { + todo!() + } + + fn attach_table(&mut self, table: Table) { + todo!() + } +} + +// TODO: Give a better name to something that you can respond to with rows +trait SqlConsumer { + // TODO: +} + +// TODO: This should return a reference to the table +// 'tables_life contains 'table_life +fn get_table<'tables_life: 'table_life, 'table_life>(tables: &'tables_life Tables, table_name: &TableName) -> &'table_life Table { + // let table_position: + todo!() +} + +// TODO: Decide if we want this to return a response (but then you have to deal with lifetimes, +// because you'll be forced to put an iterator/slice into the Response data-structure). +// The alternative is to pass the function a row-consumer that knows how to communicate with +// the client, with the details of communication hidden behind an interface. +fn interpret(table_name: TableName, operation: Operation, state: &mut State, consumer: impl SqlConsumer) -> () { + // TODO: lock stuff + use Operation::*; + + match operation { + Select(table_name, column_selection, maybe_condition) => { + let table: &Table = todo!(); + table.select_where(column_selection, maybe_condition, consumer) + }, + Insert(table_name, values) => { + let table: &mut Table = todo!(); + + table.insert(values, consumer) + }, + Delete(table_name, maybe_condition) => { + let table: &mut Table = todo!(); + + table.delete_where(maybe_condition, consumer) + }, + CreateTable(table_name, table_schema) => { + let table = Table::new(table_name, table_schema); + state.attach_table(table); + todo!() + }, + CreateIndex(table_name, column_name) => { + let table: &mut Table = todo!(); + + let index: ColumnIndex = ColumnIndex::new(table, column_name); + table.attach_index(index); + }, // TODO: Is this sufficient? 
+ // + } +} + +impl ColumnIndex { + fn new(table: &Table, column_name: ColumnName) -> ColumnIndex { + todo!() + } +} + + +impl Table { + fn new(table_name: TableName, table_schema: TableSchema) -> Table { + todo!() + } + + fn attach_index(&mut self, column_index: ColumnIndex) { + todo!() + } + + fn select_where(&self, column_selection: ColumnSelection, maybe_condition: Option, consumer: impl SqlConsumer) { + todo!() + } + + fn insert(&mut self, values: InsertionValues, consumer: impl SqlConsumer) { + todo!() + } + + fn delete_where(&mut self, maybe_condition: Option, consumer: impl SqlConsumer) { + todo!() + } +} + +// enum Response { +// Selected(impl Iter), // TODO: How to do this? Some reference to an iterator somehow... slice..? +// Inserted(???), +// Deleted(usize), // how many were deleted +// } + +fn main() { + println!("Hello, world!"); +}