Merge branch 'tables' into 'main'

Refine design

See merge request x433485/minisql!1
This commit is contained in:
Yuriy Dupyn 2023-10-30 11:22:28 +01:00
commit da9d9bf799
5 changed files with 353 additions and 4 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
/target
tmp_repl.txt

7
Cargo.lock generated Normal file
View file

@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "minisql"
version = "0.1.0"

8
Cargo.toml Normal file
View file

@ -0,0 +1,8 @@
[package]
name = "minisql"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

117
DESIGN.md
View file

@ -69,11 +69,41 @@ which will store the database as a file `path/to/db/my-db.db` and open a TCP ser
What will the parsing output look like?
Consider something like
```
// TODO: Parser has access to all table metadata
// Could also be called `SQLAbstractSyntaxTree`
enum Operations {
Select(Vector<FieldName>, TableName),
Update(...)
enum Operation {
Select(TableName, ColumnSelection, Option<Condition>),
Insert(TableName, Vec<(ColumnName, DbValue)>), // String because we don't yet know which type of value this is for sure
Delete(TableName, Option<Condition>),
// Update(...),
}
enum ColumnSelection {
All,
Columns(Vec<ColumnName>),
}
enum Condition = {
// And(Condition, Condition),
// Or(Condition, Condition),
// Not(Condition),
Eq(ColumnName, DbValue)
// LessOrEqual(ColumnName, DbValue)
// Less(ColumnName, DbValue)
// StringCondition(StringCondition)
}
enum StringCondition {
Prefix(ColumnName, String)
Substring(ColumnName, String)
}
INSERT 123
```
* We also have to write an interpreter for these operations. How will the db-state be represented in memory?
For example how can we implement a table?
@ -82,11 +112,46 @@ enum Operations {
enum DbValue {
DbString(String),
DbNumber(Float),
DbByte(u8),
DbUUID(u32)
}
// We also need a type of db-types
enum DbType {
TString,
TNumber,
TId,
}
value_to_type(db_val: DbValue) -> DbType
// table-metadata and data
type TableName = String
// Note that it is nice to split metadata from the data because
// then you can give the metadata to the parser without giving it the data.
struct TableMetaData {
name: TableName, // TODO: Is this really necessary? probably not
columns: Vec<(ColumnName, DbType, ColumnPosition)>
}
fn column_position(TableMetaData, ColumnName) -> ColumnPosition
struct Table {
meta: TableMetaData,
rows: Rows // defined below
indexes:
BTree<ColumnName, Index> // TODO: Consider generalizing ColumnName to something that would also apply to a pair of ColumnNames etc
}
type Tables = HashMap<TableName, Table>
// We also need a function that for a given value computes its type (for validation)
type ColumnName = String
type ColumnPosition = u32
// The below type is a type of a table row
type Row = HashMap<ColumnName, DbValue>
@ -94,6 +159,9 @@ type Row = HashMap<ColumnName, DbValue>
// Or you know... some appropriate Dictionary Type
HashMap::make![("id", 1), ("name", "Alice"), ("salary", 20.0)] : Row
type Rows =
BTree<Id, Row>
// possible optimization: have a mapping
// column names ~> indexes
// so that we could represent rows as
@ -112,6 +180,47 @@ e.g. Row ~> vec![DbUUID 1, DbSTring "Alice"]
Vec<Vec<DbValue>>
```
* Interpreter
```
trait SqlConsumer {
// TODO:
???
}
fn interpret<T: SqlConsumer>(operation: Operation, tables: &mut Tables, consumer: T) -> () {
// TODO: lock stuff
match operation {
Select(table_name, column_selection, maybe_condition) => {
let table: Table = ...
// TODO: Wrap this into a response
select(table, column_selection, maybe_condition, consumer)
},
Insert(table_name, Vec<(ColumnName, DbValue)>) => {
insert(table, ???)
}
Delete(table_name, maybe_condition) => {
}
}
}
response = interpret(...)
knows_how_to_respond(response, client)
enum Response {
Selected(impl Iter<???>) // TODO: How to do this? Some reference to an iterator somehow... slice..?
Inserted(???),
Deleted(usize), // how many were deleted
}
fn select(table: Table, ColumnName
```
* TODO: Consider streaming the response to the client and not just dumping 10K rows at once.

223
src/main.rs Normal file
View file

@ -0,0 +1,223 @@
use std::collections::{BTreeMap, HashMap};
// ==============SQL operations================
// TODO: Note that every operation has a table name.
// Perhaps consider factoring the table name out
// and think of the operations as operating on a unique table.
/// A parsed SQL operation, ready to be handed to `interpret`.
///
/// Every variant carries the name of the table it targets as its first field.
enum Operation {
    /// Read rows from a table: which columns to return and an optional filter.
    Select(TableName, ColumnSelection, Option<Condition>),
    /// Add a row built from the given column/value pairs.
    Insert(TableName, InsertionValues),
    /// Remove rows, optionally restricted by a condition.
    Delete(TableName, Option<Condition>),
    // Update(...),
    //
    /// Create a new table with the given schema.
    CreateTable(TableName, TableSchema),
    /// Create an index over one column of a table.
    CreateIndex(TableName, ColumnName), // TODO: Is this sufficient?
    // DropTable(TableName),
}
/// Column/value pairs carried by `Operation::Insert` and passed to `Table::insert`.
type InsertionValues = Vec<(ColumnName, DbValue)>;
/// Which columns a `Select` operation should return.
enum ColumnSelection {
    /// Every column of the table.
    All,
    /// Only the listed columns.
    Columns(Vec<ColumnName>),
}
/// A filter attached to `Select` and `Delete` operations.
///
/// Only equality against a single column is available so far; the
/// commented-out variants sketch the planned extensions.
enum Condition {
    // And(Box<Condition>, Box<Condition>),
    // Or(Box<Condition>, Box<Condition>),
    // Not(Box<Condition>),
    /// The named column must hold exactly the given value.
    Eq(ColumnName, DbValue),
    // LessOrEqual(ColumnName, DbValue),
    // Less(ColumnName, DbValue),
    // StringCondition(StringCondition),
}
// enum StringCondition {
// Prefix(ColumnName, String),
// Substring(ColumnName, String),
// }
// ==============Values and Types================
/// Identifier type: the key of `Rows`, the value stored in a `ColumnIndex`,
/// and the payload of `DbValue::UUID`. Note that it is a plain 64-bit integer.
type UUID = u64;
// TODO: What about nulls? I would rather not have that as in SQL, it sucks.
// I would rather have non-nullable values by default,
// and something like an explicit Option type for nulls.
/// A single value stored in a table cell.
///
/// `Debug`, `Clone` and `PartialEq` are derived so that values can be printed,
/// copied into rows, and compared (e.g. when evaluating `Condition::Eq`).
/// `Eq`, `Ord` and `Hash` cannot be derived because `Number` holds an `f64`.
#[derive(Debug, Clone, PartialEq)]
enum DbValue {
    /// Text value.
    String(String),
    /// Unsigned 64-bit integer value.
    Int(u64),
    /// 64-bit floating-point value.
    Number(f64),
    /// Identifier value.
    UUID(UUID),
}
// TODO: Can this be autogenerated from the values?
/// The type of a `DbValue`, one variant per `DbValue` variant.
///
/// The derives make types cheap to copy and comparable, which is what
/// checking a value's type against a schema's declared column type needs.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum DbType {
    /// Type of `DbValue::String`.
    String,
    /// Type of `DbValue::Int`.
    Int,
    /// Type of `DbValue::Number`.
    Number,
    /// Type of `DbValue::UUID`.
    UUID,
}
impl DbValue {
    // TODO: Can this be autogenerated?
    /// Returns the `DbType` corresponding to this value's variant.
    ///
    /// Takes `&self` so that asking for a value's type does not consume the
    /// value; the caller can still store or compare it afterwards.
    fn to_type(&self) -> DbType {
        match self {
            Self::String(_) => DbType::String,
            Self::Int(_) => DbType::Int,
            Self::Number(_) => DbType::Number,
            Self::UUID(_) => DbType::UUID,
        }
    }
}
// ==============Tables================
// table-metadata and data
/// Name identifying a table; the key of `State::table_positions`.
type TableName = String;
/// Position of a table; the value stored in `State::table_positions`.
type TablePosition = u32;
/// A table: its schema, its rows, and the indexes built over its columns.
struct Table {
    /// Column metadata for this table.
    schema: TableSchema,
    /// The stored rows.
    rows: Rows,
    /// Indexes, keyed by the position of the column they cover.
    // TODO: Consider generalizing `ColumnPosition` to something that would
    //       also apply to a pair of `ColumnNames` etc
    indexes: HashMap<ColumnPosition, ColumnIndex>,
}
// TODO: Is this really indexed by DbValues?
//       Maybe we should have a separate index type for each type of value
//       we're indexing over.
// NOTE(review): `BTreeMap` requires `Ord` keys for insertion and lookup, and
//       `DbValue` holds an `f64`, so it cannot simply derive `Ord`.
/// An index over the values of one column.
struct ColumnIndex {
    /// Maps a column value to a `UUID` (presumably the id of the row holding
    /// that value; confirm once the index is implemented).
    index: BTreeMap<DbValue, UUID>,
}
// Note that it is nice to split metadata from the data because
// then you can give the metadata to the parser without giving it the data.
/// Metadata of a table: what columns it has.
struct TableSchema {
    /// For each column name, the column's type and its position.
    columns: HashMap<ColumnName, (DbType, ColumnPosition)>,
}
/// Returns the position recorded for `column_name` in `table_meta`.
///
/// Returns `None` when the schema has no column with that name.
// NOTE(review): this takes the schema by value, so the caller gives it up for
// a single lookup; consider borrowing (`&TableSchema`, `&str`) once callers
// exist.
fn column_position(table_meta: TableSchema, column_name: ColumnName) -> Option<ColumnPosition> {
    table_meta
        .columns
        .get(&column_name)
        .map(|(_, position)| *position)
}
// Use `TablePosition` as index
/// Collection of tables.
type Tables = Vec<Table>;
/// Name identifying a column; the key of `TableSchema::columns`.
type ColumnName = String;
/// Position of a column.
type ColumnPosition = u32;
// Use `ColumnPosition` as index
/// The values of one row.
type Row = Vec<DbValue>;
/// Rows of a table, ordered by their `UUID` key.
type Rows =
BTreeMap<UUID, Row>;
// ==============Interpreter================
/// The in-memory database state the interpreter operates on.
struct State {
    /// Maps each table name to a `TablePosition`.
    table_positions: HashMap<TableName, TablePosition>,
    /// The tables themselves.
    tables: Vec<Table>,
}
impl State {
fn table_from_name<'b: 'a, 'a>(&'b self, table_name: TableName) -> Option<&'a Table> {
todo!()
}
fn attach_table(&mut self, table: Table) {
todo!()
}
}
// TODO: Give a better name to something that you can respond to with rows
/// Receiver for the outcome of an operation; `interpret` and the `Table`
/// operations take one as `impl SqlConsumer`. It has no methods yet.
trait SqlConsumer {
// TODO:
}
// TODO: This should return a reference to the table
/// Returns a reference to the table called `table_name`, borrowed from
/// `tables`. Not implemented yet.
///
/// A single lifetime ties the returned reference to `tables`; `table_name`
/// is only borrowed for the duration of the call.
fn get_table<'a>(tables: &'a Tables, table_name: &TableName) -> &'a Table {
    // let table_position:
    todo!()
}
// TODO: Decide if we want for this to return a response (but then you have to deal with lifetimes,
// because you'll be forced to put an iterator/slice into the Response data-structure.
// Alternative is to pass a row-consumer to the functionas that knows how to communicate with
// the client, but the details of communication are hidden behind an interface
fn interpret(table_name: TableName, operation: Operation, state: &mut State, consumer: impl SqlConsumer) -> () {
// TODO: lock stuff
use Operation::*;
match operation {
Select(table_name, column_selection, maybe_condition) => {
let table: &Table = todo!();
table.select_where(column_selection, maybe_condition, consumer)
},
Insert(table_name, values) => {
let table: &mut Table = todo!();
table.insert(values, consumer)
},
Delete(table_name, maybe_condition) => {
let table: &mut Table = todo!();
table.delete_where(maybe_condition, consumer)
},
CreateTable(table_name, table_schema) => {
let table = Table::new(table_name, table_schema);
state.attach_table(table);
todo!()
},
CreateIndex(table_name, column_name) => {
let table: &mut Table = todo!();
let index: ColumnIndex = ColumnIndex::new(table, column_name);
table.attach_index(index);
}, // TODO: Is this sufficient?
//
}
}
impl ColumnIndex {
    /// Builds an index over the column `column_name` of `table`.
    /// Not implemented yet.
    fn new(table: &Table, column_name: ColumnName) -> ColumnIndex {
        todo!()
    }
}
impl Table {
    /// Creates a table from a name and a schema. Not implemented yet.
    fn new(table_name: TableName, table_schema: TableSchema) -> Table {
        todo!()
    }

    /// Adds `column_index` to this table. Not implemented yet.
    fn attach_index(&mut self, column_index: ColumnIndex) {
        todo!()
    }

    /// Runs a select with the given column selection and optional condition,
    /// with `consumer` as the receiver. Not implemented yet.
    fn select_where(&self, column_selection: ColumnSelection, maybe_condition: Option<Condition>, consumer: impl SqlConsumer) {
        todo!()
    }

    /// Inserts the given column/value pairs, with `consumer` as the receiver.
    /// Not implemented yet.
    fn insert(&mut self, values: InsertionValues, consumer: impl SqlConsumer) {
        todo!()
    }

    /// Runs a delete with the given optional condition, with `consumer` as
    /// the receiver. Not implemented yet.
    fn delete_where(&mut self, maybe_condition: Option<Condition>, consumer: impl SqlConsumer) {
        todo!()
    }
}
// enum Response {
// Selected(impl Iter<???>), // TODO: How to do this? Some reference to an iterator somehow... slice..?
// Inserted(???),
// Deleted(usize), // how many were deleted
// }
/// Program entry point. For now it only prints a greeting; nothing of the
/// database is wired up yet.
fn main() {
    println!("Hello, world!");
}