Merge remote-tracking branch 'origin/main' into interpreter
This commit is contained in:
commit
bd3dbe2365
3 changed files with 14 additions and 9 deletions
9
minisql/Cargo.toml
Normal file
9
minisql/Cargo.toml
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
[package]
|
||||
name = "minisql"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
bimap = "0.6.3"
|
||||
563
minisql/src/main.rs
Normal file
563
minisql/src/main.rs
Normal file
|
|
@ -0,0 +1,563 @@
|
|||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
use bimap::BiMap;
|
||||
|
||||
// ==============SQL operations================
|
||||
// TODO: Note that every operation has a table name.
|
||||
// Perhaps consider factoring the table name out
|
||||
// and think of the operations as operating on a unique table.
|
||||
enum Operation {
|
||||
Select(TableName, ColumnSelection, Option<Condition>),
|
||||
Insert(TableName, InsertionValues),
|
||||
Delete(TableName, Option<Condition>),
|
||||
// Update(...),
|
||||
CreateTable(TableName, TableSchema),
|
||||
CreateIndex(TableName, ColumnName), // TODO: Is this sufficient?
|
||||
// DropTable(TableName),
|
||||
}
|
||||
|
||||
type InsertionValues = Vec<(ColumnName, DbValue)>;
|
||||
|
||||
enum ColumnSelection {
|
||||
All,
|
||||
Columns(Vec<ColumnName>),
|
||||
}
|
||||
|
||||
enum Condition {
|
||||
// And(Box<Condition>, Box<Condition>),
|
||||
// Or(Box<Condition>, Box<Condition>),
|
||||
// Not(Box<Condition>),
|
||||
|
||||
Eq(ColumnName, DbValue),
|
||||
// LessOrEqual(ColumnName, DbValue),
|
||||
// Less(ColumnName, DbValue),
|
||||
|
||||
// StringCondition(StringCondition),
|
||||
}
|
||||
|
||||
// enum StringCondition {
|
||||
// Prefix(ColumnName, String),
|
||||
// Substring(ColumnName, String),
|
||||
// }
|
||||
|
||||
|
||||
// ==============Values and Types================
|
||||
type UUID = u64;
|
||||
|
||||
// TODO: What about nulls? I would rather not have that as in SQL, it sucks.
|
||||
// I would rather have non-nullable values by default,
|
||||
// and something like an explicit Option type for nulls.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
enum DbValue {
|
||||
Number(f64), // TODO: Can't put floats as keys in maps, since they don't implement Eq. What to
|
||||
// do?
|
||||
Indexable(IndexableDbValue),
|
||||
}
|
||||
|
||||
#[derive(Debug, Ord, Eq, Clone, PartialOrd, PartialEq)]
|
||||
enum IndexableDbValue {
|
||||
String(String),
|
||||
Int(u64),
|
||||
UUID(UUID),
|
||||
// TODO: what bout null?
|
||||
}
|
||||
|
||||
// TODO: Can this be autogenerated from the values?
|
||||
/// Type tag of a column; there is one tag per kind of value `DbValue` can hold.
#[derive(Debug, Clone, Copy)]
enum DbType {
    String,
    Int,
    Number,
    UUID,
}
|
||||
|
||||
impl DbValue {
|
||||
// TODO: Can this be autogenerated?
|
||||
fn to_type(self) -> DbType {
|
||||
match self {
|
||||
Self::Number(_) => DbType::Number,
|
||||
Self::Indexable(val) =>
|
||||
match val {
|
||||
IndexableDbValue::String(_) => DbType::String,
|
||||
IndexableDbValue::Int(_) => DbType::Int,
|
||||
IndexableDbValue::UUID(_) => DbType::UUID,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// ==============Tables================
|
||||
// table-metadata and data
|
||||
|
||||
type TableName = String;
|
||||
type TablePosition = usize;
|
||||
|
||||
struct Table {
|
||||
schema: TableSchema,
|
||||
rows: Rows, // TODO: Consider wrapping this in a lock. Also consider if we need to have the
|
||||
// same lock for both rows and indexes
|
||||
indexes:
|
||||
HashMap<ColumnPosition, ColumnIndex> // TODO: Consider generalizing `ColumnPosition` to something that would also apply to a pair of `ColumnNames` etc
|
||||
}
|
||||
|
||||
// TODO: Is this really indexed by DbValues?
|
||||
// Maybe we should have a separate index type for each type of value we're indexing over
|
||||
// Each value maps to a set of UUIDs rather than a single UUID, because e.g.
// a user table can have multiple different users with the same name.
|
||||
struct ColumnIndex {
|
||||
index: BTreeMap<IndexableDbValue, HashSet<UUID>>
|
||||
}
|
||||
|
||||
// Note that it is nice to split metadata from the data because
|
||||
// then you can give the metadata to the parser without giving it the data.
|
||||
struct TableSchema {
|
||||
table_name: TableName, // used for descriptive errors
|
||||
primary_key: ColumnPosition,
|
||||
column_name_position_mapping: BiMap<ColumnName, ColumnPosition>,
|
||||
types: Vec<DbType>,
|
||||
}
|
||||
|
||||
// Use `TablePosition` as index
|
||||
type Tables = Vec<Table>;
|
||||
|
||||
|
||||
type ColumnName = String;
|
||||
type ColumnPosition = usize;
|
||||
|
||||
// Use `ColumnPosition` as index
|
||||
type Row = Vec<DbValue>;
|
||||
|
||||
type Rows =
|
||||
// TODO: This should be some sort of an interface to a dictionary
|
||||
// s.t. in the background it may modify stuff in memory or talk to the disk
|
||||
BTreeMap<UUID, Row>;
|
||||
|
||||
// interface
|
||||
// insert(id, value)
|
||||
|
||||
/// Projects `row` onto the given column positions.
///
/// Returns a new row holding a clone of `row[position]` for every entry of `columns`,
/// in the order the positions are listed; a position may appear more than once.
///
/// # Panics
///
/// Panics if a position is out of bounds for `row`. Callers obtain the positions from
/// the table's schema, so an out-of-range position means the row and schema disagree.
fn select_columns(row: &Row, columns: &Vec<ColumnPosition>) -> Row {
    columns
        .iter()
        .map(|&column_position| row[column_position].clone())
        .collect()
}
|
||||
|
||||
// ==============Interpreter================
|
||||
struct State {
|
||||
table_name_position_mapping: BiMap<TableName, TablePosition>,
|
||||
tables: Vec<Table>,
|
||||
}
|
||||
|
||||
impl State {
|
||||
fn table_from_name<'b: 'a, 'a>(&'b self, table_name: &TableName) -> DbResult<&'a Table> {
|
||||
match self.table_name_position_mapping.get_by_left(table_name) {
|
||||
Some(table_position) => {
|
||||
let table = &self.tables[*table_position];
|
||||
Ok(table)
|
||||
},
|
||||
None => Err(Error::TableDoesNotExist(table_name.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
fn table_from_name_mut<'b: 'a, 'a>(&'b mut self, table_name: &TableName) -> DbResult<&'a mut Table> {
|
||||
match self.table_name_position_mapping.get_by_left(table_name) {
|
||||
Some(table_position) => {
|
||||
let table = &mut self.tables[*table_position];
|
||||
Ok(table)
|
||||
},
|
||||
None => Err(Error::TableDoesNotExist(table_name.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
fn attach_table(&mut self, table_name: TableName, table: Table) {
|
||||
let new_table_position: TablePosition = self.tables.len();
|
||||
self.table_name_position_mapping.insert(table_name, new_table_position);
|
||||
self.tables.push(table);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Give a better name to something that you can respond to with rows
|
||||
/// Something that can be responded to with rows. No methods are defined yet.
trait SqlConsumer {
    // TODO: define the methods used to deliver rows.
}
|
||||
|
||||
// TODO: Decide if we want this to return a response (but then you have to deal with lifetimes,
// because you'll be forced to put an iterator/slice into the Response data structure).
// The alternative is to pass a row-consumer to the function that knows how to communicate with
// the client, while the details of communication stay hidden behind an interface.
|
||||
fn interpret(table_name: TableName, operation: Operation, state: &mut State, consumer: impl SqlConsumer) -> DbResult<Response> {
|
||||
// TODO: lock stuff
|
||||
use Operation::*;
|
||||
|
||||
match operation {
|
||||
Select(table_name, column_selection, maybe_condition) => {
|
||||
let table: &Table = state.table_from_name(&table_name)?;
|
||||
Ok(Response::Selected(table.select_where(column_selection, maybe_condition)?))
|
||||
},
|
||||
Insert(table_name, values) => {
|
||||
let table: &mut Table = state.table_from_name_mut(&table_name)?;
|
||||
|
||||
let _ = table.insert(values)?;
|
||||
Ok(Response::Inserted)
|
||||
},
|
||||
Delete(table_name, maybe_condition) => {
|
||||
let table: &mut Table = state.table_from_name_mut(&table_name)?;
|
||||
|
||||
let rows_affected = table.delete_where(maybe_condition)?;
|
||||
Ok(Response::Deleted(rows_affected))
|
||||
},
|
||||
CreateTable(table_name, table_schema) => {
|
||||
let table = Table::new(table_schema);
|
||||
state.attach_table(table_name, table);
|
||||
Ok(Response::TableCreated)
|
||||
},
|
||||
CreateIndex(table_name, column_name) => {
|
||||
// TODO: This is incomplete. It can happen that an index is created
|
||||
// after the table has some rows for a while.
|
||||
// In such a case the index needs to be built over all those existing rows.
|
||||
let table: &mut Table = state.table_from_name_mut(&table_name)?;
|
||||
let column_position: ColumnPosition = table.schema.column_position_from_column_name(&column_name)?;
|
||||
|
||||
let index: ColumnIndex = ColumnIndex::new(&table.rows);
|
||||
table.attach_index(column_position, index);
|
||||
Ok(Response::IndexCreated)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
impl ColumnIndex {
|
||||
fn new(rows: &Rows) -> Self {
|
||||
let index = BTreeMap::new();
|
||||
// TODO: Take into account already existing rows
|
||||
todo!();
|
||||
Self { index }
|
||||
}
|
||||
}
|
||||
|
||||
impl TableSchema {
|
||||
fn get_column(&self, column_name: &ColumnName) -> DbResult<(DbType, ColumnPosition)> {
|
||||
match self.column_name_position_mapping.get_by_left(column_name) {
|
||||
Some(column_position) => {
|
||||
match self.types.get(*column_position) {
|
||||
Some(type_) => {
|
||||
Ok((*type_, *column_position))
|
||||
},
|
||||
None => {
|
||||
Err(Error::MissingTypeAnnotationOfColumn(self.table_name.clone(), *column_position))
|
||||
}
|
||||
}
|
||||
},
|
||||
None => Err(Error::ColumnDoesNotExist(self.table_name.clone(), column_name.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
fn column_position_from_column_name(&self, column_name: &ColumnName) -> DbResult<ColumnPosition> {
|
||||
self.get_column(column_name).map(|(_, column_position)| column_position)
|
||||
}
|
||||
|
||||
fn is_primary(&self, column_position: ColumnPosition) -> bool {
|
||||
self.primary_key == column_position
|
||||
}
|
||||
|
||||
fn column_positions_from_column_names(&self, column_names: &[ColumnName]) -> DbResult<Vec<ColumnPosition>> {
|
||||
let mut positions: Vec<ColumnPosition> = Vec::with_capacity(column_names.len());
|
||||
for column_name in column_names {
|
||||
let column_position = self.column_position_from_column_name(column_name)?;
|
||||
positions.push(column_position)
|
||||
}
|
||||
Ok(positions)
|
||||
}
|
||||
|
||||
fn column_name_from_column_position(&self, column_position: ColumnPosition) -> DbResult<ColumnName> {
|
||||
match self.column_name_position_mapping.get_by_right(&column_position) {
|
||||
Some(column_name) => Ok(column_name.clone()),
|
||||
None => Err(Error::ColumnPositionDoesNotExist(self.table_name.clone(), column_position))
|
||||
}
|
||||
}
|
||||
|
||||
fn column_positions_from_column_selection(&self, column_selection: &ColumnSelection) -> DbResult<Vec<ColumnPosition>> {
|
||||
match column_selection {
|
||||
ColumnSelection::All => {
|
||||
let mut column_positions: Vec<ColumnPosition> = self.column_name_position_mapping.iter().map(|(_, column_position)| *column_position).collect();
|
||||
column_positions.sort();
|
||||
Ok(column_positions)
|
||||
},
|
||||
|
||||
ColumnSelection::Columns(column_names) => {
|
||||
self.column_positions_from_column_names(column_names)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn number_of_columns(&self) -> usize {
|
||||
self.column_name_position_mapping.len()
|
||||
}
|
||||
|
||||
fn row_from_insertion_values(&self, insertion_values: InsertionValues) -> DbResult<(UUID, Row)> {
|
||||
// TODO: There should be proper validation of the insertion_values.
|
||||
// And it shouldn't really be done here.
|
||||
//
|
||||
// In the below we don't check for duplicate column names
|
||||
//
|
||||
let number_of_columns = self.number_of_columns();
|
||||
if number_of_columns != insertion_values.len() {
|
||||
return Err(Error::MismatchBetweenInsertValuesAndColumns(self.table_name.clone(), insertion_values))
|
||||
}
|
||||
|
||||
let mut row: Vec<DbValue> = Vec::with_capacity(number_of_columns);
|
||||
|
||||
let mut values: HashMap<ColumnName, DbValue> = HashMap::new();
|
||||
for (column_name, db_value) in &insertion_values {
|
||||
values.insert(column_name.clone(), db_value.clone());
|
||||
}
|
||||
|
||||
for column_position in 0..number_of_columns {
|
||||
let column_name: ColumnName = self.column_name_from_column_position(column_position)?;
|
||||
match values.get(&column_name) {
|
||||
Some(db_value) => {
|
||||
row.push(db_value.clone())
|
||||
},
|
||||
None => {
|
||||
return Err(Error::MissingColumnInInsertValues(self.table_name.clone(), column_name, insertion_values))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let id = match row.get(self.primary_key) {
|
||||
Some(val) => {
|
||||
match val {
|
||||
DbValue::Indexable(IndexableDbValue::UUID(id)) => {
|
||||
id
|
||||
},
|
||||
_ =>
|
||||
unreachable!()
|
||||
}
|
||||
},
|
||||
None =>
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
Ok((*id, row))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl Table {
|
||||
fn new(table_schema: TableSchema) -> Self {
|
||||
Self {
|
||||
schema: table_schema,
|
||||
rows: BTreeMap::new(),
|
||||
indexes: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn attach_index(&mut self, column_position: ColumnPosition, column_index: ColumnIndex) {
|
||||
self.indexes.insert(column_position, column_index);
|
||||
}
|
||||
|
||||
fn get_row_by_id(&self, id: UUID) -> Option<Row> {
|
||||
self.rows.get(&id).cloned()
|
||||
}
|
||||
|
||||
fn get_rows_by_ids(&self, ids: HashSet<UUID>) -> Vec<Row> {
|
||||
ids.into_iter()
|
||||
.filter_map(|id| self.get_row_by_id(id))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn get_rows_by_value(&self, column_position: ColumnPosition, value: &DbValue) -> Vec<Row> {
|
||||
// brute-force search
|
||||
self.rows.values()
|
||||
.filter_map(|row| if row.get(column_position) == Some(value) { Some(row.clone()) } else { None })
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn delete_row_by_id(&mut self, id: UUID) -> usize {
|
||||
if let Some(row) = self.rows.remove(&id) {
|
||||
let mut something_was_deleted = false;
|
||||
for (column_position, column_index) in &mut self.indexes {
|
||||
if let DbValue::Indexable(value) = &row[*column_position] {
|
||||
something_was_deleted = something_was_deleted || column_index.remove(value, id);
|
||||
};
|
||||
}
|
||||
if something_was_deleted { 1 } else { 0 }
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
|
||||
fn delete_rows_by_ids(&mut self, ids: HashSet<UUID>) -> usize {
|
||||
let mut total_count = 0;
|
||||
for id in ids {
|
||||
total_count += self.delete_row_by_id(id)
|
||||
}
|
||||
total_count
|
||||
}
|
||||
|
||||
fn delete_rows_by_value(&mut self, column_position: ColumnPosition, value: &DbValue) -> usize {
|
||||
let matched_ids: HashSet<UUID> = self.rows.iter()
|
||||
.filter_map(|(id, row)| if row.get(column_position) == Some(value) { Some(*id) } else { None })
|
||||
.collect();
|
||||
self.delete_rows_by_ids(matched_ids)
|
||||
}
|
||||
|
||||
fn select_where(&self, column_selection: ColumnSelection, maybe_condition: Option<Condition>) -> DbResult<Vec<Row>> {
|
||||
let selected_column_positions = self.schema.column_positions_from_column_selection(&column_selection)?;
|
||||
match maybe_condition {
|
||||
None =>
|
||||
Ok(self.rows.values().map(|row| select_columns(row, &selected_column_positions)).collect()),
|
||||
|
||||
Some(Condition::Eq(eq_column_name, value)) => {
|
||||
let (type_, eq_column_position) = self.schema.get_column(&eq_column_name)?;
|
||||
if self.schema.is_primary(eq_column_position) {
|
||||
match value {
|
||||
DbValue::Indexable(IndexableDbValue::UUID(uuid)) => {
|
||||
match self.get_row_by_id(uuid) {
|
||||
Some(row) => Ok(vec![select_columns(&row, &selected_column_positions)]),
|
||||
None => Ok(vec![]),
|
||||
}
|
||||
},
|
||||
_ => Err(Error::ValueDoesNotMatchExpectedType(self.schema.table_name.clone(), eq_column_name.clone(), type_, value.clone()))
|
||||
}
|
||||
} else {
|
||||
match value {
|
||||
DbValue::Indexable(value) => {
|
||||
match self.indexes.get(&eq_column_position) {
|
||||
Some(column_index) => {
|
||||
let ids = column_index.get(value);
|
||||
Ok(self.get_rows_by_ids(ids).iter().map(|row| select_columns(row, &selected_column_positions)).collect())
|
||||
},
|
||||
None => {
|
||||
Ok(self.get_rows_by_value(eq_column_position, &DbValue::Indexable(value)).iter().map(|row| select_columns(row, &selected_column_positions)).collect())
|
||||
}
|
||||
}
|
||||
},
|
||||
_ => {
|
||||
Ok(self.get_rows_by_value(eq_column_position, &value).iter().map(|row| select_columns(row, &selected_column_positions)).collect())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn insert(&mut self, values: InsertionValues) -> DbResult<()> {
|
||||
let (id, row) = self.schema.row_from_insertion_values(values)?;
|
||||
|
||||
if self.rows.get(&id).is_some() {
|
||||
return Err(Error::AttemptingToInsertAlreadyPresentId(self.schema.table_name.clone(), id))
|
||||
}
|
||||
|
||||
for (column_position, column_index) in &mut self.indexes {
|
||||
match row.get(*column_position) {
|
||||
Some(DbValue::Indexable(val)) => {
|
||||
column_index.add(val.clone(), id)
|
||||
},
|
||||
Some(_) => {},
|
||||
None => return Err(Error::ColumnPositionDoesNotExist(self.schema.table_name.clone(), *column_position))
|
||||
}
|
||||
}
|
||||
|
||||
let _ = self.rows.insert(id, row);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn delete_where(&mut self, maybe_condition: Option<Condition>) -> DbResult<usize> {
|
||||
// kinda similar to select with respect to the conditions
|
||||
// update index
|
||||
match maybe_condition {
|
||||
None => {
|
||||
// delete all
|
||||
let number_of_rows = self.rows.len();
|
||||
self.rows = BTreeMap::new();
|
||||
self.indexes = HashMap::new();
|
||||
Ok(number_of_rows)
|
||||
},
|
||||
|
||||
Some(Condition::Eq(eq_column_name, value)) => {
|
||||
let (type_, eq_column_position) = self.schema.get_column(&eq_column_name)?;
|
||||
if self.schema.is_primary(eq_column_position) {
|
||||
match value {
|
||||
DbValue::Indexable(IndexableDbValue::UUID(uuid)) => {
|
||||
Ok(self.delete_row_by_id(uuid))
|
||||
},
|
||||
_ =>
|
||||
return Err(Error::ValueDoesNotMatchExpectedType(self.schema.table_name.clone(), eq_column_name.clone(), type_, value.clone()))
|
||||
}
|
||||
|
||||
} else {
|
||||
match value {
|
||||
DbValue::Indexable(value) => {
|
||||
match self.indexes.get(&eq_column_position) {
|
||||
Some(column_index) => {
|
||||
let ids = column_index.get(value);
|
||||
Ok(self.delete_rows_by_ids(ids))
|
||||
},
|
||||
None =>
|
||||
Ok(self.delete_rows_by_value(eq_column_position, &DbValue::Indexable(value)))
|
||||
}
|
||||
},
|
||||
_ =>
|
||||
Ok(self.delete_rows_by_value(eq_column_position, &value))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ColumnIndex {
|
||||
fn get(&self, value: IndexableDbValue) -> HashSet<UUID> {
|
||||
match self.index.get(&value) {
|
||||
Some(set) => set.clone(),
|
||||
None => HashSet::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn add(&mut self, value: IndexableDbValue, id: UUID) {
|
||||
match self.index.get_mut(&value) {
|
||||
Some(ids) => {
|
||||
ids.insert(id);
|
||||
},
|
||||
None => {
|
||||
self.index.insert(value, HashSet::from([id]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn remove(&mut self, value: &IndexableDbValue, id_to_be_removed: UUID) -> bool {
|
||||
match self.index.get_mut(value) {
|
||||
Some(ids) => {
|
||||
let was_present = ids.remove(&id_to_be_removed);
|
||||
was_present
|
||||
},
|
||||
None => {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum Response {
|
||||
Selected(Vec<Row>),
|
||||
Inserted,
|
||||
Deleted(usize), // how many were deleted
|
||||
TableCreated,
|
||||
IndexCreated,
|
||||
}
|
||||
|
||||
type DbResult<A> = Result<A, Error>;
|
||||
|
||||
// #[derive(Debug)]
|
||||
enum Error {
|
||||
TableDoesNotExist(TableName),
|
||||
ColumnDoesNotExist(TableName, ColumnName),
|
||||
ColumnPositionDoesNotExist(TableName, ColumnPosition),
|
||||
ValueDoesNotMatchExpectedType(TableName, ColumnName, DbType, DbValue),
|
||||
AttemptingToInsertAlreadyPresentId(TableName, UUID),
|
||||
MissingTypeAnnotationOfColumn(TableName, ColumnPosition),
|
||||
MissingColumnInInsertValues(TableName, ColumnName, InsertionValues),
|
||||
MismatchBetweenInsertValuesAndColumns(TableName, InsertionValues),
|
||||
}
|
||||
|
||||
/// Program entry point; for now it only prints a greeting.
fn main() {
    println!("Hello, world!");
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue