Merge remote-tracking branch 'origin/main' into interpreter

This commit is contained in:
Yuriy Dupyn 2023-12-28 09:37:48 +01:00
commit bd3dbe2365
3 changed files with 14 additions and 9 deletions

9
minisql/Cargo.toml Normal file
View file

@ -0,0 +1,9 @@
[package]
name = "minisql"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
bimap = "0.6.3"

563
minisql/src/main.rs Normal file
View file

@ -0,0 +1,563 @@
use std::collections::{BTreeMap, HashMap, HashSet};
use bimap::BiMap;
// ==============SQL operations================
// TODO: Note that every operation has a table name.
// Perhaps consider factoring the table name out
// and think of the operations as operating on a unique table.
/// A single SQL-like operation that `interpret` can execute.
///
/// Every variant carries the name of the table it targets as its first field.
enum Operation {
/// Read rows: which columns to return, optionally restricted by a condition.
Select(TableName, ColumnSelection, Option<Condition>),
/// Insert one row, given as `(column name, value)` pairs.
Insert(TableName, InsertionValues),
/// Delete rows, optionally restricted by a condition (no condition deletes all rows).
Delete(TableName, Option<Condition>),
// Update(...),
/// Create a new table with the given schema.
CreateTable(TableName, TableSchema),
/// Create an index over the named column of a table.
CreateIndex(TableName, ColumnName), // TODO: Is this sufficient?
// DropTable(TableName),
}
/// The payload of an insert: `(column name, value)` pairs, one per column of the table.
type InsertionValues = Vec<(ColumnName, DbValue)>;
/// Which columns a `Select` returns.
enum ColumnSelection {
/// Every column of the table.
All,
/// Only the named columns, in the order listed.
Columns(Vec<ColumnName>),
}
/// A filter restricting which rows a `Select` or `Delete` applies to.
enum Condition {
// And(Box<Condition>, Box<Condition>),
// Or(Box<Condition>, Box<Condition>),
// Not(Box<Condition>),
/// Matches rows whose value in the named column equals the given value.
Eq(ColumnName, DbValue),
// LessOrEqual(ColumnName, DbValue),
// Less(ColumnName, DbValue),
// StringCondition(StringCondition),
}
// enum StringCondition {
// Prefix(ColumnName, String),
// Substring(ColumnName, String),
// }
// ==============Values and Types================
/// Row identifier. Rows are keyed by it, and it is the payload of
/// `IndexableDbValue::UUID`.
type UUID = u64;

// TODO: What about nulls? I would rather not have that as in SQL, it sucks.
//       I would rather have non-nullable values by default,
//       and something like an explicit Option type for nulls.
/// A value stored in a single cell of a row.
#[derive(Debug, Clone, PartialEq)]
enum DbValue {
    /// A floating-point number.
    // TODO: Can't put floats as keys in maps, since they don't implement Eq. What to do?
    Number(f64),
    /// A value that is totally ordered and can therefore serve as an index key.
    Indexable(IndexableDbValue),
}

/// The subset of values that implement `Ord`/`Eq` and can be used as keys of a column index.
#[derive(Debug, Ord, Eq, Clone, PartialOrd, PartialEq)]
enum IndexableDbValue {
    String(String),
    Int(u64),
    UUID(UUID),
    // TODO: what about null?
}

// TODO: Can this be autogenerated from the values?
/// The type tag of a `DbValue`, without the value itself.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DbType {
    String,
    Int,
    Number,
    UUID,
}

impl DbValue {
    // TODO: Can this be autogenerated?
    /// Returns the type tag of this value.
    ///
    /// Takes `&self`: only the variant is inspected, so there is no reason to
    /// consume the (potentially heap-allocated) value.
    fn to_type(&self) -> DbType {
        match self {
            Self::Number(_) => DbType::Number,
            Self::Indexable(IndexableDbValue::String(_)) => DbType::String,
            Self::Indexable(IndexableDbValue::Int(_)) => DbType::Int,
            Self::Indexable(IndexableDbValue::UUID(_)) => DbType::UUID,
        }
    }
}
// ==============Tables================
// table-metadata and data
/// Name of a table, as it appears in operations.
type TableName = String;
/// Position of a table inside `State::tables`.
type TablePosition = usize;
/// A table: its schema, its rows, and the indexes built over its columns.
struct Table {
/// Metadata: table name, primary key, column names/positions and column types.
schema: TableSchema,
/// The stored rows, keyed by row id.
rows: Rows, // TODO: Consider wrapping this in a lock. Also consider if we need to have the
// same lock for both rows and indexes
/// Column indexes, keyed by the position of the column they index.
indexes:
HashMap<ColumnPosition, ColumnIndex> // TODO: Consider generalizing `ColumnPosition` to something that would also apply to a pair of `ColumnNames` etc
}
// TODO: Is this really indexed by DbValues?
// Maybe we should have a separate index type for each type of value we're indexing over
/// Index over a single column: maps each indexed value to the ids of the rows holding it.
///
/// The ids are kept in a set rather than as a single id because several rows may
/// share a value, e.g. a user table can have multiple different users with the same name.
struct ColumnIndex {
index: BTreeMap<IndexableDbValue, HashSet<UUID>>
}
// Note that it is nice to split metadata from the data because
// then you can give the metadata to the parser without giving it the data.
/// Metadata describing a table, kept separate from the table's data.
struct TableSchema {
table_name: TableName, // used for descriptive errors
/// Position of the primary-key column within a row.
primary_key: ColumnPosition,
/// Two-way mapping between column names and their positions within a row.
column_name_position_mapping: BiMap<ColumnName, ColumnPosition>,
/// Column types, looked up by `ColumnPosition`.
types: Vec<DbType>,
}
// Use `TablePosition` as index
/// A list of tables.
type Tables = Vec<Table>;
/// Name of a column, as it appears in operations.
type ColumnName = String;
/// Position of a column within a `Row`.
type ColumnPosition = usize;
// Use `ColumnPosition` as index
/// One stored row: a value per column.
type Row = Vec<DbValue>;
/// The rows of a table, keyed by row id.
type Rows =
// TODO: This should be some sort of an interface to a dictionary
// s.t. in the background it may modify stuff in memory or talk to the disk
BTreeMap<UUID, Row>;
// interface
// insert(id, value)
/// Projects `row` onto the given column positions.
///
/// The returned row contains a clone of the value at each position in
/// `columns`, in the order the positions are listed (a position may be
/// repeated). A position that lies outside `row` is skipped instead of
/// panicking.
fn select_columns(row: &Row, columns: &Vec<ColumnPosition>) -> Row {
    columns
        .iter()
        .filter_map(|&column_position| row.get(column_position).cloned())
        .collect()
}
// ==============Interpreter================
/// The interpreter's state: all tables plus the lookup from table name to table position.
struct State {
/// Two-way mapping between table names and positions in `tables`.
table_name_position_mapping: BiMap<TableName, TablePosition>,
/// The tables themselves, indexed by `TablePosition`.
tables: Vec<Table>,
}
impl State {
fn table_from_name<'b: 'a, 'a>(&'b self, table_name: &TableName) -> DbResult<&'a Table> {
match self.table_name_position_mapping.get_by_left(table_name) {
Some(table_position) => {
let table = &self.tables[*table_position];
Ok(table)
},
None => Err(Error::TableDoesNotExist(table_name.clone()))
}
}
fn table_from_name_mut<'b: 'a, 'a>(&'b mut self, table_name: &TableName) -> DbResult<&'a mut Table> {
match self.table_name_position_mapping.get_by_left(table_name) {
Some(table_position) => {
let table = &mut self.tables[*table_position];
Ok(table)
},
None => Err(Error::TableDoesNotExist(table_name.clone()))
}
}
fn attach_table(&mut self, table_name: TableName, table: Table) {
let new_table_position: TablePosition = self.tables.len();
self.table_name_position_mapping.insert(table_name, new_table_position);
self.tables.push(table);
}
}
// TODO: Give a better name to something that you can respond to with rows
/// Placeholder for whatever receives the result of an operation.
/// It has no methods yet; `interpret` accepts an implementor but the trait defines nothing to call.
trait SqlConsumer {
// TODO:
}
// TODO: Decide if we want this to return a response (but then you have to deal with lifetimes,
// because you'll be forced to put an iterator/slice into the Response data structure).
// The alternative is to pass a row consumer to the function that knows how to communicate with
// the client, while the details of the communication stay hidden behind an interface.
fn interpret(table_name: TableName, operation: Operation, state: &mut State, consumer: impl SqlConsumer) -> DbResult<Response> {
// TODO: lock stuff
use Operation::*;
match operation {
Select(table_name, column_selection, maybe_condition) => {
let table: &Table = state.table_from_name(&table_name)?;
Ok(Response::Selected(table.select_where(column_selection, maybe_condition)?))
},
Insert(table_name, values) => {
let table: &mut Table = state.table_from_name_mut(&table_name)?;
let _ = table.insert(values)?;
Ok(Response::Inserted)
},
Delete(table_name, maybe_condition) => {
let table: &mut Table = state.table_from_name_mut(&table_name)?;
let rows_affected = table.delete_where(maybe_condition)?;
Ok(Response::Deleted(rows_affected))
},
CreateTable(table_name, table_schema) => {
let table = Table::new(table_schema);
state.attach_table(table_name, table);
Ok(Response::TableCreated)
},
CreateIndex(table_name, column_name) => {
// TODO: This is incomplete. It can happen that an index is created
// after the table has some rows for a while.
// In such a case the index needs to be built over all those existing rows.
let table: &mut Table = state.table_from_name_mut(&table_name)?;
let column_position: ColumnPosition = table.schema.column_position_from_column_name(&column_name)?;
let index: ColumnIndex = ColumnIndex::new(&table.rows);
table.attach_index(column_position, index);
Ok(Response::IndexCreated)
},
}
}
impl ColumnIndex {
fn new(rows: &Rows) -> Self {
let index = BTreeMap::new();
// TODO: Take into account already existing rows
todo!();
Self { index }
}
}
impl TableSchema {
fn get_column(&self, column_name: &ColumnName) -> DbResult<(DbType, ColumnPosition)> {
match self.column_name_position_mapping.get_by_left(column_name) {
Some(column_position) => {
match self.types.get(*column_position) {
Some(type_) => {
Ok((*type_, *column_position))
},
None => {
Err(Error::MissingTypeAnnotationOfColumn(self.table_name.clone(), *column_position))
}
}
},
None => Err(Error::ColumnDoesNotExist(self.table_name.clone(), column_name.clone()))
}
}
fn column_position_from_column_name(&self, column_name: &ColumnName) -> DbResult<ColumnPosition> {
self.get_column(column_name).map(|(_, column_position)| column_position)
}
fn is_primary(&self, column_position: ColumnPosition) -> bool {
self.primary_key == column_position
}
fn column_positions_from_column_names(&self, column_names: &[ColumnName]) -> DbResult<Vec<ColumnPosition>> {
let mut positions: Vec<ColumnPosition> = Vec::with_capacity(column_names.len());
for column_name in column_names {
let column_position = self.column_position_from_column_name(column_name)?;
positions.push(column_position)
}
Ok(positions)
}
fn column_name_from_column_position(&self, column_position: ColumnPosition) -> DbResult<ColumnName> {
match self.column_name_position_mapping.get_by_right(&column_position) {
Some(column_name) => Ok(column_name.clone()),
None => Err(Error::ColumnPositionDoesNotExist(self.table_name.clone(), column_position))
}
}
fn column_positions_from_column_selection(&self, column_selection: &ColumnSelection) -> DbResult<Vec<ColumnPosition>> {
match column_selection {
ColumnSelection::All => {
let mut column_positions: Vec<ColumnPosition> = self.column_name_position_mapping.iter().map(|(_, column_position)| *column_position).collect();
column_positions.sort();
Ok(column_positions)
},
ColumnSelection::Columns(column_names) => {
self.column_positions_from_column_names(column_names)
},
}
}
fn number_of_columns(&self) -> usize {
self.column_name_position_mapping.len()
}
fn row_from_insertion_values(&self, insertion_values: InsertionValues) -> DbResult<(UUID, Row)> {
// TODO: There should be proper validation of the insertion_values.
// And it shouldn't really be done here.
//
// In the below we don't check for duplicate column names
//
let number_of_columns = self.number_of_columns();
if number_of_columns != insertion_values.len() {
return Err(Error::MismatchBetweenInsertValuesAndColumns(self.table_name.clone(), insertion_values))
}
let mut row: Vec<DbValue> = Vec::with_capacity(number_of_columns);
let mut values: HashMap<ColumnName, DbValue> = HashMap::new();
for (column_name, db_value) in &insertion_values {
values.insert(column_name.clone(), db_value.clone());
}
for column_position in 0..number_of_columns {
let column_name: ColumnName = self.column_name_from_column_position(column_position)?;
match values.get(&column_name) {
Some(db_value) => {
row.push(db_value.clone())
},
None => {
return Err(Error::MissingColumnInInsertValues(self.table_name.clone(), column_name, insertion_values))
}
}
}
let id = match row.get(self.primary_key) {
Some(val) => {
match val {
DbValue::Indexable(IndexableDbValue::UUID(id)) => {
id
},
_ =>
unreachable!()
}
},
None =>
unreachable!()
};
Ok((*id, row))
}
}
impl Table {
fn new(table_schema: TableSchema) -> Self {
Self {
schema: table_schema,
rows: BTreeMap::new(),
indexes: HashMap::new(),
}
}
fn attach_index(&mut self, column_position: ColumnPosition, column_index: ColumnIndex) {
self.indexes.insert(column_position, column_index);
}
fn get_row_by_id(&self, id: UUID) -> Option<Row> {
self.rows.get(&id).cloned()
}
fn get_rows_by_ids(&self, ids: HashSet<UUID>) -> Vec<Row> {
ids.into_iter()
.filter_map(|id| self.get_row_by_id(id))
.collect()
}
fn get_rows_by_value(&self, column_position: ColumnPosition, value: &DbValue) -> Vec<Row> {
// brute-force search
self.rows.values()
.filter_map(|row| if row.get(column_position) == Some(value) { Some(row.clone()) } else { None })
.collect()
}
fn delete_row_by_id(&mut self, id: UUID) -> usize {
if let Some(row) = self.rows.remove(&id) {
let mut something_was_deleted = false;
for (column_position, column_index) in &mut self.indexes {
if let DbValue::Indexable(value) = &row[*column_position] {
something_was_deleted = something_was_deleted || column_index.remove(value, id);
};
}
if something_was_deleted { 1 } else { 0 }
} else {
0
}
}
fn delete_rows_by_ids(&mut self, ids: HashSet<UUID>) -> usize {
let mut total_count = 0;
for id in ids {
total_count += self.delete_row_by_id(id)
}
total_count
}
fn delete_rows_by_value(&mut self, column_position: ColumnPosition, value: &DbValue) -> usize {
let matched_ids: HashSet<UUID> = self.rows.iter()
.filter_map(|(id, row)| if row.get(column_position) == Some(value) { Some(*id) } else { None })
.collect();
self.delete_rows_by_ids(matched_ids)
}
fn select_where(&self, column_selection: ColumnSelection, maybe_condition: Option<Condition>) -> DbResult<Vec<Row>> {
let selected_column_positions = self.schema.column_positions_from_column_selection(&column_selection)?;
match maybe_condition {
None =>
Ok(self.rows.values().map(|row| select_columns(row, &selected_column_positions)).collect()),
Some(Condition::Eq(eq_column_name, value)) => {
let (type_, eq_column_position) = self.schema.get_column(&eq_column_name)?;
if self.schema.is_primary(eq_column_position) {
match value {
DbValue::Indexable(IndexableDbValue::UUID(uuid)) => {
match self.get_row_by_id(uuid) {
Some(row) => Ok(vec![select_columns(&row, &selected_column_positions)]),
None => Ok(vec![]),
}
},
_ => Err(Error::ValueDoesNotMatchExpectedType(self.schema.table_name.clone(), eq_column_name.clone(), type_, value.clone()))
}
} else {
match value {
DbValue::Indexable(value) => {
match self.indexes.get(&eq_column_position) {
Some(column_index) => {
let ids = column_index.get(value);
Ok(self.get_rows_by_ids(ids).iter().map(|row| select_columns(row, &selected_column_positions)).collect())
},
None => {
Ok(self.get_rows_by_value(eq_column_position, &DbValue::Indexable(value)).iter().map(|row| select_columns(row, &selected_column_positions)).collect())
}
}
},
_ => {
Ok(self.get_rows_by_value(eq_column_position, &value).iter().map(|row| select_columns(row, &selected_column_positions)).collect())
}
}
}
}
}
}
fn insert(&mut self, values: InsertionValues) -> DbResult<()> {
let (id, row) = self.schema.row_from_insertion_values(values)?;
if self.rows.get(&id).is_some() {
return Err(Error::AttemptingToInsertAlreadyPresentId(self.schema.table_name.clone(), id))
}
for (column_position, column_index) in &mut self.indexes {
match row.get(*column_position) {
Some(DbValue::Indexable(val)) => {
column_index.add(val.clone(), id)
},
Some(_) => {},
None => return Err(Error::ColumnPositionDoesNotExist(self.schema.table_name.clone(), *column_position))
}
}
let _ = self.rows.insert(id, row);
Ok(())
}
fn delete_where(&mut self, maybe_condition: Option<Condition>) -> DbResult<usize> {
// kinda similar to select with respect to the conditions
// update index
match maybe_condition {
None => {
// delete all
let number_of_rows = self.rows.len();
self.rows = BTreeMap::new();
self.indexes = HashMap::new();
Ok(number_of_rows)
},
Some(Condition::Eq(eq_column_name, value)) => {
let (type_, eq_column_position) = self.schema.get_column(&eq_column_name)?;
if self.schema.is_primary(eq_column_position) {
match value {
DbValue::Indexable(IndexableDbValue::UUID(uuid)) => {
Ok(self.delete_row_by_id(uuid))
},
_ =>
return Err(Error::ValueDoesNotMatchExpectedType(self.schema.table_name.clone(), eq_column_name.clone(), type_, value.clone()))
}
} else {
match value {
DbValue::Indexable(value) => {
match self.indexes.get(&eq_column_position) {
Some(column_index) => {
let ids = column_index.get(value);
Ok(self.delete_rows_by_ids(ids))
},
None =>
Ok(self.delete_rows_by_value(eq_column_position, &DbValue::Indexable(value)))
}
},
_ =>
Ok(self.delete_rows_by_value(eq_column_position, &value))
}
}
}
}
}
}
impl ColumnIndex {
    /// Returns the ids of the rows recorded under `value` (empty when there are none).
    fn get(&self, value: IndexableDbValue) -> HashSet<UUID> {
        self.index.get(&value).cloned().unwrap_or_default()
    }

    /// Records that the row `id` holds `value` in the indexed column.
    fn add(&mut self, value: IndexableDbValue, id: UUID) {
        self.index.entry(value).or_default().insert(id);
    }

    /// Removes the record that the row `id_to_be_removed` holds `value`.
    ///
    /// Returns `true` if such a record existed. When the last id of a value is
    /// removed, the value's entry is dropped as well, so that the index does not
    /// accumulate empty sets.
    fn remove(&mut self, value: &IndexableDbValue, id_to_be_removed: UUID) -> bool {
        let (was_present, is_now_empty) = match self.index.get_mut(value) {
            Some(ids) => (ids.remove(&id_to_be_removed), ids.is_empty()),
            None => return false,
        };
        if is_now_empty {
            self.index.remove(value);
        }
        was_present
    }
}
/// The outcome of a successfully interpreted operation, one variant per kind of operation.
enum Response {
/// The rows produced by a select.
Selected(Vec<Row>),
Inserted,
Deleted(usize), // how many were deleted
TableCreated,
IndexCreated,
}
/// Result type used throughout this file: a value of type `A` or an `Error`.
type DbResult<A> = Result<A, Error>;
/// Everything that can go wrong while interpreting an operation.
///
/// `Debug` is derived so that results can be used with `unwrap`/`expect` and
/// errors can be printed; every payload type already implements `Debug`.
#[derive(Debug)]
enum Error {
    /// No table is registered under the given name.
    TableDoesNotExist(TableName),
    /// The table has no column with the given name.
    ColumnDoesNotExist(TableName, ColumnName),
    /// The table has no column at the given position.
    ColumnPositionDoesNotExist(TableName, ColumnPosition),
    /// The value given for the column is not of the column's type.
    ValueDoesNotMatchExpectedType(TableName, ColumnName, DbType, DbValue),
    /// A row with the given id is already stored in the table.
    AttemptingToInsertAlreadyPresentId(TableName, UUID),
    /// The schema lists no type for the column at the given position.
    MissingTypeAnnotationOfColumn(TableName, ColumnPosition),
    /// The insertion values contain no value for the named column.
    MissingColumnInInsertValues(TableName, ColumnName, InsertionValues),
    /// The number of insertion values differs from the number of columns.
    MismatchBetweenInsertValuesAndColumns(TableName, InsertionValues),
}
/// Program entry point; for now it only prints a greeting line to standard output.
fn main() {
    const GREETING: &str = "Hello, world!";
    println!("{}", GREETING);
}