First attempt at SELECT

This commit is contained in:
Yuriy Dupyn 2023-12-11 17:51:17 +01:00
parent cb7b50109e
commit 20615508a2

View file

@ -1,4 +1,4 @@
use std::collections::{BTreeMap, HashMap};
use std::collections::{BTreeMap, HashMap, HashSet};
// ==============SQL operations================
// TODO: Note that every operation has a table name.
@ -45,15 +45,23 @@ type UUID = u64;
// TODO: What about nulls? I would rather not have that as in SQL, it sucks.
// I would rather have non-nullable values by default,
// and something like an explicit Option type for nulls.
/// A single cell value stored in a table row.
///
/// Values that can be used as index keys are wrapped in [`IndexableDbValue`];
/// floating-point numbers are kept in their own variant.
#[derive(Debug, Clone, PartialEq)]
enum DbValue {
    /// A floating-point number.
    // TODO: Can't put floats as keys in maps, since they don't implement Eq. What to do?
    Number(f64),
    /// A value that can be used as the key of a column index.
    Indexable(IndexableDbValue),
}
#[derive(Debug, Ord, Eq, Clone, PartialOrd, PartialEq)]
enum IndexableDbValue {
String(String),
Int(u64),
Number(f64),
UUID(UUID),
// TODO: what bout null?
}
// TODO: Can this be autogenerated from the values?
#[derive(Debug, Clone, Copy)]
enum DbType {
String,
Int,
@ -65,10 +73,13 @@ impl DbValue {
// TODO: Can this be autogenerated?
/// Returns the [`DbType`] tag that describes this value.
///
/// Takes `self` by value, so the value is consumed by the call.
fn to_type(self) -> DbType {
    // `String`, `Int` and `UUID` are variants of `IndexableDbValue`, not of
    // `DbValue`, so they are only reachable through `Self::Indexable`.
    match self {
        Self::Number(_) => DbType::Number,
        Self::Indexable(IndexableDbValue::String(_)) => DbType::String,
        Self::Indexable(IndexableDbValue::Int(_)) => DbType::Int,
        Self::Indexable(IndexableDbValue::UUID(_)) => DbType::UUID,
    }
}
}
@ -90,13 +101,17 @@ struct Table {
// TODO: Is this really indexed by DbValues?
// Maybe we should have a separate index type for each type of value we're indexing over
// TODO: I should have a set of UUID, not just a single UUID, e.g.
// a user table can have multiple different users with the same name.
struct ColumnIndex {
index: BTreeMap<DbValue, UUID>
index: BTreeMap<IndexableDbValue, HashSet<UUID>>
}
// Note that it is nice to split metadata from the data because
// then you can give the metadata to the parser without giving it the data.
/// Metadata describing a table's columns, kept separate from the row data.
struct TableSchema {
    /// Name of the table; used for descriptive errors.
    table_name: TableName,
    /// Position of the primary-key column.
    primary_key: ColumnPosition,
    /// Declared type and position of every column, keyed by column name.
    columns: HashMap<ColumnName, (DbType, ColumnPosition)>,
}
@ -104,13 +119,12 @@ struct TableSchema {
/// Placeholder for a lookup from a column name to its `ColumnPosition`.
///
/// Takes both the schema and the column name by value. The body is still a
/// `todo!()`, so every call panics.
// NOTE(review): presumably meant to read `table_meta.columns`, much like
// `TableSchema::get_column` does; confirm before implementing.
fn column_position(table_meta: TableSchema, column_name: ColumnName) -> Option<ColumnPosition> {
todo!()
}
// Use `TablePosition` as index
type Tables = Vec<Table>;
type ColumnName = String;
type ColumnPosition = u32;
type ColumnPosition = usize;
// Use `ColumnPosition` as index
type Row = Vec<DbValue>;
@ -123,6 +137,11 @@ type Rows =
// interface
// insert(id, value)
/// Projects `row` onto the given column positions.
///
/// Returns a new row holding clones of the values found at `columns`, in the
/// order the positions are listed. A position may appear more than once, and
/// an empty `columns` yields an empty row.
///
/// # Panics
///
/// Panics if any position is out of bounds for `row`. Silently skipping such
/// a position would shift the remaining values into the wrong columns.
fn select_columns(row: &Row, columns: &Vec<ColumnPosition>) -> Row {
    columns
        .iter()
        .map(|&position| row[position].clone())
        .collect()
}
// ==============Interpreter================
struct State {
table_positions: HashMap<TableName, TablePosition>,
@ -134,7 +153,7 @@ impl State {
todo!()
}
fn attach_table(&mut self, table: Table) {
fn attach_table(&mut self, table_name: TableName, table: Table) {
todo!()
}
}
@ -155,104 +174,190 @@ fn get_table<'tables_life: 'table_life, 'table_life>(tables: &'tables_life Table
// because you'll be forced to put an iterator/slice into the Response data-structure.
// The alternative is to pass the functions a row-consumer that knows how to communicate with
// the client, but the details of communication are hidden behind an interface
fn interpret(table_name: TableName, operation: Operation, state: &mut State, consumer: impl SqlConsumer) -> () {
fn interpret(table_name: TableName, operation: Operation, state: &mut State, consumer: impl SqlConsumer) -> DbResult<Response> {
// TODO: lock stuff
use Operation::*;
match operation {
Select(table_name, column_selection, maybe_condition) => {
let table: &Table = todo!();
table.select_where(column_selection, maybe_condition, consumer)
Ok(Response::Selected(table.select_where(column_selection, maybe_condition)?))
},
Insert(table_name, values) => {
let table: &mut Table = todo!();
table.insert(values, consumer)
table.insert(values);
todo!()
},
Delete(table_name, maybe_condition) => {
let table: &mut Table = todo!();
table.delete_where(maybe_condition, consumer)
table.delete_where(maybe_condition);
todo!()
},
CreateTable(table_name, table_schema) => {
let table = Table::new(table_name, table_schema);
state.attach_table(table);
let table = Table::new(table_schema);
state.attach_table(table_name, table);
todo!()
},
CreateIndex(table_name, column_name) => {
let table: &mut Table = todo!();
let column_position: ColumnPosition = todo!();
let index: ColumnIndex = ColumnIndex::new(table, column_name);
table.attach_index(index);
}, // TODO: Is this sufficient?
//
let index: ColumnIndex = ColumnIndex::new();
table.attach_index(column_position, index);
todo!()
},
}
}
impl ColumnIndex {
fn new(table: &Table, column_name: ColumnName) -> ColumnIndex {
todo!()
fn new() -> Self {
Self { index: BTreeMap::new() }
}
}
impl TableSchema {
fn get_column(&self, column_name: &ColumnName) -> DbResult<(DbType, ColumnPosition)> {
match self.columns.get(column_name) {
Some((type_, column_position)) => Ok((*type_, *column_position)),
None => Err(Error::ColumnDoesNotExist(self.table_name.clone(), column_name.clone()))
}
}
fn is_primary(&self, column_position: ColumnPosition) -> bool {
self.primary_key == column_position
}
fn column_positions_from_column_names(&self, column_names: &[ColumnName]) -> DbResult<Vec<ColumnPosition>> {
let mut positions: Vec<ColumnPosition> = Vec::with_capacity(column_names.len());
for column_name in column_names {
let (_, column_position) = self.get_column(column_name)?;
positions.push(column_position)
}
Ok(positions)
}
fn column_positions_from_column_selection(&self, column_selection: &ColumnSelection) -> DbResult<Vec<ColumnPosition>> {
match column_selection {
ColumnSelection::All => {
let mut column_positions: Vec<ColumnPosition> = self.columns.values().map(|(_, column_position)| *column_position).collect();
column_positions.sort();
Ok(column_positions)
},
ColumnSelection::Columns(column_names) => {
self.column_positions_from_column_names(column_names)
},
}
}
}
impl Table {
fn new(table_name: TableName, table_schema: TableSchema) -> Table {
todo!()
fn new(table_schema: TableSchema) -> Self {
Self {
schema: table_schema,
rows: BTreeMap::new(),
indexes: HashMap::new(),
}
}
fn attach_index(&mut self, column_index: ColumnIndex) {
todo!()
fn attach_index(&mut self, column_position: ColumnPosition, column_index: ColumnIndex) {
self.indexes.insert(column_position, column_index);
}
fn select_where(&self, column_selection: ColumnSelection, maybe_condition: Option<Condition>, consumer: impl SqlConsumer) {
fn get_row_by_id(&self, id: UUID) -> Option<Row> {
self.rows.get(&id).cloned()
}
fn get_rows_by_ids(&self, ids: HashSet<UUID>) -> Vec<Row> {
ids.into_iter()
.filter_map(|id| self.get_row_by_id(id))
.collect()
}
fn get_rows_by_value(&self, column_position: ColumnPosition, value: &DbValue) -> Vec<Row> {
// brute-force search
self.rows.values()
.filter_map(|row| if row.get(column_position) == Some(value) { Some(row.clone()) } else { None })
.collect()
}
fn select_where(&self, column_selection: ColumnSelection, maybe_condition: Option<Condition>) -> DbResult<Vec<Row>> {
let selected_column_positions = self.schema.column_positions_from_column_selection(&column_selection)?;
match maybe_condition {
None => {
// .iter() will give us an iterator over all the rows
None =>
Ok(self.rows.values().map(|row| select_columns(row, &selected_column_positions)).collect()),
// two choices
// 1. optimized version
// self.iter_with_columns(column_selection).for_each(|row| {
// consumer.send(row)
// });
// 2.
// self.iter()
// .map(|row| row.select_columns(column_selection))
// .for_each(|reduced_row| {
// consumer.send(row)
// });
todo!()
},
Some(Condition::Eq(column_name, value)) => {
// is column_name primary key? then it is easy
// self.get(id)
// is column_name indexed? Then get the index, and then it is not easy, because you
// may get a set of ids.
// what if it is not primary nor indexed? then you need to brute force your way
// through the whole table?
todo!()
Some(Condition::Eq(eq_column_name, value)) => {
let (type_, eq_column_position) = self.schema.get_column(&eq_column_name)?;
if self.schema.is_primary(eq_column_position) {
match value {
DbValue::Indexable(IndexableDbValue::UUID(uuid)) => {
match self.get_row_by_id(uuid) {
Some(row) => Ok(vec![select_columns(&row, &selected_column_positions)]),
None => Ok(vec![]),
}
},
_ => Err(Error::ValueDoesNotMatchExpectedType(self.schema.table_name.clone(), eq_column_name.clone(), type_, value.clone()))
}
} else {
match value {
DbValue::Indexable(value) => {
match self.indexes.get(&eq_column_position) {
Some(column_index) => {
let ids = column_index.get(value);
Ok(self.get_rows_by_ids(ids).iter().map(|row| select_columns(row, &selected_column_positions)).collect())
},
None => {
Ok(self.get_rows_by_value(eq_column_position, &DbValue::Indexable(value)).iter().map(|row| select_columns(row, &selected_column_positions)).collect())
}
}
},
_ => {
Ok(self.get_rows_by_value(eq_column_position, &value).iter().map(|row| select_columns(row, &selected_column_positions)).collect())
}
}
}
}
}
}
fn insert(&mut self, values: InsertionValues, consumer: impl SqlConsumer) {
fn insert(&mut self, values: InsertionValues) {
// 1. You need to update indices
// 2. you simply insert the data
todo!()
}
fn delete_where(&mut self, maybe_condition: Option<Condition>, consumer: impl SqlConsumer) {
fn delete_where(&mut self, maybe_condition: Option<Condition>) {
// kinda similar to select with respect to the conditions
// update index
todo!()
}
}
// enum Response {
// Selected(impl Iter<???>), // TODO: How to do this? Some reference to an iterator somehow... slice..?
// Inserted(???),
// Deleted(usize), // how many were deleted
// }
impl ColumnIndex {
    /// Returns a copy of the set of row ids recorded for `value`.
    ///
    /// The set is empty when `value` has no entry in the index.
    fn get(&self, value: IndexableDbValue) -> HashSet<UUID> {
        self.index.get(&value).cloned().unwrap_or_default()
    }
}
/// Successful outcome of interpreting an operation.
#[derive(Debug, Clone, PartialEq)]
enum Response {
    /// The rows produced by a select, already projected onto the selected columns.
    Selected(Vec<Row>),
    /// An insert was performed.
    Inserted(),
    /// A delete was performed; carries how many rows were deleted.
    Deleted(usize),
}
/// Result of a database operation that can fail with an [`Error`].
type DbResult<A> = Result<A, Error>;

/// Errors that can occur while interpreting an operation.
#[derive(Debug, Clone)]
enum Error {
    /// The named column is not part of the named table's schema.
    ColumnDoesNotExist(TableName, ColumnName),
    /// The value given for a column does not have the type that column expects.
    ValueDoesNotMatchExpectedType(TableName, ColumnName, DbType, DbValue),
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Everything is rendered with `Debug`, so the only bound needed on
        // the payload types is the one `#[derive(Debug)]` already imposes.
        match self {
            Self::ColumnDoesNotExist(table_name, column_name) => write!(
                f,
                "column {:?} does not exist in table {:?}",
                column_name, table_name
            ),
            Self::ValueDoesNotMatchExpectedType(table_name, column_name, expected_type, value) => write!(
                f,
                "value {:?} does not match expected type {:?} for column {:?} in table {:?}",
                value, expected_type, column_name, table_name
            ),
        }
    }
}

impl std::error::Error for Error {}
fn main() {
println!("Hello, world!");