Merge branch 'main' into redesign-tables

This commit is contained in:
Yuriy Dupyn 2024-02-01 19:45:29 +01:00
commit eb034592fa
5 changed files with 130 additions and 25 deletions

View file

@ -1,3 +1,7 @@
Note that this is a Historical Document. It is a first attempt at
figuring out basic design requirements.
# MiniSQL # MiniSQL
## Official Description ## Official Description
@ -40,7 +44,7 @@ Possible usage:
```./minisql server start --db path/to/db/my-db.db --port 1433``` ```./minisql server start --db path/to/db/my-db.db --port 1433```
which will store the database as a file `path/to/db/my-db.db` and open a TCP server on port `1433` which will store the database as a file `path/to/db/my-db.db` and open a TCP server on port `1433`
* Then on possibly a different machine you run `./minisql client connect server_ip_address:6666` to start a client. This will open a REPL with which you can send queries/db management commands * Then on possibly a different machine you run `./minisql client connect server_ip_address:6666` to start a client. This will open a REPL with which you can send queries/db management commands
* TODO: We should also consider writing a rust library that allows you to spin up a client that connects to the server. * We should also consider writing a rust library that allows you to spin up a client that connects to the server.
How would the interface look like? How would the interface look like?
``` ```
use mysql::{DB, DBConnection} use mysql::{DB, DBConnection}
@ -69,7 +73,7 @@ which will store the database as a file `path/to/db/my-db.db` and open a TCP ser
how will the parsing output look like? how will the parsing output look like?
Consider something like Consider something like
``` ```
// TODO: Parser has access to all table metadata // Parser has access to all table metadata
// Could also be called `SQLAbstractSyntaxTree` // Could also be called `SQLAbstractSyntaxTree`
enum Operation { enum Operation {
@ -132,7 +136,7 @@ type TableName = String
// Note that it is nice to split metadata from the data because // Note that it is nice to split metadata from the data because
// then you can give the metadata to the parser without giving it the data. // then you can give the metadata to the parser without giving it the data.
struct TableMetaData { struct TableMetaData {
name: TableName, // TODO: Is this really necessary? probably not name: TableName,
columns: Vec<(ColumnName, DbType, ColumnPosition)> columns: Vec<(ColumnName, DbType, ColumnPosition)>
} }
@ -142,7 +146,7 @@ struct Table {
meta: TableMetaData, meta: TableMetaData,
rows: Rows // defined below rows: Rows // defined below
indexes: indexes:
BTree<ColumnName, Index> // TODO: Consider generalizing ColumnName to semething that would also apply to a pair of ColumnNames etc BTree<ColumnName, Index>
} }
type Tables = HashMap<TableName, Table> type Tables = HashMap<TableName, Table>
@ -183,16 +187,13 @@ Vec<Vec<DbValue>>
* Interpreter * Interpreter
``` ```
trait SqlConsumer { trait SqlConsumer {
// TODO:
??? ???
} }
fn interpret<T: SqlConsumer>(operation: Operation, tables: &mut Tables, consumer: T) -> () { fn interpret<T: SqlConsumer>(operation: Operation, tables: &mut Tables, consumer: T) -> () {
// TODO: lock stuff
match operation { match operation {
Select(table_name, column_selection, maybe_condition) => { Select(table_name, column_selection, maybe_condition) => {
let table: Table = ... let table: Table = ...
// TODO: Wrap this into a response
select(table, column_selection, maybe_condition, consumer) select(table, column_selection, maybe_condition, consumer)
}, },
Insert(table_name, Vec<(ColumnName, DbValue)>) => { Insert(table_name, Vec<(ColumnName, DbValue)>) => {
@ -209,7 +210,7 @@ fn interpret<T: SqlConsumer>(operation: Operation, tables: &mut Tables, consumer
enum Response { enum Response {
Selected(impl Iter<???>) // TODO: How to do this? Some reference to an iterator somehow... slice..? Selected(impl Iter<???>) // How to do this? Some reference to an iterator somehow... slice..?
Inserted(???), Inserted(???),
Deleted(usize), // how many were deleted Deleted(usize), // how many were deleted
} }
@ -221,7 +222,7 @@ fn select(table: Table, ColumnName
``` ```
* TODO: Consider streaming the response to the client and not just dumping 10K rows at once. * Consider streaming the response to the client and not just dumping 10K rows at once.

File diff suppressed because one or more lines are too long

View file

@ -12,8 +12,7 @@ use crate::type_system::{IndexableValue, Uuid, Value};
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
pub struct Table { pub struct Table {
schema: TableSchema, schema: TableSchema,
rows: Rows, // TODO: Consider wrapping this in a lock. Also consider if we need to have the rows: Rows,
// same lock for both rows and indexes
indexes: HashMap<Column, ColumnIndex>, indexes: HashMap<Column, ColumnIndex>,
} }

View file

@ -35,10 +35,9 @@ impl std::fmt::Debug for Response<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
use Response::*; use Response::*;
match self { match self {
Selected(_schema, _columns, _rows) => Selected(_schema, _columns, _rows) => {
// TODO: How can we iterate through the rows without having to take ownership of // It seems that Rust requires ownership of rows to format them here.
// them? // This is why we output the string below
{
f.write_str("Some rows... trust me") f.write_str("Some rows... trust me")
} }
Inserted => f.write_str("Inserted"), Inserted => f.write_str("Inserted"),
@ -89,7 +88,6 @@ impl State {
} }
pub fn interpret<'a>(&'a mut self, operation: Operation) -> DbResult<Response<'a>> { pub fn interpret<'a>(&'a mut self, operation: Operation) -> DbResult<Response<'a>> {
// TODO: lock stuff
use Operation::*; use Operation::*;
match operation { match operation {
@ -575,8 +573,6 @@ pub fn example() {
{ {
{ {
// TODO: Why do I have to write these braces explicitely? Why doesn't Rust compiler
// "infer" them?
let _delete_response: Response = state let _delete_response: Response = state
.interpret(Delete(users_position, Some(Eq(id_column, id0.clone())))) .interpret(Delete(users_position, Some(Eq(id_column, id0.clone()))))
.unwrap(); .unwrap();

View file

@ -13,22 +13,32 @@ pub enum DbType {
// ==============Values================ // ==============Values================
pub type Uuid = u64; pub type Uuid = u64;
// TODO: What about nulls? I would rather not have that in SQL, it sucks.
// I would rather have non-nullable values by default,
// and something like an explicit Option type for nulls.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(try_from = "String", into = "String")]
pub enum Value { pub enum Value {
Number(f64), // TODO: Can't put floats as keys in maps, since they don't implement Eq. What to // Note that it doesn't really make sense to compare floats on equality without specifying
// do? // precision. You can of course convert a float to string or to a bytevector and then compare
// equality of those, but that's not the right equality. And of course Rust designers are aware
// of this, so floats don't implement the Eq trait.
// This of course complicates indexing of Number columns.
//
// Either we'd have to design a specific key-value map data-structure where keys are floats,
// s.t. to index with a given float K you also specify a tolerance error so that the resulting
// value set will contain all values whose keys are close to K within that tolerance. This
// seems highly non-trivial.
//
// So we choose to make a distinction between indexable and non-indexable types, and Number is
// not indexable.
Number(f64),
Indexable(IndexableValue), Indexable(IndexableValue),
} }
#[derive(Debug, Ord, Eq, Clone, PartialOrd, PartialEq, Serialize, Deserialize)] #[derive(Debug, Ord, Eq, Clone, PartialOrd, PartialEq, Serialize, Deserialize)]
#[serde(try_from = "String", into = "String")]
pub enum IndexableValue { pub enum IndexableValue {
String(String), String(String),
Int(u64), Int(u64),
Uuid(Uuid), Uuid(Uuid),
// TODO: what about null?
} }
impl DbType { impl DbType {
@ -113,10 +123,88 @@ impl Value {
} }
} }
// Own string serialization so enums can be used as keys in maps
/// Renders an `IndexableValue` as `Variant(payload)` text, e.g. `Int(42)` or `String(hello)`.
///
/// The payload is written verbatim (no quoting or escaping). The output shape must stay in
/// sync with `TryFrom<String> for IndexableValue`, which parses this text back.
impl From<IndexableValue> for String {
    fn from(value: IndexableValue) -> Self {
        // Split the value into its variant tag and its textual payload, then wrap once.
        let (tag, payload) = match value {
            IndexableValue::String(text) => ("String", text),
            IndexableValue::Int(number) => ("Int", number.to_string()),
            IndexableValue::Uuid(id) => ("Uuid", id.to_string()),
        };
        format!("{tag}({payload})")
    }
}
/// Parses the `Variant(payload)` text produced by `String::from(IndexableValue)`.
///
/// Accepted shapes are `String(<text>)`, `Int(<u64>)` and `Uuid(<u64>)`. The payload is
/// everything between the opening tag and the final `)`, so string payloads may themselves
/// contain parentheses.
///
/// # Errors
///
/// * `"Invalid IndexableValue: <input>"` when the input does not end with `)` or does not
///   start with a known tag.
/// * `"Invalid Int: <reason>"` / `"Invalid UUID: <reason>"` when the numeric payload does not
///   parse as `u64`.
impl TryFrom<String> for IndexableValue {
    type Error = String;

    fn try_from(value: String) -> Result<Self, Self::Error> {
        // Every encoding ends with a closing paren; peel it off once so the remaining
        // checks only have to deal with the `Tag(` prefix.
        let body = match value.strip_suffix(')') {
            Some(body) => body,
            None => return Err(format!("Invalid IndexableValue: {}", value)),
        };

        // `strip_prefix` ties the payload offset to the tag literal itself, so there are no
        // hand-counted byte offsets to keep in sync.
        if let Some(text) = body.strip_prefix("String(") {
            return Ok(Self::String(text.to_string()));
        }
        if let Some(digits) = body.strip_prefix("Int(") {
            return digits
                .parse::<u64>()
                .map(Self::Int)
                .map_err(|e| format!("Invalid Int: {}", e));
        }
        if let Some(digits) = body.strip_prefix("Uuid(") {
            return digits
                .parse::<u64>()
                .map(Self::Uuid)
                .map_err(|e| format!("Invalid UUID: {}", e));
        }

        Err(format!("Invalid IndexableValue: {}", value))
    }
}
/// Renders a `Value` as `Variant(payload)` text, e.g. `Number(1.5)` or `Indexable(Int(42))`.
///
/// Numbers use `f64`'s `Display` output (so `1.0` becomes `Number(1)`); indexable values
/// nest the text produced by `String::from(IndexableValue)`.
impl From<Value> for String {
    fn from(value: Value) -> Self {
        // Split the value into its variant tag and its textual payload, then wrap once.
        let (tag, payload) = match value {
            Value::Number(float) => ("Number", float.to_string()),
            Value::Indexable(inner) => ("Indexable", String::from(inner)),
        };
        format!("{tag}({payload})")
    }
}
/// Parses the `Variant(payload)` text produced by `String::from(Value)`.
///
/// Accepted shapes are `Number(<f64>)` and `Indexable(<indexable value text>)`, where the
/// inner text is handed to `IndexableValue::try_from`.
///
/// # Errors
///
/// * `"Invalid Value: <input>"` when the input does not end with `)` or does not start with
///   a known tag.
/// * `"Invalid Number: <reason>"` when the numeric payload does not parse as `f64`.
/// * Whatever error `IndexableValue::try_from` reports for a malformed `Indexable(...)`
///   payload.
impl TryFrom<String> for Value {
    type Error = String;

    fn try_from(value: String) -> Result<Self, Self::Error> {
        // Every encoding ends with a closing paren; peel it off once so the remaining
        // checks only have to deal with the `Tag(` prefix.
        let body = match value.strip_suffix(')') {
            Some(body) => body,
            None => return Err(format!("Invalid Value: {}", value)),
        };

        // `strip_prefix` ties the payload offset to the tag literal itself, so there are no
        // hand-counted byte offsets to keep in sync.
        if let Some(digits) = body.strip_prefix("Number(") {
            return digits
                .parse::<f64>()
                .map(Self::Number)
                .map_err(|e| format!("Invalid Number: {}", e));
        }
        if let Some(inner) = body.strip_prefix("Indexable(") {
            let indexable = IndexableValue::try_from(inner.to_string())?;
            return Ok(Self::Indexable(indexable));
        }

        Err(format!("Invalid Value: {}", value))
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::{IndexableValue, Value}; use super::{IndexableValue, Value};
use crate::error::TypeConversionError::UnknownType; use crate::error::TypeConversionError::UnknownType;
use crate::type_system::Value::{Indexable, Number};
#[test] #[test]
fn test_encode_number() { fn test_encode_number() {
@ -213,4 +301,25 @@ mod tests {
Err(UnknownType { oid: 2950, size: 8 }) Err(UnknownType { oid: 2950, size: 8 })
)) ))
} }
#[test]
fn test_value_stringification() {
    // Each case pairs a value with the exact text `String::from` must produce for it.
    let cases = [
        (Number(1.0), "Number(1)"),
        (
            Indexable(IndexableValue::String("hello".to_string())),
            "Indexable(String(hello))",
        ),
        (Indexable(IndexableValue::Int(123)), "Indexable(Int(123))"),
        (Indexable(IndexableValue::Uuid(123)), "Indexable(Uuid(123))"),
    ];

    for (original, expected_text) in cases {
        let encoded = String::from(original.clone());
        assert_eq!(encoded, expected_text);

        // Parsing the text back must reproduce the value we started from.
        let decoded = Value::try_from(encoded);
        assert_eq!(decoded, Ok(original));
    }
}
} }