Thinking about indexes

This commit is contained in:
Yuriy Dupyn 2024-02-03 15:48:44 +01:00
parent 85ef52dfb4
commit dbd2ba9946
2 changed files with 72 additions and 5 deletions

View file

@ -4,13 +4,14 @@ use tokio::fs::{File, OpenOptions, DirBuilder};
use std::path::Path; use std::path::Path;
use std::collections::{BTreeMap}; use std::collections::{BTreeMap};
use async_trait::async_trait;
use bincode; use bincode;
use bincode::{Decode, Encode}; use bincode::{Decode, Encode};
use crate::binary_coding::{encode, decode, encode_sequence, decode_sequence}; use crate::binary_coding::{encode, decode, encode_sequence, decode_sequence};
use tokio::fs; use tokio::fs;
use crate::error::{Error, DecodeErrorKind}; use crate::error::Error;
use std::mem::size_of; use std::mem::size_of;
@ -19,6 +20,7 @@ type Result<T> = std::result::Result<T, Error>;
// Implements a persistent self-balancing Binary Search Tree. Nope. // Implements a persistent self-balancing Binary Search Tree. Nope.
// We need fixed-size nodes. But we want to index Strings which are variable length. // We need fixed-size nodes. But we want to index Strings which are variable length.
#[derive(Debug)]
pub struct Index<K, V> { pub struct Index<K, V> {
file: File, file: File,
// None means index is asleep on disk. // None means index is asleep on disk.
@ -28,11 +30,28 @@ pub struct Index<K, V> {
value_type: PhantomData<V>, value_type: PhantomData<V>,
} }
#[derive(Debug)]
pub struct IndexHeader { pub struct IndexHeader {
} }
use crate::storage_engine::FilePosition;
/// Capability required of a store so that an index can ask it to compare two
/// entries identified only by their file positions.
///
/// `Index::lookup` is generic over `Store: SomethingSupportingLeq` and calls
/// `less_than_eq` on it, so the index never needs to name a concrete store type.
#[async_trait]
pub trait SomethingSupportingLeq {
/// Asynchronously compares the entries at `file_position0` and `file_position1`.
///
/// Takes `&mut self`, so the implementor may mutate its own state while comparing.
/// NOTE(review): judging by the name, `Ok(true)` presumably means "entry at
/// `file_position0` <= entry at `file_position1`" — confirm once a real
/// implementation exists.
async fn less_than_eq(&mut self, file_position0: FilePosition, file_position1: FilePosition) -> std::result::Result<bool, Error>;
}
impl <K, V>Index<K, V> { impl <K, V>Index<K, V> {
pub async fn new(file_name: &str) -> Result<Index<K, V>> { // TODO: delete
// pub async fn new<F, Fut, Store>(file_name: &str, less_than_eq: &F) -> Result<Index<K, V>>
// where F: Fn(&mut Store, K, K) -> Fut,
// Store: SomethingSupportingLeq,
// Fut: Future<Output=std::result::Result<bool, Error>>,
// {
// todo!()
// }
pub async fn new(file_name: &str) -> Result<Index<K, V>>
{
todo!() todo!()
} }
@ -56,9 +75,11 @@ impl <K, V>Index<K, V> {
todo!() todo!()
} }
pub async fn lookup(&mut self, k: K) -> Result<Option<V>> pub async fn lookup<Store>(&mut self, store: &mut Store, k: K) -> Result<Option<V>>
where K: Encode + Decode, where K: Encode + Decode,
Store: SomethingSupportingLeq,
{ {
let x = store.less_than_eq(123, 123).await?;
todo!() todo!()
} }

View file

@ -3,13 +3,20 @@ use tokio::fs::{File, OpenOptions, DirBuilder};
use std::path::Path; use std::path::Path;
use std::marker::PhantomData; use std::marker::PhantomData;
use async_trait::async_trait;
use bincode; use bincode;
use bincode::{Decode, Encode}; use bincode::{Decode, Encode};
use crate::binary_coding::{encode, decode, encode_sequence, encode_sequence_with_sizes, decode_sequence}; use crate::binary_coding::{encode, decode, encode_sequence, encode_sequence_with_sizes, decode_sequence};
use tokio::fs; use tokio::fs;
use crate::index::SomethingSupportingLeq;
use crate::error::{Error, DecodeErrorKind}; use crate::error::{Error, DecodeErrorKind};
use crate::index::Index;
use std::cell::RefCell;
use std::mem::size_of; use std::mem::size_of;
type Result<T> = std::result::Result<T, Error>; type Result<T> = std::result::Result<T, Error>;
@ -24,6 +31,7 @@ pub struct Store<T> {
file: File, file: File,
// primary_index: Vec<Index<T, FilePosition>>>, // primary_index: Vec<Index<T, FilePosition>>>,
// indexes: Vec<Option<Index<T, HashSet<FilePosition>>>>, // indexes: Vec<Option<Index<T, HashSet<FilePosition>>>>,
// primary_index: Index<PositionOfValue, PositionOfRow>,
header: StoreHeader, header: StoreHeader,
data_type: PhantomData<T>, data_type: PhantomData<T>,
@ -36,6 +44,11 @@ pub struct Store<T> {
// list // list
} }
// Both aliases are plain `FilePosition`s; the two names only document intent and
// give no type-level distinction.
// NOTE(review): presumably `PositionOfValue` locates an indexed value and
// `PositionOfRow` locates the row that contains it (see the commented-out
// `primary_index` field) — confirm.
type PositionOfValue = FilePosition;
type PositionOfRow = FilePosition;
// TODO: Basically a pointer to Store + its own file position // TODO: Basically a pointer to Store + its own file position
// pub struct Cursor<'a, T> { // pub struct Cursor<'a, T> {
// } // }
@ -113,6 +126,23 @@ pub async fn store_exists(table_folder: &str) -> Result<bool> {
Ok(fs::metadata(table_folder).await.is_ok()) Ok(fs::metadata(table_folder).await.is_ok())
} }
/// Free-function form of the file-position comparison that takes the store explicitly.
///
/// Not implemented yet: the body is `todo!()`, so the returned future panics as
/// soon as it is polled. `store`, `file_position0` and `file_position1` are
/// currently unused.
/// NOTE(review): intended result is presumably "entry at `file_position0` <=
/// entry at `file_position1`" — confirm when implementing.
pub async fn less_than_eq<T>(store: &mut Store<T>, file_position0: FilePosition, file_position1: FilePosition) -> Result<bool> {
todo!()
}
// pub trait SomethingSupportingLeq {
// async fn less_than_eq(&mut self, file_position0: FilePosition, file_position1: FilePosition) -> std::result::Result<bool, Error>;
// }
/// Lets a `Store<T>` be passed wherever a `SomethingSupportingLeq` is required
/// (for example as the `store` argument of `Index::lookup`).
///
/// NOTE(review): the `T: Send` bound is presumably needed because `#[async_trait]`
/// produces `Send` futures by default — confirm.
#[async_trait]
impl <T>SomethingSupportingLeq for Store<T>
where T: Send
{
// Placeholder implementation: both file positions are ignored and the
// comparison always reports `Ok(true)`. No data is read from the store.
async fn less_than_eq(&mut self, file_position0: FilePosition, file_position1: FilePosition) -> std::result::Result<bool, Error> {
Ok(true)
}
}
impl <T>Store<T> { impl <T>Store<T> {
const ROWS_FILE_NAME: &'static str = "rows"; const ROWS_FILE_NAME: &'static str = "rows";
@ -168,6 +198,10 @@ impl <T>Store<T> {
Ok(bytes) Ok(bytes)
} }
/// Inherent comparison of two entries by file position.
///
/// Not implemented yet: the body is `todo!()`, so the returned future panics as
/// soon as it is polled. Both position arguments are currently unused.
/// NOTE(review): `Store<T>` also implements `SomethingSupportingLeq::less_than_eq`
/// with the same name and arguments. On a concrete `Store<T>`, method-call syntax
/// picks this inherent method ahead of the trait method — confirm which of the
/// two is meant to survive.
pub async fn less_than_eq(&mut self, file_position0: FilePosition, file_position1: FilePosition) -> Result<bool> {
todo!()
}
// ===Creation=== // ===Creation===
pub async fn new(table_folder: &str, number_of_columns: usize, primary_column: Column) -> Result<Self> { pub async fn new(table_folder: &str, number_of_columns: usize, primary_column: Column) -> Result<Self> {
let path_to_table = Path::new(table_folder); let path_to_table = Path::new(table_folder);
@ -190,8 +224,11 @@ impl <T>Store<T> {
primary_column, primary_column,
}; };
let encoded_header: Vec<u8> = header.encode()?; let encoded_header: Vec<u8> = header.encode()?;
// Index::new<T, FilePosition>(format!("rows", primary_column.to_string())) // let index: Index<PositionOfValue, PositionOfRow> = Index::new(
// &format!("rows_{}", primary_column.to_string()),
// ).await?;
let mut store = Self { let mut store = Self {
table_folder: table_folder.to_string(), table_folder: table_folder.to_string(),
@ -202,6 +239,7 @@ impl <T>Store<T> {
}; };
store.write_bytes(&encoded_header).await?; store.write_bytes(&encoded_header).await?;
store.eof_file_position = store.current_file_position().await?; store.eof_file_position = store.current_file_position().await?;
Ok(store) Ok(store)
} }
@ -231,7 +269,7 @@ impl <T>Store<T> {
file, file,
header, header,
data_type: PhantomData::<T>, data_type: PhantomData::<T>,
eof_file_position eof_file_position,
}; };
Ok(store) Ok(store)
} }
@ -298,6 +336,14 @@ impl <T>Store<T> {
Ok(header) Ok(header)
} }
/// Searches the store for `index`.
///
/// Not implemented yet: the body is `todo!()`, so the returned future panics as
/// soon as it is polled, and the `index` argument is currently unused.
/// NOTE(review): the commented-out sketch below suggests the lookup is meant to go
/// through a `primary_index` field, passing `self` as the comparison store —
/// confirm the design before implementing.
pub async fn search_for(&mut self, index: T) -> Result<()>
where T: Send
{
// let index = self.primary_index.borrow_mut();
// let x = index.lookup(self, 123).await?;
todo!()
}
// Returns None when file_position == eof_file_position // Returns None when file_position == eof_file_position
pub async fn read_entry_at(&mut self, file_position: FilePosition) -> Result<Option<EntryDetailed<T>>> pub async fn read_entry_at(&mut self, file_position: FilePosition) -> Result<Option<EntryDetailed<T>>>
where T: Decode where T: Decode