Change entry header. Expand API.

This commit is contained in:
Yuriy Dupyn 2024-02-02 18:27:32 +01:00
parent 2f23df1009
commit cac34d95e0
6 changed files with 150 additions and 39 deletions

View file

@ -45,6 +45,18 @@ pub fn encode_sequence<T: Encode>(ts: &[T]) -> Result<Vec<u8>, bincode::error::E
Ok(result) Ok(result)
} }
/// Encodes every element of `ts` into one contiguous byte buffer, additionally
/// returning the encoded byte length of each element (in order), so a reader
/// can later split the buffer back into per-element chunks.
pub fn encode_sequence_with_sizes<T: Encode>(ts: &[T]) -> Result<(Vec<u8>, Vec<usize>), bincode::error::EncodeError> {
    let mut buffer: Vec<u8> = Vec::new();
    // One size entry per input element, reserved up front.
    let mut element_sizes: Vec<usize> = Vec::with_capacity(ts.len());
    for element in ts.iter() {
        let mut chunk = encode(&element)?;
        element_sizes.push(chunk.len());
        buffer.append(&mut chunk);
    }
    Ok((buffer, element_sizes))
}
pub fn decode_sequence<T: Decode>(len: usize, bytes: &[u8]) -> Result<Vec<T>, bincode::error::DecodeError> { pub fn decode_sequence<T: Decode>(len: usize, bytes: &[u8]) -> Result<Vec<T>, bincode::error::DecodeError> {
let mut result: Vec<T> = Vec::with_capacity(len); let mut result: Vec<T> = Vec::with_capacity(len);
let mut offset = 0; let mut offset = 0;

View file

@ -11,9 +11,10 @@ pub enum DecodeErrorKind {
StoreHeaderNumberOfColumns, StoreHeaderNumberOfColumns,
StoreHeaderDeletedCount, StoreHeaderDeletedCount,
StoreHeaderTotalCount, StoreHeaderTotalCount,
StoreHeaderPrimaryColumn,
EntryData, EntryData,
EntryIsDeleted, EntryIsDeleted,
EntryDataSize EntryHeaderWithDataSizes,
} }
// ===Errors=== // ===Errors===

View file

@ -0,0 +1,68 @@
use std::marker::PhantomData;
use tokio::io::{AsyncReadExt, AsyncWriteExt, AsyncSeekExt, SeekFrom};
use tokio::fs::{File, OpenOptions, DirBuilder};
use std::path::Path;
use std::collections::{BTreeMap};
use bincode;
use bincode::{Decode, Encode};
use crate::binary_coding::{encode, decode, encode_sequence, decode_sequence};
use tokio::fs;
use crate::error::{Error, DecodeErrorKind};
use std::mem::size_of;
type Result<T> = std::result::Result<T, Error>;
// Implements a persistent self-balancing Binary Search Tree. Nope.
// We need fixed-size nodes, but we want to index Strings, which are variable-length.
/// An on-disk key/value index, generic over key type `K` and value type `V`.
/// The index can either be resident in memory or "asleep" on disk.
pub struct Index<K, V> {
    // Backing file handle for the persisted index data.
    file: File,
    // None means index is asleep on disk.
    in_memory: Option<BTreeMap<K, V>>,
    // File-level metadata; currently an empty placeholder.
    header: IndexHeader,
    // NOTE(review): these PhantomData markers look redundant — K and V are
    // already used by `in_memory`, so the compiler tracks both parameters
    // without them. Consider removing; confirm nothing constructs them.
    key_type: PhantomData<K>,
    value_type: PhantomData<V>,
}
/// Placeholder for the index file's on-disk metadata; fields to be added
/// once the file format is defined.
pub struct IndexHeader {
}
// NOTE(review): this impl renames the struct's `K` parameter to `I`; that is
// legal but inconsistent — consider using `K` here as well.
impl <I, V>Index<I, V> {
    // Creates a new, empty index backed by a fresh file at `file_name`. (stub)
    pub async fn new(file_name: &str) -> Result<Index<I, V>> {
        todo!()
    }
    // Opens an existing index file at `file_name`. (stub)
    pub async fn connect(file_name: &str) -> Result<Index<I, V>> {
        todo!()
    }
    // Saves the in-memory index to disk and deallocates.
    // NOTE(review): takes no `self`, yet it must act on an existing index —
    // presumably the signature should consume `self` (or take `&mut self`);
    // confirm intended API before implementing.
    pub async fn sleep() -> Result<Index<I, V>> {
        todo!()
    }
    // Loads the index into memory
    // NOTE(review): same signature question as `sleep` — needs access to an
    // existing Index to wake it.
    pub async fn wake() -> Result<Index<I, V>> {
        todo!()
    }
    // NOTE(review): no receiver and no key/value parameters yet — a usable
    // insert will likely need something like (&mut self, k: I, v: V).
    pub async fn insert() -> Result<()>
        where I: Encode, V: Encode
    {
        todo!()
    }
    // Looks up `k`, returning its value if present. (stub)
    pub async fn lookup(&mut self, k: I) -> Result<Option<V>>
        where I: Encode + Decode,
    {
        todo!()
    }
    // Removes `k`, returning the previous value if one existed. (stub)
    pub async fn delete(&mut self, k: I) -> Result<Option<V>> {
        todo!()
    }
}

View file

@ -1,3 +1,4 @@
pub mod storage_engine; pub mod storage_engine;
mod binary_coding; mod binary_coding;
mod error; mod error;
mod index;

View file

@ -7,6 +7,7 @@ use std::path::Path;
mod storage_engine; mod storage_engine;
mod binary_coding; mod binary_coding;
mod error; mod error;
mod index;
use crate::storage_engine::*; use crate::storage_engine::*;
@ -17,7 +18,7 @@ const TABLE_PATH: &'static str = "test_table";
type Result<T> = std::result::Result<T, std::io::Error>; type Result<T> = std::result::Result<T, std::io::Error>;
async fn create_store() -> Result<Store<Data>> { async fn create_store() -> Result<Store<Data>> {
let mut store: Store<Data> = Store::new(TABLE_PATH, 5).await.map_err(|e| e.to_io_or_panic())?; let mut store: Store<Data> = Store::new(TABLE_PATH, 5, 0).await.map_err(|e| e.to_io_or_panic())?;
println!("CREATED"); println!("CREATED");
println!("{:?}", store.read_all_bytes().await?); println!("{:?}", store.read_all_bytes().await?);
@ -77,7 +78,7 @@ async fn main() -> Result<()> {
// let entry0: Entry<u32> = Entry::new(vec![99, 98, 97, 96, 95]); // let entry0: Entry<u32> = Entry::new(vec![99, 98, 97, 96, 95]);
// append_entry(&mut store, &entry0).await?; // append_entry(&mut store, &entry0).await?;
store.read_entries(3).await.map_err(|e| e.to_io_or_panic())?; store.read_entries(2).await.map_err(|e| e.to_io_or_panic())?;
// let entry2: StoreEntry<u32> = StoreEntry::new_deleted(vec![3, 2, 1]); // let entry2: StoreEntry<u32> = StoreEntry::new_deleted(vec![3, 2, 1]);

View file

@ -5,7 +5,7 @@ use std::marker::PhantomData;
use bincode; use bincode;
use bincode::{Decode, Encode}; use bincode::{Decode, Encode};
use crate::binary_coding::{encode, decode, encode_sequence, decode_sequence}; use crate::binary_coding::{encode, decode, encode_sequence, encode_sequence_with_sizes, decode_sequence};
use tokio::fs; use tokio::fs;
use crate::error::{Error, DecodeErrorKind}; use crate::error::{Error, DecodeErrorKind};
@ -37,16 +37,19 @@ pub struct StoreHeader {
number_of_columns: usize, number_of_columns: usize,
deleted_count: usize, deleted_count: usize,
total_count: usize, total_count: usize,
primary_column: Column,
} }
impl StoreHeader { impl StoreHeader {
const NUMBER_OF_COLUMNS_SIZE: usize = size_of::<usize>(); const NUMBER_OF_COLUMNS_SIZE: usize = size_of::<usize>();
const DELETED_COUNT_SIZE: usize = size_of::<usize>(); const DELETED_COUNT_SIZE: usize = size_of::<usize>();
const TOTAL_COUNT_SIZE: usize = size_of::<usize>(); const TOTAL_COUNT_SIZE: usize = size_of::<usize>();
const SIZE: usize = Self::NUMBER_OF_COLUMNS_SIZE + Self::DELETED_COUNT_SIZE + Self::TOTAL_COUNT_SIZE; const PRIMARY_COLUMN_SIZE: usize = size_of::<Column>();
const SIZE: usize = Self::NUMBER_OF_COLUMNS_SIZE + Self::DELETED_COUNT_SIZE + Self::TOTAL_COUNT_SIZE + Self::PRIMARY_COLUMN_SIZE;
const NUMBER_OF_COLUMNS_OFFSET: usize = 0; const NUMBER_OF_COLUMNS_OFFSET: usize = 0;
const DELETED_COUNT_OFFSET: usize = Self::NUMBER_OF_COLUMNS_OFFSET + Self::NUMBER_OF_COLUMNS_SIZE; const DELETED_COUNT_OFFSET: usize = Self::NUMBER_OF_COLUMNS_OFFSET + Self::NUMBER_OF_COLUMNS_SIZE;
const TOTAL_COUNT_OFFSET: usize = Self::DELETED_COUNT_OFFSET + Self::DELETED_COUNT_SIZE; const TOTAL_COUNT_OFFSET: usize = Self::DELETED_COUNT_OFFSET + Self::DELETED_COUNT_SIZE;
const PRIMARY_COLUMN_OFFSET: usize = Self::TOTAL_COUNT_OFFSET + Self::TOTAL_COUNT_SIZE;
} }
#[derive(Debug)] #[derive(Debug)]
@ -61,12 +64,22 @@ impl EntryHeader {
#[derive(Debug)] #[derive(Debug)]
pub struct EntryHeaderWithDataSize { pub struct EntryHeaderWithDataSize {
is_deleted: bool, is_deleted: bool,
data_size: usize, // in bytes data_sizes: Vec<usize>, // vec![5, 6, 20] means that column 0 stores 5 bytes, column 1 stores 6
// bytes etc
} }
impl EntryHeaderWithDataSize { impl EntryHeaderWithDataSize {
const IS_DELETED_OFFSET: usize = 0;
const IS_DELETED_SIZE: usize = size_of::<bool>(); const IS_DELETED_SIZE: usize = size_of::<bool>();
const DATA_SIZE_SIZE: usize = size_of::<usize>(); const DATA_SIZES_OFFSET: usize = Self::IS_DELETED_OFFSET + Self::IS_DELETED_SIZE;
const SIZE: usize = Self::IS_DELETED_SIZE + Self::DATA_SIZE_SIZE;
fn size(number_of_columns: usize) -> usize {
let size_of_data_sizes: usize = number_of_columns*size_of::<usize>();
Self::IS_DELETED_SIZE + size_of_data_sizes
}
fn size_of_data(&self) -> usize{
self.data_sizes.iter().sum()
}
} }
#[derive(Debug)] #[derive(Debug)]
@ -142,7 +155,7 @@ impl <T>Store<T> {
const ROWS_FILE_NAME: &'static str = "rows"; const ROWS_FILE_NAME: &'static str = "rows";
// ===Creation=== // ===Creation===
pub async fn new(table_folder: &str, number_of_columns: usize) -> Result<Self> { pub async fn new(table_folder: &str, number_of_columns: usize, primary_column: Column) -> Result<Self> {
let path_to_table = Path::new(table_folder); let path_to_table = Path::new(table_folder);
let path_to_rows = path_to_table.join(Self::ROWS_FILE_NAME); let path_to_rows = path_to_table.join(Self::ROWS_FILE_NAME);
DirBuilder::new() DirBuilder::new()
@ -160,9 +173,12 @@ impl <T>Store<T> {
number_of_columns, number_of_columns,
deleted_count: 0, deleted_count: 0,
total_count: 0, total_count: 0,
primary_column,
}; };
let encoded_header: Vec<u8> = header.encode()?; let encoded_header: Vec<u8> = header.encode()?;
println!("ENCODED_HEADER: {:?}", encoded_header);
let mut store = Self { let mut store = Self {
table_folder: table_folder.to_string(), table_folder: table_folder.to_string(),
file, file,
@ -173,7 +189,9 @@ impl <T>Store<T> {
Ok(store) Ok(store)
} }
pub async fn connect(table_folder: &str) -> Result<Self> { pub async fn connect(table_folder: &str) -> Result<Self>
where T: std::fmt::Debug
{
let path_to_table = Path::new(table_folder); let path_to_table = Path::new(table_folder);
let path_to_rows = path_to_table.join(Self::ROWS_FILE_NAME); let path_to_rows = path_to_table.join(Self::ROWS_FILE_NAME);
@ -190,12 +208,14 @@ impl <T>Store<T> {
file.read_exact(&mut header_bytes).await?; file.read_exact(&mut header_bytes).await?;
let header = StoreHeader::decode(&mut header_bytes).await?; let header = StoreHeader::decode(&mut header_bytes).await?;
Ok(Self { let store = Self {
table_folder: table_folder.to_string(), table_folder: table_folder.to_string(),
file, file,
header, header,
data_type: PhantomData::<T>, data_type: PhantomData::<T>,
}) };
println!("just connected TOOOOO {:?}", store);
Ok(store)
} }
// ===Append Entry=== // ===Append Entry===
@ -247,9 +267,10 @@ impl <T>Store<T> {
self.seek_to(cursor).await?; self.seek_to(cursor).await?;
self.file.seek(SeekFrom::Start(cursor)).await?; self.file.seek(SeekFrom::Start(cursor)).await?;
let mut header_bytes: Vec<u8> = vec![0; EntryHeaderWithDataSize::SIZE]; let number_of_columns: usize = self.header.number_of_columns;
let mut header_bytes: Vec<u8> = vec![0; EntryHeaderWithDataSize::size(number_of_columns)];
self.read_bytes(&mut header_bytes).await?; self.read_bytes(&mut header_bytes).await?;
let header = EntryHeaderWithDataSize::decode(&mut header_bytes[..])?; let header = EntryHeaderWithDataSize::decode(&mut header_bytes[..], number_of_columns)?;
// TODO: Get rid of the println's // TODO: Get rid of the println's
// println!("HEADER_BYTES: {:?}", header_bytes); // println!("HEADER_BYTES: {:?}", header_bytes);
// println!("HEADER: {:?}", header); // println!("HEADER: {:?}", header);
@ -257,21 +278,12 @@ impl <T>Store<T> {
Ok(header) Ok(header)
} }
pub async fn read_entry_data(&mut self, header: &EntryHeaderWithDataSize) -> Result<Vec<T>> {
let mut data_bytes: Vec<u8> = vec![0; header.data_size];
// TODO: Get rid of the println's
// println!("HEADER_BYTES: {:?}", header_bytes);
// println!("PREPARED_DATA_BYTES: {:?}", data_bytes);
self.read_bytes(&mut data_bytes).await?;
todo!()
}
pub async fn read_entry_at(&mut self, cursor: Cursor) -> Result<EntryDetailed<T>> pub async fn read_entry_at(&mut self, cursor: Cursor) -> Result<EntryDetailed<T>>
where T: Decode where T: Decode
{ {
let header = self.read_entry_header_at(cursor).await?; let header = self.read_entry_header_at(cursor).await?;
let mut data_bytes: Vec<u8> = vec![0; header.data_size]; let mut data_bytes: Vec<u8> = vec![0; header.size_of_data()];
// TODO: Get rid of the println's // TODO: Get rid of the println's
// println!("PREPARED_DATA_BYTES: {:?}", data_bytes); // println!("PREPARED_DATA_BYTES: {:?}", data_bytes);
self.read_bytes(&mut data_bytes).await?; self.read_bytes(&mut data_bytes).await?;
@ -294,15 +306,29 @@ impl <T>Store<T> {
} }
Ok(()) Ok(())
} }
pub async fn search_for_entry_with_id(&mut self, id: T) -> Result<Option<EntryDetailed<T>>> {
// TODO: make call to the primary index
todo!()
}
// TODO: This needs to be some sort of an iterator
pub async fn get_eq(&self, column: Column, value: T) -> Result<Option<EntryDetailed<T>>> {
todo!()
}
pub async fn garbage_collect(&mut self) -> Result<()> {
todo!()
}
} }
// ===Store Header=== // ===Store Header===
impl StoreHeader { impl StoreHeader {
fn encode(&self) -> Result<Vec<u8>> { fn encode(&self) -> Result<Vec<u8>> {
// FORMAT: First Number of Columns, Then Deleted Count.
let mut result = encode(&self.number_of_columns)?; let mut result = encode(&self.number_of_columns)?;
result.append(&mut encode(&self.deleted_count)?); result.append(&mut encode(&self.deleted_count)?);
result.append(&mut encode(&self.total_count)?); result.append(&mut encode(&self.total_count)?);
result.append(&mut encode(&self.primary_column)?);
Ok(result) Ok(result)
} }
@ -317,13 +343,17 @@ impl StoreHeader {
let (deleted_count, _) = let (deleted_count, _) =
decode::<usize>(&result[Self::DELETED_COUNT_OFFSET..Self::DELETED_COUNT_OFFSET + Self::DELETED_COUNT_SIZE]) decode::<usize>(&result[Self::DELETED_COUNT_OFFSET..Self::DELETED_COUNT_OFFSET + Self::DELETED_COUNT_SIZE])
.map_err(|e| Error::DecodeError(DecodeErrorKind::StoreHeaderDeletedCount, e))?; .map_err(|e| Error::DecodeError(DecodeErrorKind::StoreHeaderDeletedCount, e))?;
let (total_count, _offset) = let (total_count, _) =
decode::<usize>(&result[Self::TOTAL_COUNT_OFFSET..Self::TOTAL_COUNT_OFFSET + Self::TOTAL_COUNT_SIZE]) decode::<usize>(&result[Self::TOTAL_COUNT_OFFSET..Self::TOTAL_COUNT_OFFSET + Self::TOTAL_COUNT_SIZE])
.map_err(|e| Error::DecodeError(DecodeErrorKind::StoreHeaderTotalCount, e))?; .map_err(|e| Error::DecodeError(DecodeErrorKind::StoreHeaderTotalCount, e))?;
let (primary_column, _) =
decode::<Column>(&result[Self::PRIMARY_COLUMN_OFFSET..Self::PRIMARY_COLUMN_OFFSET + Self::PRIMARY_COLUMN_SIZE])
.map_err(|e| Error::DecodeError(DecodeErrorKind::StoreHeaderPrimaryColumn, e))?;
let header = StoreHeader { let header = StoreHeader {
number_of_columns, number_of_columns,
deleted_count, deleted_count,
total_count, total_count,
primary_column,
}; };
Ok(header) Ok(header)
@ -351,15 +381,15 @@ impl EntryHeader {
} }
impl EntryHeaderWithDataSize { impl EntryHeaderWithDataSize {
fn decode(bytes: &mut [u8]) -> Result<Self> { fn decode(bytes: &mut [u8], number_of_columns: usize) -> Result<Self> {
let (is_deleted, offset) = let (is_deleted, _) =
decode::<bool>(&bytes) decode::<bool>(&bytes)
.map_err(|e| Error::DecodeError(DecodeErrorKind::EntryIsDeleted, e))?; .map_err(|e| Error::DecodeError(DecodeErrorKind::EntryIsDeleted, e))?;
let (data_size, _) = let data_sizes = decode_sequence::<usize>(number_of_columns, &bytes[Self::DATA_SIZES_OFFSET..])
decode::<usize>(&bytes[offset..]) .map_err(|e| Error::DecodeError(DecodeErrorKind::EntryHeaderWithDataSizes, e))?;
.map_err(|e| Error::DecodeError(DecodeErrorKind::EntryDataSize, e))?;
Ok(Self { is_deleted, data_size} ) Ok(Self { is_deleted, data_sizes } )
} }
} }
@ -372,19 +402,17 @@ impl <T>Entry<T> {
Self { header: EntryHeader { is_deleted: true}, data } Self { header: EntryHeader { is_deleted: true}, data }
} }
// FORMAT: [HEADER, ..sequence of data] // FORMAT: [EntryHeaderWithDataSize, ..sequence of data]
// HEADER: [Boolean (one byte), number of bytes in the data (not including the boolean)] fn encode(&self) -> Result<Vec<u8>>
fn encode(self: &Entry<T>) -> Result<Vec<u8>>
where T: Encode where T: Encode
{ {
let mut result: Vec<u8> = self.header.encode()?; let mut result: Vec<u8> = self.header.encode()?;
let mut encoded_data = encode_sequence(&self.data[..])?;
let encoded_data_len: usize = encoded_data.len(); let (mut encoded_data, sizes) = encode_sequence_with_sizes(&self.data[..])?;
result.append(&mut encode(&encoded_data_len)?); // usize 8 bytes result.append(&mut encode_sequence(&sizes)?); // sizes of data (fixed by number of columns)
result.append(&mut encoded_data); // data variable size result.append(&mut encoded_data); // data variable size
Ok(result) Ok(result)
} }
} }
impl <T>EntryDetailed<T> { impl <T>EntryDetailed<T> {