Prepare for garbage collection

This commit is contained in:
Yuriy Dupyn 2024-02-03 23:45:55 +01:00
parent 0f98903759
commit daa39850f0
3 changed files with 115 additions and 28 deletions

View file

@ -13,7 +13,7 @@ use crate::binary_coding::{encode, decode};
use crate::entry::{Entry, EntryDetailed};
use crate::entry_header::{EntryHeaderWithDataSize, EntryHeader};
use crate::store_header::StoreHeader;
use crate::storage_engine::{Store, FilePosition, Column, Result, ROWS_FILE_NAME};
use crate::storage_engine::{Store, FilePosition, Column, Result, ROWS_FILE_NAME, GARBAGE_COLLECTION_INTERMEDIATE_ROWS_FILE_NAME};
#[async_trait]
// TODO: Make this private
@ -261,7 +261,7 @@ impl <T> ReadCursor<T> {
pub async fn new(store: &Store<T>) -> Result<Self>
where T: Send
{
let path_to_rows = Path::new(&store.table_folder).join(ROWS_FILE_NAME);
let path_to_rows = Path::new(&store.header.table_folder).join(ROWS_FILE_NAME);
let file: File =
OpenOptions::new()
.read(true)
@ -293,7 +293,7 @@ impl <'cursor, T> WriteCursor<'cursor, T> {
pub async fn new<'store: 'cursor>(store: &'store mut Store<T>) -> Result<Self>
where T: Send
{
let path_to_rows = Path::new(&store.table_folder).join(ROWS_FILE_NAME);
let path_to_rows = Path::new(&store.header.table_folder).join(ROWS_FILE_NAME);
let file: File =
OpenOptions::new()
.read(true)
@ -315,7 +315,33 @@ impl <'cursor, T> WriteCursor<'cursor, T> {
Ok(cursor)
}
/// Attaches a write cursor to an already existing rows file.
///
/// Takes the path to the rows file and the header directly instead of going through a
/// `Store`. The file is opened for both reading and writing; no create flag is set, so the
/// file has to exist already.
///
/// On success the returned cursor has its `eof_file_position` set to the value reported by
/// `seek_to_end`, and `seek_to_start_of_data` has been called on it.
///
/// # Errors
/// Propagates any error from opening the file or from either seek.
pub async fn connect<'header: 'cursor>(path_to_rows: &str, header: &'header mut StoreHeader) -> Result<Self>
where
    T: Send,
{
    let mut open_options = OpenOptions::new();
    open_options.read(true).write(true);
    let rows_file: File = open_options.open(path_to_rows).await?;

    // The end-of-file offset is only known after seeking, so start from a placeholder of 0.
    let mut connected = Self {
        eof_file_position: 0,
        data_type: PhantomData::<T>,
        file: rows_file,
        header,
    };
    let end_of_file: FilePosition = connected.seek_to_end().await?;
    connected.eof_file_position = end_of_file;

    connected.seek_to_start_of_data().await?;
    Ok(connected)
}
// ===Primitive Operations===
async fn write_bytes(&mut self, bytes: &[u8]) -> Result<usize> {
Ok(self.file.write(bytes).await?)
@ -389,6 +415,18 @@ impl <'cursor, T> WriteCursor<'cursor, T> {
}
}
/// Looks up the first entry matching `t0` in `column` via `find_first_eq_bruteforce` and,
/// when one is found, marks it as deleted at its file position.
///
/// Returns `Ok(Some(entry))` with the entry that was marked, or `Ok(None)` when the search
/// found nothing (in which case nothing is marked).
///
/// # Errors
/// Propagates any error from the search or from `mark_deleted_at`.
async fn find_first_eq_bruteforce_and_delete(&mut self, column: Column, t0: &T) -> Result<Option<EntryDetailed<T>>>
where
    T: Decode + PartialEq + Send + Sync,
{
    // Guard clause: nothing matched, so there is nothing to delete.
    let Some(matched) = self.find_first_eq_bruteforce(column, t0).await? else {
        return Ok(None);
    };

    self.mark_deleted_at(matched.file_position).await?;
    Ok(Some(matched))
}
async fn attempt_garbage_collection_if_necessary(&mut self) -> Result<()> {
// TODO: What should be the policy? Counting size of garbage? Counting how many entries are
// garbage?
@ -398,6 +436,46 @@ impl <'cursor, T> WriteCursor<'cursor, T> {
Ok(())
}
}
/// Work in progress: begins a garbage-collection pass over this table's rows file.
///
/// What the body does today:
/// 1. Builds the path `<table_folder>/GARBAGE_COLLECTION_INTERMEDIATE_ROWS_FILE_NAME`.
/// 2. Calls `Store::<T>::create_empty_rows_file` with that path and the current header
///    (presumably this creates the intermediate file on disk -- confirm against `Store`).
/// 3. Prepares a fresh `StoreHeader` for the intermediate file with both counters at zero.
/// 4. Reaches the first `todo!()`, which panics; every statement after it is unreachable
///    scaffolding that sketches the intended design.
///
/// The meaning of the returned `usize` is not established by the code yet.
///
/// # Errors
/// Propagates any error from `create_empty_rows_file`.
///
/// # Panics
/// Always panics via `todo!()` once the intermediate file call has succeeded.
async fn initiate_garbage_collection(&mut self) -> Result<usize>
where T: Send
{
// Owned copy of the folder name: it is used for the path below and then moved into the
// intermediate header.
let table_folder = self.header.table_folder.to_string();
let path_to_table = Path::new(&table_folder);
let path_to_rows = path_to_table.join(GARBAGE_COLLECTION_INTERMEDIATE_ROWS_FILE_NAME);
// NOTE(review): `intermediate_file` is not used yet; it is only referenced by the
// commented-out cursor construction below.
let intermediate_file: File = Store::<T>::create_empty_rows_file(path_to_rows, &self.header).await?;
// Header for the intermediate file: same folder, column count and primary column as the
// current header, but with `deleted_count` and `total_count` reset to 0.
let mut intermediate_header: StoreHeader = StoreHeader {
table_folder,
number_of_columns: self.header.number_of_columns,
deleted_count: 0,
total_count: 0,
primary_column: self.header.primary_column
};
// Intended next step: create a new cursor to the intermediate file into which the live
// entries will be dumped. The direct construction is kept here, disabled, as a sketch:
// let mut cursor_to_intermediate = Self {
// header: &mut intermediate_header,
// file: intermediate_file,
// data_type: PhantomData::<T>,
// eof_file_position: 0,
// };
// Placeholder for that cursor; this `todo!()` panics, so nothing below executes.
let mut cursor_to_intermediate: Self = todo!();
let eof_file_position: FilePosition = cursor_to_intermediate.seek_to_end().await?;
cursor_to_intermediate.eof_file_position = eof_file_position;
// TODO: `intermediate_header` does not live long enough to be borrowed by the cursor, and
// it is still needed after garbage collection is done, for the swap.
cursor_to_intermediate.header = todo!();
// Planned remainder: the intermediate file will contain only the alive rows; afterwards
// the files are swapped and the garbage is deleted.
todo!()
}
}