Implement Garbage Collection

This commit is contained in:
Yuriy Dupyn 2024-02-05 01:34:16 +01:00
parent 1086b2fc5e
commit f3fc67cbbc
4 changed files with 43 additions and 20 deletions

View file

@ -602,7 +602,7 @@ impl <'cursor, T> WriteCursor<'cursor, T>
// ===Deletion===
pub async fn mark_deleted_at(&mut self, file_position: FilePosition) -> Result<()>
where T: Encode + Decode + Ord + Send + Sync
where T: Encode + Decode + Ord + Send + Sync + Clone
{
self.seek_to(file_position).await?;
let mut entry_header = self.read_entry_header().await?;
@ -634,7 +634,7 @@ impl <'cursor, T> WriteCursor<'cursor, T>
}
async fn find_first_eq_bruteforce_and_delete(&mut self, column: Column, t0: &T) -> Result<Option<EntryDetailed<T>>>
where T: Encode + Decode + Ord + Send + Sync
where T: Encode + Decode + Ord + Send + Sync + Clone
{
let maybe_entry = self.find_first_eq_bruteforce(column, t0).await?;
if let Some(entry) = maybe_entry {
@ -669,7 +669,7 @@ impl <'cursor, T> WriteCursor<'cursor, T>
// ===Garbage Collection===
async fn attempt_garbage_collection_if_necessary(&mut self) -> Result<()>
where T: Send + Decode + Encode
where T: Send + Decode + Encode + Clone + Ord
{
// TODO: What should be the policy? Counting size of garbage? Counting how many entries are
// garbage?
@ -680,11 +680,21 @@ impl <'cursor, T> WriteCursor<'cursor, T>
}
async fn initiate_garbage_collection(&mut self) -> Result<usize>
where T: Send + Decode + Encode
where T: Send + Decode + Encode + Clone + Ord
{
let mut cursor_to_intermediate = self.spawn_cursor_to_intermediate_file().await?;
// This will be a vector of such BTree maps...
let in_memory_index: BTreeMap<T, HashSet<FilePosition>> = BTreeMap::new();
// Since garbage collection changes FilePositions of live entries, we need to update the
// indexes too.
let mut in_memory_indexes: Vec<Option<BTreeMap<T, HashSet<FilePosition>>>> = Vec::with_capacity(self.header.number_of_columns);
for column in 0..self.header.number_of_columns {
if self.header.is_column_indexed(column as Column) {
let in_memory_index = BTreeMap::new();
in_memory_indexes.push(Some(in_memory_index))
} else {
in_memory_indexes.push(None)
}
}
// We'll dump all alive entries into a new file.
let mut entries_deleted = 0;
@ -692,25 +702,31 @@ impl <'cursor, T> WriteCursor<'cursor, T>
while let Some(live_entry) = self.next_alive().await? {
entries_deleted += 1;
let file_position = cursor_to_intermediate.append_entry_no_indexing(&live_entry.forget()).await?;
// TODO: Start indexing all of the indexable columns from scratch.
// Update index. (Wouldn't it be nice if we had `for let ...`?)
for (maybe_in_memory_index, value) in in_memory_indexes.iter_mut().zip(&live_entry.data) {
if let Some(in_memory_index) = maybe_in_memory_index {
in_memory_index.entry(value.clone()).or_insert_with(HashSet::new).insert(file_position);
}
}
}
}
// TODO: Create new indexes from in_memory_index.
// Afterwards we swap the files, and delete the garbage.
// TODO:
// What needs to be done?
// 1. We take self cursor and mutate it
// ===swap===
// swapping indexes
// Update indexes on disk.
for (column, maybe_in_memory_index) in in_memory_indexes.into_iter().enumerate() {
if let Some(in_memory_index) = maybe_in_memory_index {
let index = self.mut_index_at(column as Column);
index.reset(in_memory_index).await?;
}
}
// swapping headers
self.header.deleted_count = 0;
self.header.total_count = cursor_to_intermediate.header.total_count;
// TODO: We'll actually have to iterate through all the indexes and swap each of them.
self.indexes = todo!();
self.file = cursor_to_intermediate.file;
self.eof_file_position = cursor_to_intermediate.eof_file_position;
Ok(entries_deleted)