Can build indexes on new columns

This commit is contained in:
Yuriy Dupyn 2024-02-05 00:44:14 +01:00
parent 8139112934
commit b0f05f36f2
5 changed files with 110 additions and 14 deletions

View file

@ -173,9 +173,10 @@ pub trait CursorWithStoreHeader<T>: PrimitiveCursor<T> {
}
// Like next, but only reads the column, not the whole entry.
async fn next_at_column(&mut self, column: Column) -> Result<Option<(EntryHeaderWithDataSize, T)>>
async fn next_at_column(&mut self, column: Column) -> Result<Option<(EntryHeaderWithDataSize, FilePosition, T)>>
where T: Decode + Send
{
let file_position = self.current_file_position().await?;
let Some(entry_header) = self.next_entry_header().await? else { return Ok(None) };
let file_position_at_start_of_data = self.current_file_position().await?;
@ -194,9 +195,20 @@ pub trait CursorWithStoreHeader<T>: PrimitiveCursor<T> {
self.seek_to(file_position_at_start_of_data).await?;
self.jump_from_start_of_entry_data_to_next_entry(&entry_header).await?;
Ok(Some((entry_header, value)))
Ok(Some((entry_header, file_position, value)))
}
// Like next_at_column, but silently skips every entry whose header has
// `is_deleted` set.
//
// Returns the (header, file position, decoded column value) triple exactly as
// next_at_column produced it for the first non-deleted entry, or None once
// next_at_column itself reports None.
async fn next_alive_at_column(&mut self, column: Column) -> Result<Option<(EntryHeaderWithDataSize, FilePosition, T)>>
where T: Decode + Send
{
    loop {
        // Running out of entries ends the search without a result.
        let Some(candidate) = self.next_at_column(column).await? else { return Ok(None) };
        if candidate.0.is_deleted {
            // Deleted entry: keep advancing.
            continue;
        }
        return Ok(Some(candidate));
    }
}
async fn next_alive(&mut self) -> Result<Option<EntryDetailed<T>>>
where T: Decode
{
@ -213,7 +225,7 @@ pub trait CursorWithStoreHeader<T>: PrimitiveCursor<T> {
where T: Decode + PartialEq + Send + Sync
{
let mut file_position = self.current_file_position().await?;
while let Some((_, t)) = self.next_at_column(column).await? {
while let Some((_, _, t)) = self.next_alive_at_column(column).await? {
if &t == t0 {
// go back and decode the whole entry
self.seek_to(file_position).await?;
@ -225,6 +237,16 @@ pub trait CursorWithStoreHeader<T>: PrimitiveCursor<T> {
Ok(None)
}
// Gathers every entry reported by find_first_eq_bruteforce for (column, t0).
//
// find_first_eq_bruteforce is called repeatedly until it yields None; each hit
// is appended to the result in the order it was found. This function performs
// no seek of its own, so the search starts from wherever the cursor currently is.
async fn find_all_eq_bruteforce(&mut self, column: Column, t0: &T) -> Result<Vec<EntryDetailed<T>>>
where T: Decode + PartialEq + Send + Sync
{
    let mut matches = vec![];
    loop {
        match self.find_first_eq_bruteforce(column, t0).await? {
            Some(found) => matches.push(found),
            // No further match: hand back everything collected so far.
            None => return Ok(matches),
        }
    }
}
// ===Debugging===
async fn read_entries(&mut self) -> Result<()>
where T: Decode + std::fmt::Debug
@ -271,6 +293,16 @@ pub trait CursorWithWriteStoreHeader<T>: CursorWithStoreHeader<T> + PrimitiveWri
Ok(())
}
// Writes `header` at the start of the store.
//
// The cursor is moved to the start first, then the header is encoded and its
// bytes are written from that position. Any error from seeking, encoding or
// writing is propagated to the caller.
async fn set_header(&mut self, header: &StoreHeader) -> Result<()>
where T: Send
{
    // Seek before encoding: the cursor is repositioned even if encoding fails.
    self.seek_to_start().await?;

    let header_bytes: Vec<u8> = header.encode()?;
    self.write_bytes(&header_bytes).await?;

    Ok(())
}
// ===Append Entry===
// Moves cursor to the end.
@ -320,9 +352,16 @@ pub trait CursorWithAccessToIndex<T>: CursorWithStoreHeader<T> {
}
}
// TODO: I also need the global find
async fn lookup(&mut self, column: Column, k: &T) -> Result<Vec<EntryDetailed<T>>> {
todo!()
// Returns every entry whose value at `column` equals `value`.
//
// When the store header marks `column` as indexed, the lookup is delegated to
// index_lookup. Otherwise the data is scanned with find_all_eq_bruteforce.
//
// The brute-force scan reads forward from the current cursor position, so the
// cursor is first rewound to the start of the data. Without this the
// non-indexed path would only see entries located after the cursor, and the
// result would depend on what the cursor was used for previously.
// NOTE(review): presumably index_lookup is independent of the cursor position;
// confirm against its implementation.
//
// Errors from the header check's follow-up I/O (seek, index lookup, scan) are
// propagated to the caller.
async fn select_entries_where_eq(&mut self, column: Column, value: &T) -> Result<Vec<EntryDetailed<T>>>
where T: Encode + Decode + Ord + Send + Sync
{
    if self.header().is_column_indexed(column) {
        println!("INDEXED LOOKUP");
        self.index_lookup(column, value).await
    } else {
        println!("BRUTE-FORCE LOOKUP");
        // Scan the whole data section, not just what follows the cursor.
        self.seek_to_start_of_data().await?;
        self.find_all_eq_bruteforce(column, value).await
    }
}
}
@ -664,11 +703,25 @@ impl <'cursor, T> WriteCursor<'cursor, T>
}
// ===Indexing===
async fn insert_to_index(&mut self, t: T, file_position: FilePosition) -> Result<Option<FilePosition>>
where T: Encode + Decode + Ord + Send + Sync
// Creates an index for `column` and fills it from the entries already stored.
//
// Steps, in order:
//   1. create an empty index via Store::create_empty_index_at and register it
//      in self.indexes at position `column as usize`,
//   2. mark the column as indexed in the in-memory header and write that
//      header back with set_header,
//   3. seek to the start of the data and insert the column value and file
//      position of every non-deleted entry into the index.
//
// Any error from these steps is propagated to the caller.
//
// WARNING: Assumes the column is NOT indexable.
// NOTE(review): "NOT indexable" presumably means "not indexed yet" — nothing
// here checks whether an index already exists for the column; confirm.
// NOTE(review): the header is persisted as indexed before the index is built,
// so a failure during step 3 leaves the header claiming an index that is only
// partially populated — confirm whether that is acceptable.
pub async fn attach_index(&mut self, column: Column) -> Result<()>
where T: Ord + Decode + Encode + Send + Sync
{
// NOTE(review): the next two lines look like leftovers of the replaced
// insert_to_index stub (removed side of the diff) rather than part of
// attach_index — confirm against the actual file.
// let x = self.primary_index.insert(t, file_position).await?;
todo!()
// New Index
let index = Store::create_empty_index_at(&self.header, column).await?;
self.indexes[column as usize] = Some(index);
// Mark column as indexed
self.header.make_column_indexed(column);
// The header is cloned so that a shared borrow of self.header is not held
// while set_header borrows self mutably.
self.set_header(&self.header.clone()).await?;
// Build index
// set_header left the cursor after the header bytes it wrote; reposition
// explicitly at the first entry before scanning.
self.seek_to_start_of_data().await?;
// next_alive_at_column skips deleted entries, so only live entries are indexed.
while let Some((_, file_position, value)) = self.next_alive_at_column(column).await? {
self.insert_into_index(column, value, file_position).await?
}
Ok(())
}
async fn delete_from_index(&mut self, t: T, file_position: FilePosition) -> Result<Option<FilePosition>>