minisql/storage_engine/src/index.rs
2024-02-04 20:20:20 +01:00

147 lines
3.8 KiB
Rust

use std::path::PathBuf;
use tokio::fs::{File, OpenOptions};
use tokio::io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt, BufWriter};
use std::collections::{BTreeMap, HashSet};
use std::hash::Hash;
use crate::binary_coding::{decode, encode};
use bincode;
use bincode::{Decode, Encode};
use crate::error::{DecodeErrorKind, Error};
/// Module-local shorthand for `std::result::Result` with the crate's [`Error`]
/// as the error type.
type Result<T> = std::result::Result<T, Error>;
/// An ordered multimap index: each key maps to a set of values, held in memory
/// and backed by a file.
///
/// The methods on this type write encoded key/value pairs to `file` and read
/// them back into `data`.
#[derive(Debug)]
pub struct Index<K, V> {
/// Backing file handle that the index's entries are written to and loaded from.
file: File,
/// In-memory contents: keys in sorted order, each with its set of distinct values.
data: BTreeMap<K, HashSet<V>>,
}
/// Empty struct; nothing in the visible part of this file reads or writes it.
///
/// NOTE(review): presumably a placeholder for a future on-disk header -- confirm
/// or remove.
#[derive(Debug)]
pub struct IndexHeader {}
impl<K, V> Index<K, V>
where
    K: Encode + Decode + Ord,
    V: Encode + Decode + Clone + Eq + Hash,
{
    /// Opens `file_name` for reading and writing, creating it if it does not
    /// exist, and returns an index whose in-memory map starts out empty.
    ///
    /// Existing file content is neither truncated nor loaded; use
    /// [`Index::connect`] to load an existing index file.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or created.
    pub async fn new(file_name: PathBuf) -> Result<Index<K, V>> {
        let file: File = OpenOptions::new()
            .read(true)
            .write(true)
            .create(true)
            // Same as the default; spelled out so the "create without
            // truncate" combination is visibly deliberate.
            // NOTE(review): confirm that keeping old content is intended here.
            .truncate(false)
            .open(file_name)
            .await?;
        Ok(Index {
            file,
            data: BTreeMap::new(),
        })
    }

    /// Opens the existing file `file_name` for reading and writing and loads
    /// every key/value pair stored in it into memory.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened (it is not created when
    /// missing), cannot be read, or its content fails to decode.
    pub async fn connect(file_name: PathBuf) -> Result<Index<K, V>> {
        let file: File = OpenOptions::new()
            .read(true)
            .write(true)
            .open(file_name)
            .await?;
        let mut index = Index {
            file,
            data: BTreeMap::new(),
        };
        index.load_from_file().await?;
        Ok(index)
    }

    /// Appends the pair `(k, v)` to the backing file and then records it in
    /// memory.
    ///
    /// # Errors
    ///
    /// Returns an error if encoding or writing fails; in that case the
    /// in-memory map is left unchanged.
    pub async fn insert(&mut self, k: K, v: V) -> Result<()> {
        // Persist first so a failed write never leaves memory ahead of disk.
        self.append_to_file(&k, &v).await?;
        self.insert_desynced(k, v);
        Ok(())
    }

    /// Records the pair `(k, v)` in memory only, without touching the file.
    ///
    /// The pair reaches the file on the next full rewrite (for example via
    /// [`Index::sync_to_disk`]).
    pub fn insert_desynced(&mut self, k: K, v: V) {
        self.data.entry(k).or_default().insert(v);
    }

    /// Returns a clone of the set of values stored under `k`, or `None` when
    /// the key is absent.
    ///
    /// Only the in-memory map is consulted; this implementation never returns
    /// `Err`.
    pub async fn lookup(&self, k: &K) -> Result<Option<HashSet<V>>> {
        Ok(self.data.get(k).cloned())
    }

    /// Removes the value `v` from the set stored under `k` and rewrites the
    /// whole backing file from the in-memory map.
    ///
    /// When the last value of a key is removed the key itself is dropped, so
    /// a later [`Index::lookup`] returns `None` -- the same answer it gives
    /// after the index is reloaded from the file.
    ///
    /// # Errors
    ///
    /// Returns an error if rewriting the file fails. The in-memory removal
    /// has already happened at that point.
    pub async fn delete(&mut self, k: K, v: V) -> Result<()> {
        if let Some(values) = self.data.get_mut(&k) {
            values.remove(&v);
            if values.is_empty() {
                // Do not keep an empty set around: the file format stores
                // pairs only, so an empty key cannot survive a reload.
                self.data.remove(&k);
            }
        }
        // Always rewrite, matching the previous behaviour: this also persists
        // anything added through `insert_desynced`.
        self.dump_to_file().await
    }

    /// Rewrites the backing file so it matches the in-memory map.
    ///
    /// # Errors
    ///
    /// Returns an error if encoding or any file operation fails.
    pub async fn sync_to_disk(&mut self) -> Result<()> {
        self.dump_to_file().await
    }

    /// Encodes `key` followed by `value` and appends the bytes at the end of
    /// the backing file.
    async fn append_to_file(&mut self, key: &K, value: &V) -> Result<()> {
        let mut encoded = Vec::new();
        encoded.extend(encode(key)?);
        encoded.extend(encode(value)?);
        self.file.seek(std::io::SeekFrom::End(0)).await?;
        // `write` may accept only part of the buffer; `write_all` keeps going
        // until the whole record has been handed over.
        self.file.write_all(&encoded).await?;
        // Wait for the write to complete so that an I/O failure is reported
        // by this call rather than by a later, unrelated one.
        self.file.flush().await?;
        Ok(())
    }

    /// Overwrites the backing file from the start with every `(key, value)`
    /// pair in the in-memory map, then sets the file length to the number of
    /// bytes written so no stale data remains past the end.
    async fn dump_to_file(&mut self) -> Result<()> {
        let mut writer = BufWriter::new(&mut self.file);
        writer.seek(std::io::SeekFrom::Start(0)).await?;
        let mut written: u64 = 0;
        // One scratch buffer reused for every record.
        let mut encoded = Vec::new();
        for (key, values) in &self.data {
            for value in values {
                encoded.clear();
                encoded.extend(encode(key)?);
                encoded.extend(encode(value)?);
                // `write_all` guarantees the full record is written, which
                // keeps `written` equal to the real number of bytes emitted.
                writer.write_all(&encoded).await?;
                written += encoded.len() as u64;
            }
        }
        writer.flush().await?;
        self.file.set_len(written).await?;
        Ok(())
    }

    /// Reads the whole backing file and decodes it as a sequence of key
    /// records each followed by a value record, inserting every pair into the
    /// in-memory map.
    ///
    /// Any decode failure is reported as `Error::DecodeError` with
    /// `DecodeErrorKind::CorruptedData`.
    async fn load_from_file(&mut self) -> Result<()> {
        let mut bytes = vec![];
        self.file.seek(std::io::SeekFrom::Start(0)).await?;
        self.file.read_to_end(&mut bytes).await?;
        let mut cursor = 0;
        while cursor < bytes.len() {
            // `decode` returns the decoded item and a length that is used to
            // advance the cursor to the next record.
            let (key, key_len) = decode(&bytes[cursor..])
                .map_err(|e| Error::DecodeError(DecodeErrorKind::CorruptedData, e))?;
            cursor += key_len;
            let (value, value_len) = decode(&bytes[cursor..])
                .map_err(|e| Error::DecodeError(DecodeErrorKind::CorruptedData, e))?;
            cursor += value_len;
            self.insert_desynced(key, value);
        }
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Unimplemented test stub (presumably meant to become an encode/decode
    /// round-trip test, going by its name).
    ///
    /// The body is `todo!()`, which panics unconditionally, so the test is
    /// marked `#[ignore]` to stop it failing every `cargo test` run. It can
    /// still be run explicitly with `cargo test -- --ignored`.
    #[tokio::test]
    #[ignore = "not implemented yet: body is todo!()"]
    async fn encode_decode() {
        todo!();
    }
}