Introduce segments module

This commit is contained in:
Yuriy Dupyn 2024-02-05 03:35:43 +01:00
parent 1618bffb85
commit b13d2f04cd
8 changed files with 12 additions and 13 deletions

View file

@ -0,0 +1,60 @@
use bincode::{Decode, Encode};
use crate::binary_coding::{encode_sequence, encode_sequence_with_sizes, decode_sequence};
use crate::storage_engine::{Result, FilePosition};
use crate::error::{Error, DecodeErrorKind};
use crate::segments::entry_header::{EntryHeader, EntryHeaderWithDataSize};
/// An entry in its plain form: a deletion flag plus the stored values.
///
/// `T` is the type of a single stored value. The header carries no size
/// information; `Entry::encode` computes the encoded sizes while serializing.
#[derive(Debug)]
pub struct Entry<T> {
/// Header holding only the `is_deleted` flag.
pub header: EntryHeader,
/// The values of this entry — presumably one per column, in column order
/// (TODO confirm against callers).
pub data: Vec<T>,
}
/// An entry together with its size-aware header and a file position.
///
/// Produced by `EntryDetailed::decode`; `EntryDetailed::forget` converts it back
/// into a plain `Entry`.
#[derive(Debug)]
pub struct EntryDetailed<T> {
/// Header including the encoded byte size of every data item.
pub header: EntryHeaderWithDataSize,
/// Position associated with this entry — presumably where it starts in the
/// backing file (TODO confirm against the storage engine).
pub file_position: FilePosition,
/// The decoded values of this entry.
pub data: Vec<T>,
}
impl<T> Entry<T> {
    /// Builds an entry around `data` whose header is not flagged as deleted.
    pub fn new(data: Vec<T>) -> Self {
        let header = EntryHeader { is_deleted: false };
        Self { header, data }
    }

    /// Builds an entry around `data` whose header is flagged as deleted.
    pub fn new_deleted(data: Vec<T>) -> Self {
        let header = EntryHeader { is_deleted: true };
        Self { header, data }
    }

    /// Serializes the entry into a single byte vector.
    ///
    /// Layout of the output, in order:
    /// 1. the encoded `EntryHeader`,
    /// 2. the encoded sizes of the data items (one size per item),
    /// 3. the encoded data items themselves (variable size).
    ///
    /// Parts 1 and 2 together correspond to an `EntryHeaderWithDataSize`.
    ///
    /// # Errors
    /// Propagates any failure reported while encoding the header, the data
    /// items or the size table.
    pub fn encode(&self) -> Result<Vec<u8>>
    where
        T: Encode,
    {
        // The header goes first so the deletion flag sits at the start of the entry.
        let mut bytes: Vec<u8> = self.header.encode()?;

        // Encode the payload up front: its per-item sizes must be written before it.
        let (payload, sizes) = encode_sequence_with_sizes(self.data.as_slice())?;
        let size_table = encode_sequence(&sizes)?;

        bytes.extend(size_table);
        bytes.extend(payload);
        Ok(bytes)
    }
}
impl <T>EntryDetailed<T> {
pub fn decode(header: EntryHeaderWithDataSize, file_position: FilePosition, number_of_columns: usize, bytes: &[u8]) -> Result<Self>
where T: Decode
{
let data = decode_sequence::<T>(number_of_columns, bytes)
.map_err(|e| Error::DecodeError(DecodeErrorKind::EntryData, e))?;
Ok(EntryDetailed { header, file_position, data })
}
pub fn forget(&self) -> Entry<T>
where T: Clone
{
Entry {
header: self.header.clone().into(),
data: self.data.clone(),
}
}
}

View file

@ -0,0 +1,68 @@
use crate::binary_coding::{decode, encode, decode_sequence};
use crate::storage_engine::{Result, Column};
use crate::error::{Error, DecodeErrorKind};
use std::mem::size_of;
/// Minimal entry header: only the deletion flag, without any size information.
#[derive(Debug)]
pub struct EntryHeader {
/// `true` when the entry is flagged as deleted.
pub is_deleted: bool,
}
/// Entry header that additionally records the encoded size of every column value.
#[derive(Debug, Clone)]
pub struct EntryHeaderWithDataSize {
/// `true` when the entry is flagged as deleted.
pub is_deleted: bool,
/// Byte size of each column's data, indexed by column.
/// Example: `vec![5, 6, 20]` means that column 0 stores 5 bytes, column 1 stores
/// 6 bytes and column 2 stores 20 bytes.
pub data_sizes: Vec<usize>,
}
impl EntryHeader {
    /// Serializes the header, i.e. its `is_deleted` flag, into bytes.
    ///
    /// # Errors
    /// Propagates any failure reported by the underlying `encode` helper.
    pub fn encode(&self) -> Result<Vec<u8>> {
        let encoded_flag: Vec<u8> = encode(&self.is_deleted)?;
        Ok(encoded_flag)
    }
}
impl From<EntryHeaderWithDataSize> for EntryHeader {
fn from(entry: EntryHeaderWithDataSize) -> Self {
Self { is_deleted: entry.is_deleted, }
}
}
impl EntryHeaderWithDataSize {
    /// Byte offset of the `is_deleted` flag inside an encoded header.
    pub const IS_DELETED_OFFSET: usize = 0;
    /// Number of bytes reserved for the `is_deleted` flag.
    pub const IS_DELETED_SIZE: usize = size_of::<bool>();
    /// Byte offset at which the table of data sizes starts.
    pub const DATA_SIZES_OFFSET: usize = Self::IS_DELETED_OFFSET + Self::IS_DELETED_SIZE;

    /// Number of bytes an encoded header occupies for `number_of_columns` columns:
    /// the flag plus one `usize`-sized slot per column.
    // NOTE(review): this assumes each size is encoded with the in-memory width of
    // `usize` (fixed-width encoding) — confirm against the `binary_coding` config.
    pub fn size(number_of_columns: usize) -> usize {
        let data_sizes_bytes: usize = size_of::<usize>() * number_of_columns;
        data_sizes_bytes + Self::IS_DELETED_SIZE
    }

    /// Total number of data bytes described by this header (sum of all column sizes).
    pub fn size_of_data(&self) -> usize {
        self.data_sizes.iter().copied().sum()
    }

    /// Byte offset of `column` within the entry's data, i.e. the combined size of
    /// all columns that precede it.
    ///
    /// A `column` at or beyond the number of recorded sizes yields the total data size.
    pub fn offset_of_column(&self, column: Column) -> usize {
        let preceding_columns = column as usize;
        self.data_sizes.iter().take(preceding_columns).sum()
    }

    /// Decodes a header for an entry with `number_of_columns` columns from `bytes`.
    ///
    /// The flag is read from the start of `bytes`; the size table is read starting
    /// at `DATA_SIZES_OFFSET`.
    ///
    /// # Errors
    /// Returns `Error::DecodeError` tagged with `DecodeErrorKind::EntryIsDeleted`
    /// when the flag cannot be decoded, or with
    /// `DecodeErrorKind::EntryHeaderWithDataSizes` when the size table cannot be decoded.
    pub fn decode(bytes: &mut [u8], number_of_columns: usize) -> Result<Self> {
        let (is_deleted, _) = decode::<bool>(&bytes)
            .map_err(|e| Error::DecodeError(DecodeErrorKind::EntryIsDeleted, e))?;

        // Everything after the flag is the table of per-column sizes.
        let sizes_bytes = &bytes[Self::DATA_SIZES_OFFSET..];
        let data_sizes = decode_sequence::<usize>(number_of_columns, sizes_bytes)
            .map_err(|e| Error::DecodeError(DecodeErrorKind::EntryHeaderWithDataSizes, e))?;

        Ok(Self { is_deleted, data_sizes })
    }
}

View file

@ -0,0 +1,3 @@
// Submodules of the segments module.
// NOTE(review): presumably `entry` holds `Entry`/`EntryDetailed` and `store_header`
// holds `StoreHeader` — confirm against the file layout.
pub mod entry;
// Entry headers: `EntryHeader` and `EntryHeaderWithDataSize`.
pub mod entry_header;
pub mod store_header;

View file

@ -0,0 +1,127 @@
use crate::binary_coding::{encode, encode_sequence, decode, decode_sequence};
use crate::storage_engine::{Result, Column};
use crate::error::{Error, DecodeErrorKind};
use std::mem::size_of;
/// Complete header of a store: the fixed-size fields plus the per-column index flags.
#[derive(Debug, Clone)]
pub struct StoreHeader {
/// This one is not encoded into the file.
pub table_folder: String,
/// Number of columns; also determines the length of `indexed_columns` when decoding.
pub number_of_columns: usize,
/// Counter advanced by `increment_deleted_count`.
pub deleted_count: usize,
/// Counter advanced by `increment_total_count`.
pub total_count: usize,
/// The primary column — presumably the column used as primary key (TODO confirm).
pub primary_column: Column,
/// One flag per column; `true` marks the column as indexed.
pub indexed_columns: Vec<bool>,
}
/// The fixed-size part of a store header, as produced by `StoreHeader::decode_fixed`.
///
/// It lacks `indexed_columns`, whose length depends on `number_of_columns`;
/// `StoreHeader::decode_rest` adds it to form a full `StoreHeader`.
#[derive(Debug, Clone)]
pub struct StoreHeaderFixedPart {
/// This one is not encoded into the file.
pub table_folder: String,
/// Number of columns.
pub number_of_columns: usize,
/// Deleted-entries counter as decoded from the header bytes.
pub deleted_count: usize,
/// Total-entries counter as decoded from the header bytes.
pub total_count: usize,
/// The primary column — presumably the column used as primary key (TODO confirm).
pub primary_column: Column,
}
impl StoreHeader {
pub const NUMBER_OF_COLUMNS_SIZE: usize = size_of::<usize>();
pub const DELETED_COUNT_SIZE: usize = size_of::<usize>();
pub const TOTAL_COUNT_SIZE: usize = size_of::<usize>();
pub const PRIMARY_COLUMN_SIZE: usize = size_of::<Column>();
pub const FIXED_SIZE: usize = Self::NUMBER_OF_COLUMNS_SIZE + Self::DELETED_COUNT_SIZE + Self::TOTAL_COUNT_SIZE + Self::PRIMARY_COLUMN_SIZE;
pub const NUMBER_OF_COLUMNS_OFFSET: usize = 0;
pub const DELETED_COUNT_OFFSET: usize = Self::NUMBER_OF_COLUMNS_OFFSET + Self::NUMBER_OF_COLUMNS_SIZE;
pub const TOTAL_COUNT_OFFSET: usize = Self::DELETED_COUNT_OFFSET + Self::DELETED_COUNT_SIZE;
pub const PRIMARY_COLUMN_OFFSET: usize = Self::TOTAL_COUNT_OFFSET + Self::TOTAL_COUNT_SIZE;
#[allow(dead_code)]
pub const INDEXED_COLUMNS_OFFSET: usize = Self::PRIMARY_COLUMN_OFFSET + Self::PRIMARY_COLUMN_SIZE;
fn indexed_columns_size(number_of_columns: usize) -> usize {
size_of::<bool>() * number_of_columns
}
pub fn size(number_of_columns: usize) -> usize {
Self::FIXED_SIZE + Self::indexed_columns_size(number_of_columns)
}
pub fn encode(&self) -> Result<Vec<u8>> {
let mut result = encode(&self.number_of_columns)?;
result.append(&mut encode(&self.deleted_count)?);
result.append(&mut encode(&self.total_count)?);
result.append(&mut encode(&self.primary_column)?);
result.append(&mut encode_sequence(&self.indexed_columns)?);
Ok(result)
}
pub fn buffer_for_fixed_decoding() -> [u8; Self::FIXED_SIZE] {
[0; Self::FIXED_SIZE]
}
pub fn buffer_for_rest_decoding(header: &StoreHeaderFixedPart) -> Vec<u8> {
vec![0; Self::indexed_columns_size(header.number_of_columns)]
}
pub async fn decode_fixed(table_folder: &str, result: &[u8]) -> Result<StoreHeaderFixedPart> {
let (number_of_columns, _) =
decode::<usize>(&result[Self::NUMBER_OF_COLUMNS_OFFSET..Self::NUMBER_OF_COLUMNS_OFFSET + Self::NUMBER_OF_COLUMNS_SIZE])
.map_err(|e| Error::DecodeError(DecodeErrorKind::StoreHeaderNumberOfColumns, e))?;
let (deleted_count, _) =
decode::<usize>(&result[Self::DELETED_COUNT_OFFSET..Self::DELETED_COUNT_OFFSET + Self::DELETED_COUNT_SIZE])
.map_err(|e| Error::DecodeError(DecodeErrorKind::StoreHeaderDeletedCount, e))?;
let (total_count, _) =
decode::<usize>(&result[Self::TOTAL_COUNT_OFFSET..Self::TOTAL_COUNT_OFFSET + Self::TOTAL_COUNT_SIZE])
.map_err(|e| Error::DecodeError(DecodeErrorKind::StoreHeaderTotalCount, e))?;
let (primary_column, _) =
decode::<Column>(&result[Self::PRIMARY_COLUMN_OFFSET..Self::PRIMARY_COLUMN_OFFSET + Self::PRIMARY_COLUMN_SIZE])
.map_err(|e| Error::DecodeError(DecodeErrorKind::StoreHeaderPrimaryColumn, e))?;
let header = StoreHeaderFixedPart {
table_folder: table_folder.to_string(),
number_of_columns,
deleted_count,
total_count,
primary_column,
};
Ok(header)
}
pub async fn decode_rest(header: StoreHeaderFixedPart, result: &[u8]) -> Result<StoreHeader> {
let indexed_columns: Vec<bool> =
decode_sequence::<bool>(header.number_of_columns, result)
.map_err(|e| Error::DecodeError(DecodeErrorKind::StoreHeaderIndexedColumns, e))?;
Ok(StoreHeader {
table_folder: header.table_folder,
number_of_columns: header.number_of_columns,
deleted_count: header.deleted_count,
total_count: header.total_count,
primary_column: header.primary_column,
indexed_columns,
})
}
// returns new count
pub fn increment_total_count(&mut self) -> usize {
self.total_count += 1;
self.total_count
}
// returns new count
pub fn increment_deleted_count(&mut self) -> usize {
self.deleted_count += 1;
self.deleted_count
}
pub fn is_column_indexed(&self, column: Column) -> bool {
self.indexed_columns[column as usize]
}
pub fn make_column_indexed(&mut self, column: Column) {
self.indexed_columns[column as usize] = true
}
}