Put parsing details into one module

This commit is contained in:
Yuriy Dupyn 2024-01-26 19:45:15 +01:00
parent 61c0a34253
commit 6000b1f242
10 changed files with 30 additions and 29 deletions

View file

@ -0,0 +1,95 @@
use nom::{
character::complete::{alphanumeric1, char, multispace0, anychar, multispace1},
combinator::peek,
error::make_error,
sequence::{delimited, terminated},
bytes::complete::tag,
IResult, branch::alt,
};
use minisql::{operation::Condition, type_system::DbType};
use super::literal::parse_db_value;
/// Parses a table name.
///
/// Accepts either alphanumeric characters wrapped in double quotes (the quotes
/// are not part of the returned name) or a bare identifier as accepted by
/// `parse_identifier`. The quoted form is tried first.
pub fn parse_table_name(input: &str) -> IResult<&str, &str> {
    let quoted_name = delimited(char('"'), alphanumeric1, char('"'));
    alt((quoted_name, parse_identifier))(input)
}
pub fn parse_identifier(input: &str) -> IResult<&str, &str> {
// TODO: allow underscores
let (_, first) = peek(anychar)(input)?;
if first.is_alphabetic() {
alphanumeric1(input)
} else {
Err(nom::Err::Error(make_error(input, nom::error::ErrorKind::Alpha)))
}
}
/// Parses a column name (an identifier) and consumes any whitespace that
/// follows it, returning the name as an owned `String`.
pub fn parse_column_name(input: &str) -> IResult<&str, String> {
    let (rest, name) = terminated(parse_identifier, multispace0)(input)?;
    Ok((rest, name.to_string()))
}
pub fn parse_db_type(input: &str) -> IResult<&str, DbType> {
let (input, type_name) = alt((tag("STRING"), tag("INT"), tag("Float"), tag("UUID")))(input)?;
let db_type = match type_name {
"STRING" => DbType::String,
"INT" => DbType::Int,
"UUID" => DbType::Uuid,
"Float" => DbType::Number,
_ => return Err(nom::Err::Failure(make_error(input, nom::error::ErrorKind::IsNot)))
};
Ok((input, db_type))
}
pub fn parse_condition(input: &str) -> IResult<&str, Option<Condition>> {
match tag::<&str, &str, nom::error::Error<&str>>("WHERE")(input) {
Ok((input, _)) => {
let (input, _) = multispace1(input)?;
let (input, condition) = parse_equality(input)?;
Ok((input, Some(condition)))
}
Err(_) => {
Ok((input, None))
}
}
}
fn parse_equality(input: &str) -> IResult<&str, Condition> {
let (input, column_name) = parse_column_name(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char('=')(input)?;
let (input, _) = multispace0(input)?;
let (input, db_value) = parse_db_value(input)?;
Ok((input, Condition::Eq(column_name, db_value)))
}
#[cfg(test)]
mod tests {
    use minisql::{operation::Condition, type_system::DbType};
    use crate::parsing::common::{parse_db_type, parse_equality};

    /// An equality must consume the whole input and yield the column and value.
    #[test]
    fn test_parse_equality() {
        use minisql::type_system::{IndexableValue, Value};

        let parsed = parse_equality("id = 1");
        if let Ok(("", Condition::Eq(column_name, value))) = parsed {
            assert!(column_name.eq("id"));
            assert_eq!(value, Value::Indexable(IndexableValue::Int(1)))
        } else {
            panic!("should parse");
        }
    }

    /// Every supported type keyword maps to its `DbType`; unknown ones fail.
    #[test]
    fn test_parse_db_type() {
        let (_, int_type) = parse_db_type("INT").expect("should parse");
        assert!(matches!(int_type, DbType::Int));

        let (_, string_type) = parse_db_type("STRING").expect("should parse");
        assert!(matches!(string_type, DbType::String));

        let (_, uuid_type) = parse_db_type("UUID").expect("should parse");
        assert!(matches!(uuid_type, DbType::Uuid));

        let (_, number_type) = parse_db_type("NUMBER").expect("should parse");
        assert!(matches!(number_type, DbType::Number));

        assert!(matches!(parse_db_type("Unknown"), Err(_)));
    }
}

View file

@ -0,0 +1,109 @@
use minisql::{operation::Operation, schema::{ColumnName, TableSchema}, type_system::DbType};
use nom::{
bytes::complete::tag,
character::complete::{char, multispace0, multispace1},
multi::separated_list0,
sequence::terminated,
IResult, combinator::opt,
};
use super::common::{parse_table_name, parse_identifier, parse_db_type};
pub fn parse_create(input: &str) -> IResult<&str, Operation> {
let (input, _) = tag("CREATE")(input)?;
let (input, _) = multispace1(input)?;
let (input, _) = tag("TABLE")(input)?;
let (input, _) = multispace1(input)?;
let (input, table_name) = parse_table_name(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char('(')(input)?;
let (input, _) = multispace0(input)?;
let (input, column_definitions) = parse_column_definitions(input)?;
let mut column_name_position_mapping = Vec::new();
let mut types: Vec<DbType> = Vec::new();
let mut primary_key = None;
for (position, (column_name, db_type, pk)) in column_definitions.iter().enumerate() {
types.push(db_type.clone());
if *pk {
primary_key = Some(position);
}
column_name_position_mapping.push((column_name.clone(), position));
}
let (input, _) = char(')')(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char(';')(input)?;
let schema = TableSchema::new(
table_name.to_string(),
primary_key.unwrap_or_default(),
column_name_position_mapping,
types
);
Ok((
input,
Operation::CreateTable(table_name.to_string(), schema),
))
}
/// Parses a possibly empty, comma-separated list of column definitions.
/// Whitespace after each comma is skipped.
pub fn parse_column_definitions(input: &str) -> IResult<&str, Vec<(ColumnName, DbType, bool)>> {
    let comma = terminated(char(','), multispace0);
    separated_list0(comma, parse_column_definition)(input)
}
fn parse_primary_key(input: &str) -> IResult<&str, &str> {
let (input, _) = multispace1(input)?;
let (input, _) = tag("PRIMARY")(input)?;
let (input, _) = multispace1(input)?;
let (input, _) = tag("KEY")(input)?;
Ok((input, "PRIMARY KEY"))
}
pub fn parse_column_definition(input: &str) -> IResult<&str, (ColumnName, DbType, bool)> {
let (input, identifier) = parse_identifier(input)?;
let (input, _) = multispace1(input)?;
let (input, db_type) = parse_db_type(input)?;
let (input, pk) = opt(parse_primary_key)(input).map(|(input, pk)| (input, pk.is_some()))?;
let (input, _) = multispace0(input)?;
Ok((input, (identifier.to_string(), db_type, pk)))
}
#[cfg(test)]
mod tests {
    use minisql::operation::Operation;
    use crate::parsing::create::parse_create;

    /// Minimal whitespace between the table name, parentheses and columns.
    #[test]
    fn test_parse_create_no_spaces() {
        let sql = "CREATE TABLE \"Table1\"(id UUID ,column1 INT);";
        parse_create(sql).expect("should parse");
    }

    /// A column may carry the `PRIMARY KEY` marker.
    #[test]
    fn test_parse_create_primary_key() {
        let sql = "CREATE TABLE \"Table1\"(id UUID PRIMARY KEY,column1 INT);";
        parse_create(sql).expect("should parse");
    }

    /// The table name does not have to be quoted.
    #[test]
    fn test_parse_create_no_quotes_table_name() {
        let sql = "CREATE TABLE Table1(id UUID PRIMARY KEY,column1 INT);";
        parse_create(sql).expect("should parse");
    }

    /// Generous whitespace around every token is tolerated.
    #[test]
    fn test_parse_create_primary_key_with_spaces() {
        let sql = "CREATE TABLE \"Table1\" ( id UUID PRIMARY KEY , column1 INT ) ;";
        parse_create(sql).expect("should parse");
    }

    /// The resulting schema lists the columns in declaration order.
    #[test]
    fn test_parse_create() {
        let sql = "CREATE TABLE \"Table1\"( id UUID , column1 INT );";
        let (_, create) = parse_create(sql).expect("should parse");
        assert!(matches!(create, Operation::CreateTable(_, _)));
        if let Operation::CreateTable(name, schema) = create {
            assert_eq!(name, "Table1");
            assert_eq!(schema.number_of_columns(), 2);
            assert_eq!(schema.column_position_from_column_name(&"id".to_string()).unwrap(), 0);
            assert_eq!(schema.column_position_from_column_name(&"column1".to_string()).unwrap(), 1);
        }
    }
}

View file

@ -0,0 +1,38 @@
use minisql::operation::Operation;
use nom::{
bytes::complete::tag,
character::complete::{char, multispace0, multispace1},
IResult,
};
use super::common::{parse_table_name, parse_condition};
/// Parses `DELETE FROM <table> [WHERE <column> = <value>] ;` into
/// `Operation::Delete`.
pub fn parse_delete(input: &str) -> IResult<&str, Operation> {
    // Keywords.
    let (rest, _) = tag("DELETE")(input)?;
    let (rest, _) = multispace1(rest)?;
    let (rest, _) = tag("FROM")(rest)?;
    let (rest, _) = multispace1(rest)?;

    // Target table and optional filter.
    let (rest, table_name) = parse_table_name(rest)?;
    let (rest, _) = multispace0(rest)?;
    let (rest, condition) = parse_condition(rest)?;
    let (rest, _) = multispace0(rest)?;

    // Statement terminator.
    let (rest, _) = char(';')(rest)?;

    let operation = Operation::Delete(table_name.to_string(), condition);
    Ok((rest, operation))
}
#[cfg(test)]
mod tests {
    use minisql::operation::Operation;
    use crate::parsing::delete::parse_delete;

    /// A DELETE with a WHERE clause parses into `Operation::Delete`.
    #[test]
    fn test_parse_delete() {
        let sql = "DELETE FROM \"T1\" WHERE id = 1 ;";
        let (_, operation) = parse_delete(sql).expect("should parse");
        assert!(matches!(operation, Operation::Delete(_, _)))
    }

    // TODO: add a test without a WHERE clause, and assert on the parsed condition
}

View file

@ -0,0 +1,70 @@
use minisql::operation::Operation;
use nom::{
bytes::complete::tag,
character::complete::{char, multispace0, multispace1},
IResult, combinator::opt,
};
use super::common::{parse_identifier, parse_table_name};
/// Parses `CREATE [UNIQUE] INDEX <index name> ON <table> ( <column> ) ;` into
/// `Operation::CreateIndex`.
///
/// The `UNIQUE` flag and the index name are parsed but not stored: the
/// resulting operation carries only the table and column names.
pub fn parse_create_index(input: &str) -> IResult<&str, Operation> {
    /// Matches the optional ` UNIQUE` keyword together with its leading whitespace.
    fn unique_keyword(input: &str) -> IResult<&str, bool> {
        let (rest, _) = multispace1(input)?;
        let (rest, _) = tag("UNIQUE")(rest)?;
        Ok((rest, true))
    }

    let (rest, _) = tag("CREATE")(input)?;
    let (rest, _is_unique) = opt(unique_keyword)(rest)?;
    let (rest, _) = multispace1(rest)?;
    let (rest, _) = tag("INDEX")(rest)?;
    let (rest, _) = multispace1(rest)?;
    let (rest, _index_name) = parse_identifier(rest)?;
    let (rest, _) = multispace1(rest)?;
    let (rest, _) = tag("ON")(rest)?;
    let (rest, _) = multispace1(rest)?;
    let (rest, table_name) = parse_table_name(rest)?;
    let (rest, _) = multispace0(rest)?;

    // Parenthesised single column.
    let (rest, _) = char('(')(rest)?;
    let (rest, _) = multispace0(rest)?;
    let (rest, column_name) = parse_identifier(rest)?;
    let (rest, _) = multispace0(rest)?;
    let (rest, _) = char(')')(rest)?;
    let (rest, _) = multispace0(rest)?;
    let (rest, _) = char(';')(rest)?;

    Ok((
        rest,
        Operation::CreateIndex(table_name.to_string(), column_name.to_string()),
    ))
}
#[cfg(test)]
mod tests {
    use minisql::operation::Operation;
    use crate::parsing::index::parse_create_index;

    /// Parses `sql` and checks that it creates an index on `contacts(email)`.
    fn assert_index_on_contacts_email(sql: &str) {
        let (_, operation) = parse_create_index(sql).expect("should parse");
        assert!(matches!(operation, Operation::CreateIndex(_, _)));
        if let Operation::CreateIndex(table_name, column_name) = operation {
            assert_eq!(table_name, "contacts");
            assert_eq!(column_name, "email");
        }
    }

    #[test]
    fn test_create_index() {
        assert_index_on_contacts_email(
            "CREATE UNIQUE INDEX idxcontactsemail ON \"contacts\" (email);",
        );
    }

    #[test]
    fn test_create_index_with_spaces() {
        assert_index_on_contacts_email(
            "CREATE UNIQUE INDEX idxcontactsemail ON \"contacts\" ( email ) ;",
        );
    }
}

View file

@ -0,0 +1,94 @@
use super::{literal::parse_db_value, common::{parse_table_name, parse_identifier}};
use minisql::{operation::Operation, type_system::Value};
use nom::{
bytes::complete::tag,
character::complete::{multispace0, multispace1, char},
combinator::map,
multi::separated_list0,
sequence::terminated,
IResult,
};
pub fn parse_insert(input: &str) -> IResult<&str, Operation> {
let (input, _) = tag("INSERT")(input)?;
let (input, _) = multispace1(input)?;
let (input, _) = tag("INTO")(input)?;
let (input, _) = multispace1(input)?;
let (input, table_name) = parse_table_name(input)?;
let (input, _) = multispace1(input)?;
let (input, _) = char('(')(input)?;
let (input, _) = multispace0(input)?;
let (input, column_names) = parse_columns(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char(')')(input)?;
let (input, _) = multispace1(input)?;
let (input, _) = tag("VALUES")(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char('(')(input)?;
let (input, _) = multispace0(input)?;
let (input, values) = parse_values(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char(')')(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = char(';')(input)?;
Ok((
input,
Operation::Insert(table_name.to_string(), column_names.into_iter().zip(values).collect()),
))
}
/// Parses a possibly empty, comma-separated list of identifiers into owned
/// column names. Whitespace after each comma is skipped.
pub fn parse_columns(input: &str) -> IResult<&str, Vec<String>> {
    let comma = terminated(char(','), multispace0);
    let column_name = map(parse_identifier, String::from);
    separated_list0(comma, column_name)(input)
}
/// Parses a possibly empty, comma-separated list of literal values.
/// Whitespace after each comma is skipped.
pub fn parse_values(input: &str) -> IResult<&str, Vec<Value>> {
    let comma = terminated(char(','), multispace0);
    separated_list0(comma, parse_db_value)(input)
}
#[cfg(test)]
mod tests {
    use minisql::{operation::Operation, type_system::{IndexableValue, Value}};
    use super::parse_insert;

    /// Parses `sql` and checks that it inserts `id = 1` and `data = "Text"`
    /// into `MyTable`, consuming the whole input.
    fn assert_inserts_id_and_data(sql: &str) {
        let operation = parse_insert(sql).expect("should parse");
        match operation {
            ("", Operation::Insert(table_name, insertion_values)) => {
                let expected = vec![
                    ("id".to_string(), Value::Indexable(IndexableValue::Int(1))),
                    ("data".to_string(), Value::Indexable(IndexableValue::String("Text".to_string()))),
                ];
                assert_eq!(table_name, "MyTable");
                assert_eq!(insertion_values, expected);
            }
            _ => {
                unreachable!()
            }
        }
    }

    #[test]
    fn test_parse_insert() {
        assert_inserts_id_and_data("INSERT INTO \"MyTable\" (id, data) VALUES(1, \"Text\");");
    }

    #[test]
    fn test_parse_insert_with_spaces() {
        assert_inserts_id_and_data("INSERT INTO \"MyTable\" ( id, data ) VALUES ( 1, \"Text\" ) ;");
    }
}

View file

@ -0,0 +1,164 @@
use minisql::type_system::{IndexableValue, Value};
use nom::{
branch::alt,
character::complete::{u64, char, digit1, none_of},
combinator::opt,
multi::many0,
sequence::{delimited, pair, preceded},
IResult, error::make_error
};
/// Parses a literal value, trying the alternatives in this order: quoted
/// string, number, integer, uuid.
///
/// NOTE(review): `parse_number` already accepts plain unsigned integers, so the
/// `parse_int` and `parse_uuid` alternatives look unreachable for digit-led
/// input — confirm whether that ordering is intended.
pub fn parse_db_value(input: &str) -> IResult<&str, Value> {
    let mut any_value = alt((parse_string, parse_number, parse_int, parse_uuid));
    any_value(input)
}
pub fn parse_number(input: &str) -> IResult<&str, Value> {
// Parse the integer part
let (input, (sign, digits)) = pair(opt(char('-')), digit1)(input)?;
// Parse the fractional part
let (input, frac_part) = opt(pair(char('.'), digit1))(input)?;
match frac_part {
Some((_fsign, fdigits)) => {
// Combine integer and fractional parts
let combined_parts = format!(
"{}{}",
format!("{}{}", sign.unwrap_or('+'), digits),
format!(".{}", fdigits)
);
// Parse the combined parts as a floating-point number
let value = combined_parts.parse::<f64>()
.map_err(|_| {
nom::Err::Failure(make_error(input, nom::error::ErrorKind::Fail))
})?;
Ok((input, Value::Number(value)))
}
None => {
let value = format!("{}{}", sign.unwrap_or('+'), digits).parse::<u64>()
.map_err(|_| {
nom::Err::Failure(make_error(input, nom::error::ErrorKind::Fail))
})?;
Ok((input, Value::Indexable(IndexableValue::Int(value))))
}
}
}
/// Parses an unsigned 64-bit integer literal into an indexable `Int` value.
pub fn parse_int(input: &str) -> IResult<&str, Value> {
    let (rest, int) = u64(input)?;
    Ok((rest, Value::Indexable(IndexableValue::Int(int))))
}
/// Matches the two-character escape `\t` and yields a tab character.
fn escape_tab(input: &str) -> IResult<&str, char> {
    preceded(char('\\'), char('t'))(input).map(|(rest, _)| (rest, '\t'))
}
/// Matches the two-character escape `\\` and yields a single backslash.
fn escape_backslash(input: &str) -> IResult<&str, char> {
    preceded(char('\\'), char('\\'))(input).map(|(rest, _)| (rest, '\\'))
}
/// Matches the two-character escape `\n` and yields a newline character.
fn escape_newline(input: &str) -> IResult<&str, char> {
    preceded(char('\\'), char('n'))(input).map(|(rest, _)| (rest, '\n'))
}
/// Matches the two-character escape `\r` and yields a carriage return.
fn escape_carriegereturn(input: &str) -> IResult<&str, char> {
    preceded(char('\\'), char('r'))(input).map(|(rest, _)| (rest, '\r'))
}
/// Matches the two-character escape `\"` and yields a double quote.
fn escape_doublequote(input: &str) -> IResult<&str, char> {
    let (rest, quote) = preceded(char('\\'), char('"'))(input)?;
    Ok((rest, quote))
}
/// Parses a double-quoted string literal into an indexable `String` value.
///
/// Inside the quotes the escapes `\\`, `\r`, `\n`, `\"` and `\t` are decoded;
/// any other character except a backslash or double quote is taken verbatim.
pub fn parse_string(input: &str) -> IResult<&str, Value> {
    // One decoded character of the string body.
    let string_char = alt((
        escape_backslash,
        escape_carriegereturn,
        escape_newline,
        escape_doublequote,
        escape_tab,
        none_of(r#"\""#),
    ));
    let (rest, chars) = delimited(char('"'), many0(string_char), char('"'))(input)?;
    let text = chars.into_iter().collect::<String>();
    Ok((rest, Value::Indexable(IndexableValue::String(text))))
}
/// Parses an unsigned 64-bit integer literal into an indexable `Uuid` value.
fn parse_uuid(input: &str) -> IResult<&str, Value> {
    // TODO: make it actually uuid
    let (rest, raw) = u64(input)?;
    Ok((rest, Value::Indexable(IndexableValue::Uuid(raw))))
}
#[cfg(test)]
mod tests {
    use minisql::type_system::{IndexableValue, Value};
    use crate::parsing::literal::{parse_db_value, parse_string};

    /// Shorthand for the expected value of a string literal.
    fn string_value(text: &str) -> Value {
        Value::Indexable(IndexableValue::String(String::from(text)))
    }

    /// Shorthand for the expected value of an integer literal.
    fn int_value(int: u64) -> Value {
        Value::Indexable(IndexableValue::Int(int))
    }

    #[test]
    fn test_string_parser() {
        assert_eq!(parse_string(r#""simple""#), Ok(("", string_value("simple"))));
        assert_eq!(parse_string(r#""\"\t\r\n\\""#), Ok(("", string_value("\"\t\r\n\\"))));
        assert_eq!(parse_string(r#""name is \"John\".""#), Ok(("", string_value("name is \"John\"."))));
    }

    #[test]
    fn test_parse_db_value() {
        let (input, value) = parse_db_value("5").expect("should parse");
        assert_eq!(input, "");
        assert_eq!(value, int_value(5));

        let (input, value) = parse_db_value("5.5").expect("should parse");
        assert_eq!(input, "");
        assert_eq!(value, Value::Number(5.5));

        let (_, _) = parse_db_value("\"STRING\"").expect("should parse");

        let (input, value) = parse_db_value("\"abcdefghkjklmnopqrstuvwxyz!@#$%^&*()_+ \"").expect("should parse");
        assert_eq!(input, "");
        assert_eq!(value, string_value("abcdefghkjklmnopqrstuvwxyz!@#$%^&*()_+ "));
    }

    #[test]
    fn test_parse_positive_float() {
        assert_eq!(parse_db_value("23.213313"), Ok(("", Value::Number(23.213313))));
        assert_eq!(parse_db_value("2241.9734"), Ok(("", Value::Number(2241.9734))));
    }

    #[test]
    fn test_parse_negative_float() {
        assert_eq!(parse_db_value("-9241.873654"), Ok(("", Value::Number(-9241.873654))));
        assert_eq!(parse_db_value("-62625.0"), Ok(("", Value::Number(-62625.0))));
    }

    #[test]
    fn test_parse_float_between_0_and_1() {
        assert_eq!(parse_db_value("0.873654"), Ok(("", Value::Number(0.873654))));
        assert_eq!(parse_db_value("0.62625"), Ok(("", Value::Number(0.62625))));
    }

    #[test]
    fn test_parse_int() {
        assert_eq!(parse_db_value("5134616"), Ok(("", int_value(5134616))));
    }
}

View file

@ -0,0 +1,7 @@
// Parsers for literal values (strings, numbers, integers, uuids).
pub(crate) mod literal;
// Parser for SELECT statements.
pub(crate) mod select;
// Shared building blocks: table/column names, type keywords, WHERE conditions.
pub(crate) mod common;
// Parser for CREATE TABLE statements.
pub(crate) mod create;
// Parser for INSERT statements.
pub(crate) mod insert;
// Parser for DELETE statements.
pub(crate) mod delete;
// Parser for CREATE INDEX statements.
pub(crate) mod index;

View file

@ -0,0 +1,122 @@
use super::common::{parse_table_name, parse_column_name, parse_condition};
use minisql::operation::{ColumnSelection, Operation};
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{multispace0, multispace1, char},
combinator::map,
error::Error,
multi::separated_list0,
sequence::terminated,
IResult,
};
pub fn parse_select(input: &str) -> IResult<&str, Operation> {
let (input, _) = tag("SELECT")(input)?;
let (input, _) = multispace1(input)?;
let (input, column_selection) = try_parse_column_selection(input)?;
let (input, _) = multispace0(input)?;
let (input, _) = tag("FROM")(input)?;
let (input, _) = multispace1(input)?;
let (input, table_name) = parse_table_name(input)?;
let (input, _) = multispace0(input)?;
let (input, condition) = parse_condition(input)?;
let (input, _) = multispace0(input)?;
// TODO: make it optional?
let (input, _) = tag(";")(input)?;
Ok((
input,
Operation::Select(table_name.to_string(), column_selection, condition),
))
}
pub fn try_parse_column_selection(input: &str) -> IResult<&str, ColumnSelection> {
let all_parser = map(tag::<&str, &str, Error<&str>>("*"), |_| {
ColumnSelection::All
});
let columns_parser = map(
separated_list0(terminated(char(','), multispace0), parse_column_name),
|names| ColumnSelection::Columns(names),
);
alt((all_parser, columns_parser))(input)
}
#[cfg(test)]
mod tests {
    use minisql::operation::{ColumnSelection, Condition, Operation};
    use crate::parsing::{common::{parse_column_name, parse_table_name}, select::parse_select};

    /// Parses `sql` as a SELECT that consumes the whole input and returns its
    /// parts; panics when anything is left over or another operation comes back.
    fn parse_complete_select(sql: &str) -> (String, ColumnSelection, Option<Condition>) {
        match parse_select(sql).expect("should parse") {
            ("", Operation::Select(table_name, column_selection, maybe_condition)) => {
                (table_name, column_selection, maybe_condition)
            }
            (input, _) => {
                println!("Input to be parsed: {}", input);
                panic!("expected select operation")
            }
        }
    }

    #[test]
    fn test_parse_select_all() {
        let (table_name, column_selection, maybe_condition) =
            parse_complete_select("SELECT * FROM \"MyTable\";");
        assert_eq!(table_name, "MyTable");
        assert!(matches!(column_selection, ColumnSelection::All));
        assert!(maybe_condition.is_none());
    }

    #[test]
    fn test_parse_column_name() {
        parse_column_name("1abc").expect_err("variable names should not start with number");
    }

    #[test]
    fn test_parse_table_name() {
        parse_table_name("\"\"").expect_err("Empty table names are not allowed");
    }

    #[test]
    fn test_parse_select_columns() {
        let (table_name, column_selection, maybe_condition) =
            parse_complete_select("SELECT name , email FROM \"AddressBook\" ;");
        assert_eq!(table_name, "AddressBook");
        assert!(matches!(column_selection, ColumnSelection::Columns(_)));
        match column_selection {
            ColumnSelection::Columns(column_names) => {
                assert_eq!(column_names, vec!["name", "email"]);
            }
            _ => {
                panic!("should select columns")
            }
        }
        assert!(maybe_condition.is_none());
    }

    #[test]
    fn test_parse_select_where() {
        let (table_name, column_selection, maybe_condition) =
            parse_complete_select("SELECT * FROM \"AddressBook\" WHERE id = 5 ;");
        assert_eq!(table_name, "AddressBook");
        assert!(matches!(column_selection, ColumnSelection::All));
        assert!(matches!(maybe_condition, Some(Condition::Eq(_, _))));
    }

    // TODO: a test with multiple statements
    // TODO: allow underscores in identifiers
}