Introduce length functor (#314)

This commit is contained in:
Mike Voronov
2022-09-08 16:58:04 +03:00
committed by GitHub
parent 626796b299
commit a4011ef038
56 changed files with 1411 additions and 683 deletions

View File

@ -28,10 +28,10 @@
mod parser;
pub use parser::parse;
pub use parser::AccessorsLexer;
pub use parser::LambdaParser;
pub use parser::LambdaASTLexer;
pub use parser::LambdaParserError;
pub use parser::LexerError;
pub use air_lambda_ast::Functor;
pub use air_lambda_ast::LambdaAST;
pub use air_lambda_ast::ValueAccessor;

View File

@ -26,8 +26,8 @@ pub enum LambdaParserError<'input> {
#[error(transparent)]
LexerError(#[from] LexerError),
#[error("provided lambda expression doesn't contain any accessor")]
EmptyLambda,
#[error(transparent)]
LambdaError(#[from] IncorrectLambdaError),
#[error("{0:?}")]
ParseError(ParseError<usize, Token<'input>, LexerError>),
@ -36,6 +36,17 @@ pub enum LambdaParserError<'input> {
RecoveryErrors(Vec<ErrorRecovery<usize, Token<'input>, LexerError>>),
}
/// Errors reported when a successfully parsed raw lambda can't be turned into a `LambdaAST`.
#[derive(ThisError, Debug, Clone, PartialEq, Eq)]
pub enum IncorrectLambdaError {
/// The value path was parsed, but building a `LambdaAST` from its accessors failed.
#[error("provided lambda expression doesn't contain any accessor")]
EmptyLambda,
/// The parser's error placeholder reached the conversion step; per the message below,
/// this indicates a defect in the parser implementation rather than in the user input.
#[error(
"normally, this error shouldn't occur, it's an internal error of a parser implementation"
)]
InternalError,
}
impl<'input> From<ParseError<usize, Token<'input>, LexerError>> for LambdaParserError<'input> {
fn from(e: ParseError<usize, Token<'input>, LexerError>) -> Self {
Self::ParseError(e)

View File

@ -14,36 +14,54 @@
* limitations under the License.
*/
use super::lexer::AccessorsLexer;
use super::va_lambda;
use super::LambdaParserError;
use super::lexer::LambdaASTLexer;
use super::LambdaParserResult;
use crate::parser::errors::IncorrectLambdaError;
use crate::parser::va_lambda::RawLambdaASTParser;
use crate::Functor;
use crate::LambdaAST;
use crate::ValueAccessor;
use va_lambda::LambdaParser;
use std::convert::TryFrom;
use std::convert::TryInto;
// Caching parser to cache internal regexes, which are expensive to instantiate
// See also https://github.com/lalrpop/lalrpop/issues/269
thread_local!(static PARSER: LambdaParser = LambdaParser::new());
thread_local!(static PARSER: RawLambdaASTParser = RawLambdaASTParser::new());
/// Parse AIR `source_code` to `Box<Instruction>`
/// Parses an AIR lambda expression into a `LambdaAST`.
pub fn parse(lambda: &str) -> LambdaParserResult<'_, LambdaAST> {
PARSER.with(|parser| {
let mut errors = Vec::new();
let lexer = AccessorsLexer::new(lambda);
let lexer = LambdaASTLexer::new(lambda);
let result = parser.parse(lambda, &mut errors, lexer);
match result {
Ok(accessors) if errors.is_empty() => try_to_lambda(accessors),
Ok(lambda_ast) if errors.is_empty() => lambda_ast.try_into().map_err(Into::into),
Ok(_) => Err(errors.into()),
Err(e) => Err(e.into()),
}
})
}
fn try_to_lambda(accessors: Vec<ValueAccessor>) -> LambdaParserResult<'_, LambdaAST> {
LambdaAST::try_from(accessors).or(Err(LambdaParserError::EmptyLambda))
impl<'input> TryFrom<RawLambdaAST<'input>> for LambdaAST<'input> {
    type Error = IncorrectLambdaError;

    /// Converts the raw output of the generated parser into a `LambdaAST`.
    ///
    /// - a functor is wrapped with `LambdaAST::from_functor`;
    /// - a value path is passed to `LambdaAST::try_from_accessors`, and any failure of that
    ///   call is reported as `IncorrectLambdaError::EmptyLambda`;
    /// - the error placeholder is reported as `IncorrectLambdaError::InternalError`.
    fn try_from(raw_lambda_ast: RawLambdaAST<'input>) -> Result<Self, Self::Error> {
        match raw_lambda_ast {
            RawLambdaAST::Error => Err(IncorrectLambdaError::InternalError),
            RawLambdaAST::Functor(functor) => Ok(LambdaAST::from_functor(functor)),
            RawLambdaAST::ValuePath(accessors) => LambdaAST::try_from_accessors(accessors)
                .map_err(|_| IncorrectLambdaError::EmptyLambda),
        }
    }
}
/// Intermediate result produced by the generated parser before it is converted into a `LambdaAST`.
#[derive(Debug, PartialEq, Eq, Clone)]
pub(crate) enum RawLambdaAST<'input> {
/// A functor; the grammar currently produces only `Functor::Length` here.
Functor(Functor),
/// The accessors that follow the `.$` value path starter.
ValuePath(Vec<ValueAccessor<'input>>),
// Placeholder returned on error recovery: it lets the parser collect all errors of a lambda
// expression instead of stopping at the very first one.
Error,
}

View File

@ -16,21 +16,24 @@
use super::errors::LexerError;
use super::token::Token;
use crate::parser::lexer::is_air_alphanumeric;
use std::iter::Peekable;
use std::str::CharIndices;
const ARRAY_IDX_BASE: u32 = 10;
const LENGTH_FUNCTOR: &str = ".length";
const VALUE_PATH_STARTER: &str = ".$";
pub type Spanned<Token, Loc, Error> = Result<(Loc, Token, Loc), Error>;
pub struct AccessorsLexer<'input> {
pub struct LambdaASTLexer<'input> {
input: &'input str,
chars: Peekable<CharIndices<'input>>,
is_first_token: bool,
}
impl<'input> Iterator for AccessorsLexer<'input> {
impl<'input> Iterator for LambdaASTLexer<'input> {
type Item = Spanned<Token<'input>, usize, LexerError>;
fn next(&mut self) -> Option<Self::Item> {
@ -38,20 +41,30 @@ impl<'input> Iterator for AccessorsLexer<'input> {
}
}
impl<'input> AccessorsLexer<'input> {
impl<'input> LambdaASTLexer<'input> {
pub fn new(input: &'input str) -> Self {
Self {
input,
chars: input.char_indices().peekable(),
is_first_token: true,
}
}
pub fn next_token(&mut self) -> Option<Spanned<Token<'input>, usize, LexerError>> {
if self.input.is_empty() {
return None;
}
if self.is_first_token {
self.is_first_token = false;
return Some(self.try_parse_first_token());
}
self.chars.next().map(|(start_pos, ch)| match ch {
'[' => Ok((start_pos, Token::OpenSquareBracket, start_pos + 1)),
']' => Ok((start_pos, Token::CloseSquareBracket, start_pos + 1)),
'.' => Ok((start_pos, Token::Selector, start_pos + 1)),
'.' => Ok((start_pos, Token::ValuePathSelector, start_pos + 1)),
d if d.is_digit(ARRAY_IDX_BASE) => self.tokenize_arrays_idx(start_pos),
s if is_air_alphanumeric(s) => self.tokenize_field_name(start_pos),
@ -109,4 +122,24 @@ impl<'input> AccessorsLexer<'input> {
&self.input[start_pos..end_pos + 1]
}
/// Recognizes the token a lambda expression has to begin with.
///
/// The whole input must be exactly the length functor, or it must start with the value path
/// starter; anything else yields `LexerError::UnexpectedSymbol` spanning the entire input.
/// On success the characters of the recognized token are consumed from `self.chars`.
fn try_parse_first_token(&mut self) -> Spanned<Token<'input>, usize, LexerError> {
    let (first_token, width) = match self.input {
        // a functor is accepted only when nothing follows it
        LENGTH_FUNCTOR => (Token::LengthFunctor, LENGTH_FUNCTOR.len()),
        path if path.starts_with(VALUE_PATH_STARTER) => {
            (Token::ValuePathStarter, VALUE_PATH_STARTER.len())
        }
        unexpected => return Err(LexerError::UnexpectedSymbol(0, unexpected.len())),
    };

    self.advance_by(width);
    Ok((0, first_token, width))
}
/// Skips up to `advance_size` items of the underlying character iterator.
fn advance_by(&mut self, advance_size: usize) {
    // `Iterator::advance_by` is unstable, so the items are pulled and dropped one by one
    self.chars.by_ref().take(advance_size).for_each(drop);
}
}

View File

@ -14,16 +14,16 @@
* limitations under the License.
*/
mod accessors_lexer;
mod errors;
mod lambda_ast_lexer;
mod token;
mod utils;
#[cfg(test)]
mod tests;
pub use accessors_lexer::AccessorsLexer;
pub use errors::LexerError;
pub use lambda_ast_lexer::LambdaASTLexer;
pub use token::Token;
pub(self) use utils::is_air_alphanumeric;

View File

@ -14,26 +14,27 @@
* limitations under the License.
*/
use super::accessors_lexer::Spanned;
use super::AccessorsLexer;
use super::lambda_ast_lexer::Spanned;
use super::LambdaASTLexer;
use super::LexerError;
use super::Token;
fn run_lexer(input: &str) -> Vec<Spanned<Token<'_>, usize, LexerError>> {
let lexer = AccessorsLexer::new(input);
let lexer = LambdaASTLexer::new(input);
lexer.collect()
}
#[test]
fn array_access() {
let array_access: &str = ".[0]";
let array_access: &str = ".$.[0]";
let actual = run_lexer(array_access);
let expected = vec![
Spanned::Ok((0, Token::Selector, 1)),
Spanned::Ok((1, Token::OpenSquareBracket, 2)),
Spanned::Ok((2, Token::NumberAccessor(0), 3)),
Spanned::Ok((3, Token::CloseSquareBracket, 4)),
Spanned::Ok((0, Token::ValuePathStarter, 2)),
Spanned::Ok((2, Token::ValuePathSelector, 3)),
Spanned::Ok((3, Token::OpenSquareBracket, 4)),
Spanned::Ok((4, Token::NumberAccessor(0), 5)),
Spanned::Ok((5, Token::CloseSquareBracket, 6)),
];
assert_eq!(actual, expected);
}
@ -41,12 +42,13 @@ fn array_access() {
#[test]
fn field_access() {
let field_name = "some_field_name";
let field_access = format!(".{}", field_name);
let field_access = format!(".$.{}", field_name);
let actual = run_lexer(&field_access);
let expected = vec![
Spanned::Ok((0, Token::Selector, 1)),
Spanned::Ok((1, Token::StringAccessor(field_name), 1 + field_name.len())),
Spanned::Ok((0, Token::ValuePathStarter, 2)),
Spanned::Ok((2, Token::ValuePathSelector, 3)),
Spanned::Ok((3, Token::StringAccessor(field_name), 3 + field_name.len())),
];
assert_eq!(actual, expected);
}

View File

@ -19,8 +19,12 @@ use serde::Serialize;
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum Token<'input> {
LengthFunctor,
//.$
ValuePathStarter,
// .
Selector,
ValuePathSelector,
OpenSquareBracket,
CloseSquareBracket,

View File

@ -31,9 +31,8 @@ pub type LambdaParserResult<'input, T> = std::result::Result<T, LambdaParserErro
pub use errors::LambdaParserError;
pub use lambda_parser::parse;
pub use lexer::AccessorsLexer;
pub use lexer::LambdaASTLexer;
pub use lexer::LexerError;
pub use va_lambda::LambdaParser;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {

View File

@ -14,27 +14,45 @@
* limitations under the License.
*/
use crate::parser::LambdaParser;
use crate::parser::lambda_parser::RawLambdaAST;
use crate::parser::va_lambda::RawLambdaASTParser;
use crate::ValueAccessor;
use air_lambda_ast::Functor;
thread_local!(static TEST_PARSER: LambdaParser = LambdaParser::new());
thread_local!(static TEST_PARSER: RawLambdaASTParser = RawLambdaASTParser::new());
fn parse(source_code: &str) -> Vec<ValueAccessor<'_>> {
fn parse(source_code: &str) -> RawLambdaAST<'_> {
TEST_PARSER.with(|parser| {
let mut errors = Vec::new();
let lexer = crate::parser::AccessorsLexer::new(source_code);
let lexer = crate::parser::LambdaASTLexer::new(source_code);
parser
.parse(source_code, &mut errors, lexer)
.expect("parsing should be successful")
})
}
/// Parses `source_code` and returns its accessors, panicking if the result isn't a value path.
fn parse_to_accessors(source_code: &str) -> Vec<ValueAccessor<'_>> {
    if let RawLambdaAST::ValuePath(accessors) = parse(source_code) {
        return accessors;
    }

    panic!("it should be a value path")
}
/// Parses `source_code` and returns its functor, panicking if the result isn't a functor.
fn parse_to_functor(source_code: &str) -> Functor {
    if let RawLambdaAST::Functor(functor) = parse(source_code) {
        return functor;
    }

    panic!("it should be a functor")
}
#[test]
fn field_access() {
let field_name = "some_field_name";
let lambda = format!(".{}", field_name);
let lambda = format!(".$.{}", field_name);
let actual = parse(&lambda);
let actual = parse_to_accessors(&lambda);
let expected = vec![ValueAccessor::FieldAccessByName { field_name }];
assert_eq!(actual, expected);
}
@ -42,9 +60,9 @@ fn field_access() {
#[test]
fn field_access_with_flattening() {
let field_name = "some_field_name";
let lambda = format!(".{}!", field_name);
let lambda = format!(".$.{}!", field_name);
let actual = parse(&lambda);
let actual = parse_to_accessors(&lambda);
let expected = vec![ValueAccessor::FieldAccessByName { field_name }];
assert_eq!(actual, expected);
}
@ -52,9 +70,9 @@ fn field_access_with_flattening() {
#[test]
fn array_access() {
let idx = 0;
let lambda = format!(".[{}]", idx);
let lambda = format!(".$.[{}]", idx);
let actual = parse(&lambda);
let actual = parse_to_accessors(&lambda);
let expected = vec![ValueAccessor::ArrayAccess { idx }];
assert_eq!(actual, expected);
}
@ -62,9 +80,9 @@ fn array_access() {
#[test]
fn array_access_with_flattening() {
let idx = 0;
let lambda = format!(".[{}]!", idx);
let lambda = format!(".$.[{}]!", idx);
let actual = parse(&lambda);
let actual = parse_to_accessors(&lambda);
let expected = vec![ValueAccessor::ArrayAccess { idx }];
assert_eq!(actual, expected);
}
@ -72,9 +90,9 @@ fn array_access_with_flattening() {
#[test]
fn scalar_access() {
let scalar_name = "some_field_name";
let lambda = format!(".[{}]", scalar_name);
let lambda = format!(".$.[{}]", scalar_name);
let actual = parse(&lambda);
let actual = parse_to_accessors(&lambda);
let expected = vec![ValueAccessor::FieldAccessByScalar { scalar_name }];
assert_eq!(actual, expected);
}
@ -82,9 +100,9 @@ fn scalar_access() {
#[test]
fn scalar_access_with_flattening() {
let scalar_name = "some_scalar_name";
let lambda = format!(".[{}]!", scalar_name);
let lambda = format!(".$.[{}]!", scalar_name);
let actual = parse(&lambda);
let actual = parse_to_accessors(&lambda);
let expected = vec![ValueAccessor::FieldAccessByScalar { scalar_name }];
assert_eq!(actual, expected);
}
@ -93,9 +111,9 @@ fn scalar_access_with_flattening() {
fn field_array_access() {
let field_name = "some_field_name";
let idx = 1;
let lambda = format!(".{}.[{}]", field_name, idx);
let lambda = format!(".$.{}.[{}]", field_name, idx);
let actual = parse(&lambda);
let actual = parse_to_accessors(&lambda);
let expected = vec![
ValueAccessor::FieldAccessByName { field_name },
ValueAccessor::ArrayAccess { idx },
@ -107,9 +125,9 @@ fn field_array_access() {
fn field_scalar_access() {
let field_name = "some_field_name";
let scalar_name = "some_scalar_name";
let lambda = format!(".{}.[{}]", field_name, scalar_name);
let lambda = format!(".$.{}.[{}]", field_name, scalar_name);
let actual = parse(&lambda);
let actual = parse_to_accessors(&lambda);
let expected = vec![
ValueAccessor::FieldAccessByName { field_name },
ValueAccessor::FieldAccessByScalar { scalar_name },
@ -121,9 +139,9 @@ fn field_scalar_access() {
fn scalar_array_access() {
let scalar_name = "some_scalar_name";
let idx = 1;
let lambda = format!(".[{}].[{}]", scalar_name, idx);
let lambda = format!(".$.[{}].[{}]", scalar_name, idx);
let actual = parse(&lambda);
let actual = parse_to_accessors(&lambda);
let expected = vec![
ValueAccessor::FieldAccessByScalar { scalar_name },
ValueAccessor::ArrayAccess { idx },
@ -135,9 +153,9 @@ fn scalar_array_access() {
fn field_array_access_without_dot() {
let field_name = "some_field_name";
let idx = 1;
let lambda = format!(".{}[{}]", field_name, idx);
let lambda = format!(".$.{}[{}]", field_name, idx);
let actual = parse(&lambda);
let actual = parse_to_accessors(&lambda);
let expected = vec![
ValueAccessor::FieldAccessByName { field_name },
ValueAccessor::ArrayAccess { idx },
@ -149,9 +167,9 @@ fn field_array_access_without_dot() {
fn array_field_access() {
let field_name = "some_field_name";
let idx = 1;
let lambda = format!(".[{}].{}", idx, field_name);
let lambda = format!(".$.[{}].{}", idx, field_name);
let actual = parse(&lambda);
let actual = parse_to_accessors(&lambda);
let expected = vec![
ValueAccessor::ArrayAccess { idx },
ValueAccessor::FieldAccessByName { field_name },
@ -163,9 +181,9 @@ fn array_field_access() {
fn array_scalar_access() {
let scalar_name = "some_scalar_name";
let idx = 1;
let lambda = format!(".[{}].[{}]", idx, scalar_name);
let lambda = format!(".$.[{}].[{}]", idx, scalar_name);
let actual = parse(&lambda);
let actual = parse_to_accessors(&lambda);
let expected = vec![
ValueAccessor::ArrayAccess { idx },
ValueAccessor::FieldAccessByScalar { scalar_name },
@ -179,9 +197,12 @@ fn many_array_field_access() {
let field_name_2 = "some_field_name_2";
let idx_1 = 1;
let idx_2 = u32::MAX;
let lambda = format!(".[{}].{}.[{}].{}", idx_1, field_name_1, idx_2, field_name_2);
let lambda = format!(
".$.[{}].{}.[{}].{}",
idx_1, field_name_1, idx_2, field_name_2
);
let actual = parse(&lambda);
let actual = parse_to_accessors(&lambda);
let expected = vec![
ValueAccessor::ArrayAccess { idx: idx_1 },
ValueAccessor::FieldAccessByName {
@ -204,11 +225,11 @@ fn many_array_field_scalar_access() {
let scalar_name_1 = "some_scalar_name_1";
let scalar_name_2 = "some_scalar_name_2";
let lambda = format!(
".[{}].[{}].{}.[{}].[{}].{}",
".$.[{}].[{}].{}.[{}].[{}].{}",
idx_1, scalar_name_1, field_name_1, idx_2, scalar_name_2, field_name_2
);
let actual = parse(&lambda);
let actual = parse_to_accessors(&lambda);
let expected = vec![
ValueAccessor::ArrayAccess { idx: idx_1 },
ValueAccessor::FieldAccessByScalar {
@ -227,3 +248,25 @@ fn many_array_field_scalar_access() {
];
assert_eq!(actual, expected);
}
/// The bare `.length` lambda must be parsed into the length functor.
#[test]
fn parse_length_functor() {
    let lambda = ".length";

    // `lambda` is already a `&str`, so it is passed as is instead of borrowing it again
    let actual = parse_to_functor(lambda);
    let expected = Functor::Length;
    assert_eq!(actual, expected);
}
/// Anything following `.length` must be rejected with a user (lexer) error.
#[test]
fn parse_length_functor_with_following_accessors() {
    let lambda = ".length.[0]";

    let parse_result = TEST_PARSER.with(|parser| {
        let lexer = crate::parser::LambdaASTLexer::new(lambda);
        let mut recovered_errors = Vec::new();
        parser.parse(lambda, &mut recovered_errors, lexer)
    });

    assert!(matches!(
        parse_result,
        Err(lalrpop_util::ParseError::User { .. })
    ));
}

View File

@ -1,4 +1,6 @@
use crate::ValueAccessor;
use crate::parser::lambda_parser::RawLambdaAST;
use crate::Functor;
use crate::parser::lexer::LexerError;
use crate::parser::lexer::Token;
@ -7,7 +9,13 @@ use lalrpop_util::ErrorRecovery;
// the only reason `input` is needed here is to introduce the lifetime for Token
grammar<'err, 'input>(input: &'input str, errors: &'err mut Vec<ErrorRecovery<usize, Token<'input>, LexerError>>);
pub Lambda: Vec<ValueAccessor<'input>> = <ValueAccessor*> => <>;
// Entry rule: a lambda is either a value path (the ".$" starter followed by zero or more
// accessors) or the length functor.
pub(crate) RawLambdaAST: RawLambdaAST<'input> = {
<value_path_starter: ".$"> <accessors: ValueAccessor*> => RawLambdaAST::ValuePath(accessors),
length_functor => RawLambdaAST::Functor(Functor::Length),
// error recovery alternative: the recovered error is stored into `errors` and a placeholder
// node is returned instead of a real AST
! => { errors.push(<>); RawLambdaAST::Error },
}
ValueAccessor: ValueAccessor<'input> = {
<maybe_dot_selector:"."?> "[" <idx: number_accessor> "]" <maybe_flatten_sign:"!"?> => {
@ -30,7 +38,8 @@ extern {
type Error = LexerError;
enum Token<'input> {
"." => Token::Selector,
".$" => Token::ValuePathStarter,
"." => Token::ValuePathSelector,
"[" => Token::OpenSquareBracket,
"]" => Token::CloseSquareBracket,
@ -39,5 +48,7 @@ extern {
string_accessor => Token::StringAccessor(<&'input str>),
"!" => Token::FlatteningSign,
length_functor => Token::LengthFunctor,
}
}

File diff suppressed because it is too large Load Diff