Use custom logic for lambda instead of json path (#154)

This commit is contained in:
Mike Voronov
2021-10-18 23:23:30 +03:00
committed by GitHub
parent 4251a36842
commit 1c55d34981
77 changed files with 3149 additions and 746 deletions

View File

@ -0,0 +1,36 @@
/*
* Copyright 2021 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#![deny(
dead_code,
nonstandard_style,
unused_imports,
unused_mut,
unused_variables,
unused_unsafe,
unreachable_patterns
)]
mod parser;
pub use parser::parse;
pub use parser::AlgebraLexer;
pub use parser::LambdaParser;
pub use parser::LambdaParserError;
pub use parser::LexerError;
pub use air_lambda_ast::LambdaAST;
pub use air_lambda_ast::ValueAccessor;

View File

@ -0,0 +1,51 @@
/*
* Copyright 2021 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::parser::lexer::LexerError;
use crate::parser::lexer::Token;
use lalrpop_util::ErrorRecovery;
use lalrpop_util::ParseError;
use thiserror::Error as ThisError;
/// Errors that can be returned when parsing a lambda expression.
#[derive(ThisError, Debug, Clone, PartialEq, Eq)]
pub enum LambdaParserError<'input> {
/// An error produced by the lexer; its message is forwarded as is.
#[error(transparent)]
LexerError(#[from] LexerError),
/// Parsing succeeded but produced no accessors at all.
#[error("provided lambda expression doesn't contain any algebras")]
EmptyLambda,
/// An error returned by the generated parser; displayed via its `Debug` form.
#[error("{0:?}")]
ParseError(ParseError<usize, Token<'input>, LexerError>),
/// Errors the parser collected into the recovery list while still returning
/// a result; displayed via their `Debug` form.
#[error("{0:?}")]
RecoveryErrors(Vec<ErrorRecovery<usize, Token<'input>, LexerError>>),
}
impl<'input> From<ParseError<usize, Token<'input>, LexerError>> for LambdaParserError<'input> {
fn from(e: ParseError<usize, Token<'input>, LexerError>) -> Self {
Self::ParseError(e)
}
}
impl<'input> From<Vec<ErrorRecovery<usize, Token<'input>, LexerError>>>
for LambdaParserError<'input>
{
fn from(errors: Vec<ErrorRecovery<usize, Token<'input>, LexerError>>) -> Self {
Self::RecoveryErrors(errors)
}
}

View File

@ -0,0 +1,52 @@
/*
* Copyright 2021 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use super::lexer::AlgebraLexer;
use super::va_lambda;
use super::LambdaParserError;
use super::LambdaParserResult;
use crate::LambdaAST;
use crate::ValueAccessor;
use va_lambda::LambdaParser;
// The parser is created once per thread and reused, because instantiating it
// (its internal regexes in particular) is expensive.
// See also https://github.com/lalrpop/lalrpop/issues/269
thread_local!(static PARSER: LambdaParser = LambdaParser::new());

/// Parses a `lambda` expression into a [`LambdaAST`].
///
/// # Errors
///
/// Returns an error when the parser fails, when it reports any recovery
/// errors, or when the expression contains no accessors.
pub fn parse(lambda: &str) -> LambdaParserResult<'_, LambdaAST> {
    PARSER.with(|parser| {
        let mut errors = Vec::new();
        let lexer = AlgebraLexer::new(lambda);

        let accessors = match parser.parse(lambda, &mut errors, lexer) {
            Ok(accessors) => accessors,
            Err(parse_error) => return Err(parse_error.into()),
        };

        // A successful parse may still have recorded recovery errors.
        if errors.is_empty() {
            try_to_lambda(accessors)
        } else {
            Err(errors.into())
        }
    })
}
/// Turns the parsed accessors into a [`LambdaAST`], rejecting an empty list
/// with [`LambdaParserError::EmptyLambda`].
fn try_to_lambda(algebras: Vec<ValueAccessor>) -> LambdaParserResult<'_, LambdaAST> {
    match algebras.is_empty() {
        true => Err(LambdaParserError::EmptyLambda),
        // SAFETY: the list is non-empty in this arm; presumably that is the
        // invariant `new_unchecked` relies on — confirm against air_lambda_ast.
        false => Ok(unsafe { LambdaAST::new_unchecked(algebras) }),
    }
}

View File

@ -0,0 +1,108 @@
/*
* Copyright 2021 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use super::errors::LexerError;
use super::token::Token;
use crate::parser::lexer::is_air_alphanumeric;
use std::iter::Peekable;
use std::str::CharIndices;
/// Radix used both to recognise array index digits and to detect where an
/// index token starts.
const ARRAY_IDX_BASE: u32 = 10;
/// Item produced by the lexer: `(start, token, end)` on success, or an error.
pub type Spanned<Token, Loc, Error> = Result<(Loc, Token, Loc), Error>;
/// Lexer that splits a lambda expression into [`Token`]s.
pub struct AlgebraLexer<'input> {
// The whole source string; token text is returned as slices of it.
input: &'input str,
// Peekable cursor over `(byte offset, char)` pairs of `input`.
chars: Peekable<CharIndices<'input>>,
}
/// Iterating over the lexer yields the same items as repeatedly calling
/// [`AlgebraLexer::next_token`].
impl<'input> Iterator for AlgebraLexer<'input> {
    type Item = Spanned<Token<'input>, usize, LexerError>;

    fn next(&mut self) -> Option<Self::Item> {
        Self::next_token(self)
    }
}
impl<'input> AlgebraLexer<'input> {
    /// Creates a lexer positioned at the beginning of `input`.
    pub fn new(input: &'input str) -> Self {
        Self {
            input,
            chars: input.char_indices().peekable(),
        }
    }

    /// Produces the next token together with its `(start, end)` byte offsets.
    ///
    /// Returns `None` once the input is exhausted, and
    /// `Some(Err(LexerError::UnexpectedSymbol))` for a character that does not
    /// start any token.
    pub fn next_token(&mut self) -> Option<Spanned<Token<'input>, usize, LexerError>> {
        self.chars.next().map(|(start_pos, ch)| match ch {
            '[' => Ok((start_pos, Token::OpenSquareBracket, start_pos + 1)),
            ']' => Ok((start_pos, Token::CloseSquareBracket, start_pos + 1)),
            '.' => Ok((start_pos, Token::Selector, start_pos + 1)),
            // Digits are checked before field names, so a token starting with
            // a digit is always lexed as an array index.
            d if d.is_digit(ARRAY_IDX_BASE) => self.tokenize_arrays_idx(start_pos),
            s if is_air_alphanumeric(s) => self.tokenize_field_name(start_pos),
            '!' => Ok((start_pos, Token::FlatteningSign, start_pos + 1)),
            // Offsets are byte offsets, so the span end must account for the
            // encoded width of the offending character.
            _ => Err(LexerError::UnexpectedSymbol(
                start_pos,
                start_pos + ch.len_utf8(),
            )),
        })
    }

    /// Lexes a run of decimal digits starting at `start_pos` into
    /// `Token::ArrayIdx`, or `LexerError::ParseIntError` if the digits do not
    /// fit into `u32`.
    fn tokenize_arrays_idx(
        &mut self,
        start_pos: usize,
    ) -> Spanned<Token<'input>, usize, LexerError> {
        let array_idx = self.tokenize_until(start_pos, |ch| ch.is_digit(ARRAY_IDX_BASE));
        let end_pos = start_pos + array_idx.len();

        array_idx
            .parse::<u32>()
            .map(|idx| (start_pos, Token::ArrayIdx(idx), end_pos))
            .map_err(|e| LexerError::ParseIntError(start_pos, end_pos, e))
    }

    /// Lexes a run of field-name characters starting at `start_pos` into
    /// `Token::FieldName`.
    fn tokenize_field_name(
        &mut self,
        start_pos: usize,
    ) -> Spanned<Token<'input>, usize, LexerError> {
        let field_name = self.tokenize_until(start_pos, is_air_alphanumeric);
        Ok((
            start_pos,
            Token::FieldName(field_name),
            start_pos + field_name.len(),
        ))
    }

    /// Consumes characters while `condition` holds and returns the slice of
    /// the input from `start_pos` up to the first character that was not
    /// consumed.
    ///
    /// The character at `start_pos` has already been consumed by the caller.
    /// The slice end is taken from the byte offset of the next unconsumed
    /// character (or the input length), so it always falls on a char boundary
    /// even when the last accepted character is multi-byte.
    fn tokenize_until(
        &mut self,
        start_pos: usize,
        condition: impl Fn(char) -> bool,
    ) -> &'input str {
        let end_pos = loop {
            match self.chars.peek() {
                Some(&(_, ch)) if condition(ch) => {
                    self.chars.next();
                }
                Some(&(pos, _)) => break pos,
                None => break self.input.len(),
            }
        };

        &self.input[start_pos..end_pos]
    }
}

View File

@ -0,0 +1,28 @@
/*
* Copyright 2021 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use thiserror::Error as ThisError;
use std::num::ParseIntError;
/// Errors produced while splitting a lambda expression into tokens.
#[derive(ThisError, Debug, Clone, PartialEq, Eq)]
pub enum LexerError {
/// A character that does not start any token; the fields are the start and
/// end positions of the offending symbol.
#[error("unexpected symbol for value algebra")]
UnexpectedSymbol(usize, usize),
/// An array index could not be parsed into a number; the fields are the
/// start and end positions of the index and the underlying error, whose
/// message is used as the display text.
#[error("{2}")]
ParseIntError(usize, usize, #[source] ParseIntError),
}

View File

@ -0,0 +1,29 @@
/*
* Copyright 2021 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
mod algebra_lexer;
mod errors;
mod token;
mod utils;
#[cfg(test)]
mod tests;
pub use algebra_lexer::AlgebraLexer;
pub use errors::LexerError;
pub use token::Token;
pub(self) use utils::is_air_alphanumeric;

View File

@ -0,0 +1,52 @@
/*
* Copyright 2021 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use super::algebra_lexer::Spanned;
use super::AlgebraLexer;
use super::LexerError;
use super::Token;
/// Runs the lexer over `input` and gathers every produced item.
fn run_lexer(input: &str) -> Vec<Spanned<Token<'_>, usize, LexerError>> {
    AlgebraLexer::new(input).collect()
}
// `.[0]` must be lexed into selector, bracket, index and bracket tokens.
#[test]
fn array_access() {
    let source: &str = ".[0]";

    let expected: Vec<Spanned<Token<'_>, usize, LexerError>> = vec![
        Ok((0, Token::Selector, 1)),
        Ok((1, Token::OpenSquareBracket, 2)),
        Ok((2, Token::ArrayIdx(0), 3)),
        Ok((3, Token::CloseSquareBracket, 4)),
    ];

    assert_eq!(run_lexer(source), expected);
}
// `.<name>` must be lexed into a selector followed by a field name token.
#[test]
fn field_access() {
    let name = "some_field_name";
    let source = format!(".{}", name);

    let expected: Vec<Spanned<Token<'_>, usize, LexerError>> = vec![
        Ok((0, Token::Selector, 1)),
        Ok((1, Token::FieldName(name), 1 + name.len())),
    ];

    assert_eq!(run_lexer(&source), expected);
}

View File

@ -0,0 +1,33 @@
/*
* Copyright 2021 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use serde::Deserialize;
use serde::Serialize;
/// Tokens the lambda lexer can produce.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum Token<'input> {
// .
Selector,
// [
OpenSquareBracket,
// ]
CloseSquareBracket,
// a run of decimal digits parsed into a number
ArrayIdx(u32),
// a field name, borrowed from the lexed input
FieldName(&'input str),
// !
FlatteningSign,
}

View File

@ -0,0 +1,20 @@
/*
* Copyright 2021 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// TODO: decouple it to a separate crate
/// Tells whether `ch` may appear in a field name: any alphanumeric character,
/// an underscore or a dash.
pub(super) fn is_air_alphanumeric(ch: char) -> bool {
    matches!(ch, '_' | '-') || ch.is_alphanumeric()
}

View File

@ -0,0 +1,42 @@
/*
* Copyright 2021 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
pub mod lambda_parser;
mod lexer;
// va_lambda is auto-generated, so exclude it from `cargo fmt -- --check` and `cargo clippy`
#[rustfmt::skip]
#[allow(clippy::all)]
mod va_lambda;
mod errors;
#[cfg(test)]
pub mod tests;
pub type LambdaParserResult<'input, T> = std::result::Result<T, LambdaParserError<'input>>;
pub use errors::LambdaParserError;
pub use lambda_parser::parse;
pub use lexer::AlgebraLexer;
pub use lexer::LexerError;
pub use va_lambda::LambdaParser;
/// A pair of `left` and `right` positions.
// NOTE(review): not referenced in the visible code; presumably the bounds of a
// region in the lambda source — confirm the intended units and inclusivity.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
pub left: usize,
pub right: usize,
}

View File

@ -0,0 +1,134 @@
/*
* Copyright 2021 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
use crate::parser::LambdaParser;
use crate::ValueAccessor;
// One parser instance per test thread.
thread_local!(static TEST_PARSER: LambdaParser = LambdaParser::new());

/// Parses `source_code` with the generated parser and returns the accessors,
/// panicking if the parser returns an error.
fn parse(source_code: &str) -> Vec<ValueAccessor<'_>> {
    TEST_PARSER.with(|lambda_parser| {
        let token_stream = crate::parser::AlgebraLexer::new(source_code);
        let mut recovered = Vec::new();

        let outcome = lambda_parser.parse(source_code, &mut recovered, token_stream);
        outcome.expect("parsing should be successful")
    })
}
// A single `.<name>` yields one field access.
#[test]
fn field_access() {
    let name = "some_field_name";
    let source = format!(".{}", name);

    let accessors = parse(&source);

    assert_eq!(
        accessors,
        vec![ValueAccessor::FieldAccess { field_name: name }]
    );
}
// A trailing `!` after a field name does not change the parsed accessor.
#[test]
fn field_access_with_flattening() {
    let name = "some_field_name";
    let source = format!(".{}!", name);

    let accessors = parse(&source);

    assert_eq!(
        accessors,
        vec![ValueAccessor::FieldAccess { field_name: name }]
    );
}
// A single `.[<idx>]` yields one array access.
#[test]
fn array_access() {
    let position = 0;
    let source = format!(".[{}]", position);

    let accessors = parse(&source);

    assert_eq!(
        accessors,
        vec![ValueAccessor::ArrayAccess { idx: position }]
    );
}
// A trailing `!` after an array access does not change the parsed accessor.
#[test]
fn array_access_with_flattening() {
    let position = 0;
    let source = format!(".[{}]!", position);

    let accessors = parse(&source);

    assert_eq!(
        accessors,
        vec![ValueAccessor::ArrayAccess { idx: position }]
    );
}
// `.<name>.[<idx>]` yields a field access followed by an array access.
#[test]
fn field_array_access() {
    let name = "some_field_name";
    let position = 1;
    let source = format!(".{}.[{}]", name, position);

    let accessors = parse(&source);

    assert_eq!(
        accessors,
        vec![
            ValueAccessor::FieldAccess { field_name: name },
            ValueAccessor::ArrayAccess { idx: position },
        ]
    );
}
// The dot before `[` is optional: `.<name>[<idx>]` parses the same way.
#[test]
fn field_array_access_without_dot() {
    let name = "some_field_name";
    let position = 1;
    let source = format!(".{}[{}]", name, position);

    let accessors = parse(&source);

    assert_eq!(
        accessors,
        vec![
            ValueAccessor::FieldAccess { field_name: name },
            ValueAccessor::ArrayAccess { idx: position },
        ]
    );
}
// `.[<idx>].<name>` yields an array access followed by a field access.
#[test]
fn array_field_access() {
    let name = "some_field_name";
    let position = 1;
    let source = format!(".[{}].{}", position, name);

    let accessors = parse(&source);

    assert_eq!(
        accessors,
        vec![
            ValueAccessor::ArrayAccess { idx: position },
            ValueAccessor::FieldAccess { field_name: name },
        ]
    );
}
// A longer chain of alternating array and field accesses, including the
// largest representable index.
#[test]
fn many_array_field_access() {
    let first_name = "some_field_name_1";
    let second_name = "some_field_name_2";
    let first_idx = 1;
    let second_idx = u32::MAX;
    let source = format!(
        ".[{}].{}.[{}].{}",
        first_idx, first_name, second_idx, second_name
    );

    let accessors = parse(&source);

    assert_eq!(
        accessors,
        vec![
            ValueAccessor::ArrayAccess { idx: first_idx },
            ValueAccessor::FieldAccess {
                field_name: first_name,
            },
            ValueAccessor::ArrayAccess { idx: second_idx },
            ValueAccessor::FieldAccess {
                field_name: second_name,
            },
        ]
    );
}

View File

@ -0,0 +1,39 @@
use crate::ValueAlgebra;
use crate::parser::lexer::LexerError;
use crate::parser::lexer::Token;
use lalrpop_util::ErrorRecovery;
// the only thing why input matters here is just introducing lifetime for Token
grammar<'err, 'input>(input: &'input str, errors: &'err mut Vec<ErrorRecovery<usize, Token<'input>, LexerError>>);
// Entry rule: a lambda is a sequence of zero or more value algebras.
pub Lambda: Vec<ValueAlgebra<'input>> = <ValueAlgebra*> => <>;
// NOTE(review): this grammar imports `crate::ValueAlgebra`, while the crate root
// shown alongside re-exports `ValueAccessor` — confirm the two are in sync.
ValueAlgebra: ValueAlgebra<'input> = {
// Array access: the leading "." and the trailing "!" are both optional and
// neither is stored in the produced value.
<maybe_dot_selector:"."?> "[" <idx: array_idx> "]" <maybe_flatten_sign:"!"?> => {
ValueAlgebra::ArrayAccess { idx }
},
// Field access: the leading "." is required; the trailing "!" is optional
// and is not stored in the produced value.
"." <field_name: field_name> <maybe_flatten_sign:"!"?> => {
ValueAlgebra::FieldAccess { field_name }
},
// Error recovery: record the error and emit a placeholder value.
! => { errors.push(<>); ValueAlgebra::Error },
}
// Binds the grammar's terminals to the tokens of the external lexer.
extern {
// Positions are plain `usize` offsets; lexer failures are `LexerError`s.
type Location = usize;
type Error = LexerError;
enum Token<'input> {
"." => Token::Selector,
"[" => Token::OpenSquareBracket,
"]" => Token::CloseSquareBracket,
// Terminals that carry a payload extracted from the token.
array_idx => Token::ArrayIdx(<u32>),
field_name => Token::FieldName(<&'input str>),
"!" => Token::FlatteningSign,
}
}

File diff suppressed because it is too large Load Diff