Use a patched wson JSON parser

This replaces the in-tree JSON parser that was based on nom's example JSON parser.
This commit is contained in:
Ivan Boldyrev 2022-11-22 14:37:59 +04:00
parent 4e92f59cc1
commit af169abb4e
5 changed files with 12 additions and 231 deletions

9
Cargo.lock generated
View File

@ -203,6 +203,7 @@ dependencies = [
"pretty_assertions 1.3.0",
"serde_json",
"strum",
"wson",
]
[[package]]
@ -3147,6 +3148,14 @@ version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
[[package]]
name = "wson"
version = "0.1.1"
source = "git+https://github.com/monoid/wson/?branch=fluence-edition#e8e1f969efaa40abae2138d8dc12c833c0a44a7e"
dependencies = [
"nom 7.1.1",
]
[[package]]
name = "yansi"
version = "0.5.1"

View File

@ -22,6 +22,7 @@ strum = { version="0.24.1", features=["derive"] }
nom = "7.1.1"
nom_locate = "4.0.0"
serde_json = "1.0.86"
wson = { git = "https://github.com/monoid/wson/", branch = "fluence-edition" }
[dev-dependencies]
maplit = "1.0.2"

View File

@ -1,228 +0,0 @@
/*
* Copyright 2022 Fluence Labs Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*
* Based on the JSON parser from nom examples:
*
* Copyright (c) 2014-2019 Geoffroy Couprie
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
use nom::{
branch::alt,
bytes::complete::{escaped, tag, take_while},
character::complete::{char, one_of, satisfy},
combinator::{cut, map, opt, value},
error::{context, ContextError, ParseError},
multi::separated_list0,
number::complete::double,
sequence::{delimited, preceded, separated_pair, terminated},
IResult,
};
use std::collections::HashMap;
use std::str;
/// In-memory representation of a parsed JSON document.
///
/// Numbers are stored as `f64`, so the type can only be `PartialEq`, not `Eq`.
/// `Clone` is derived so that callers can duplicate a parsed value instead of
/// re-parsing the source text.
#[derive(Debug, Clone, PartialEq)]
pub enum JsonValue {
    /// The JSON `null` literal.
    Null,
    /// A JSON string.
    Str(String),
    /// The JSON `true` / `false` literals.
    Boolean(bool),
    /// A JSON number, kept as a double-precision float.
    Num(f64),
    /// A JSON array; element order is preserved.
    Array(Vec<JsonValue>),
    /// A JSON object, keyed by member name.
    Object(HashMap<String, JsonValue>),
}
/// Whitespace parser: consumes zero or more leading space, tab, carriage
/// return and line feed characters and returns the consumed slice.
///
/// This is the smallest building block of the grammar; the larger parsers
/// below use it to skip insignificant whitespace between tokens.
fn sp<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> {
    // `take_while` builds the parser; applying it to `i` runs it.
    take_while(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n'))(i)
}
/// Parses the interior of a JSON string literal, i.e. the text between the
/// surrounding double quotes, and returns it as a subslice of the input (no
/// allocation, escape sequences are left undecoded).
///
/// Accepted content:
/// - any character from U+0020 upwards except `\` and `"`;
/// - the escape sequences `\"`, `\n` and `\\`.
///
/// NOTE(review): the other JSON escapes (`\/`, `\b`, `\f`, `\r`, `\t`,
/// `\uXXXX`) are not in the escapable set and are therefore rejected.
///
/// The parser stops, without consuming it, at the first unescaped `"`.
fn parse_str<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, &'a str, E> {
    // An empty body means the input is positioned directly at the closing
    // quote. NOTE(review): nom 7's `escaped` reports `ErrorKind::Escaped` when
    // it cannot consume anything before a non-escape character, which would
    // make the literal `""` unparsable - confirm against the pinned nom
    // version. The guard yields the same result `escaped` would give if it
    // accepted the empty match, so it is harmless either way.
    if i.starts_with('"') {
        return Ok((i, &i[..0]));
    }

    escaped(
        satisfy(|c| c >= '\u{20}' && c != '\\' && c != '"'),
        '\\',
        one_of("\"n\\"),
    )(i)
}
/// Parses the literal `true` or `false` into the corresponding `bool`.
///
/// `tag` recognizes the exact keyword, `value` replaces the matched text with
/// a constant, and `alt` tries `true` first and falls back to `false`.
fn boolean<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, bool, E> {
    alt((value(true, tag("true")), value(false, tag("false"))))(input)
}
fn null<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, (), E> {
value((), tag("null"))(input)
}
/// Parses a double-quoted string literal and returns its interior, still in
/// escaped form, as a slice of the input.
///
/// Error handling:
/// - once the opening `"` has been seen, `cut` turns any later error into a
///   failure, so an enclosing `alt` does not try its other branches;
/// - `context` labels errors raised here with `"string"`.
fn string<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
    i: &'a str,
) -> IResult<&'a str, &'a str, E> {
    let opening_quote = char('"');
    let body_then_closing_quote = terminated(parse_str, char('"'));

    context(
        "string",
        preceded(opening_quote, cut(body_then_closing_quote)),
    )(i)
}
/// Parses a JSON array: `[`, zero or more comma-separated values, then `]`.
///
/// Whitespace is skipped before each comma and before the closing bracket;
/// `json_value` skips the whitespace in front of every element itself. After
/// the opening `[` has matched, errors are turned into failures by `cut`, and
/// `context` labels them with `"array"`.
fn array<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
    i: &'a str,
) -> IResult<&'a str, Vec<JsonValue>, E> {
    let comma = preceded(sp, char(','));
    let closing_bracket = preceded(sp, char(']'));
    // `separated_list0` collects the parsed elements into a `Vec`.
    let elements = separated_list0(comma, json_value);

    context(
        "array",
        preceded(char('['), cut(terminated(elements, closing_bracket))),
    )(i)
}
/// Parses one object member of the form `"key" : value`.
///
/// Returns the key (as a slice of the input, still in escaped form) together
/// with the parsed value. Once a key has been read, a missing `:` is reported
/// as a failure because the separator is wrapped in `cut`.
fn key_value<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
    i: &'a str,
) -> IResult<&'a str, (&'a str, JsonValue), E> {
    let key = preceded(sp, string);
    let colon = cut(preceded(sp, char(':')));

    separated_pair(key, colon, json_value)(i)
}
/// Parses a JSON object: `{`, zero or more comma-separated `"key": value`
/// members, then `}`.
///
/// The members are collected into a `HashMap` with owned `String` keys. After
/// the opening `{` has matched, errors are turned into failures by `cut`, and
/// `context` labels them with `"map"`.
pub fn hash<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
    i: &'a str,
) -> IResult<&'a str, HashMap<String, JsonValue>, E> {
    let comma = preceded(sp, char(','));
    let closing_brace = preceded(sp, char('}'));
    let members = map(
        separated_list0(comma, key_value),
        |pairs: Vec<(&'a str, JsonValue)>| -> HashMap<String, JsonValue> {
            pairs
                .into_iter()
                .map(|(key, val)| (key.to_owned(), val))
                .collect()
        },
    );

    context(
        "map",
        preceded(char('{'), cut(terminated(members, closing_brace))),
    )(i)
}
/// Parses any JSON value, skipping leading whitespace first.
///
/// The alternatives are tried in this order: object, array, string, number,
/// boolean, `null`. The first one that succeeds determines the resulting
/// [`JsonValue`] variant.
pub fn json_value<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
    i: &'a str,
) -> IResult<&'a str, JsonValue, E> {
    let object = map(hash, JsonValue::Object);
    let list = map(array, JsonValue::Array);
    let text = map(string, |s: &'a str| JsonValue::Str(s.to_owned()));
    let number = map(double, JsonValue::Num);
    let flag = map(boolean, JsonValue::Boolean);
    let nothing = map(null, |()| JsonValue::Null);

    preceded(sp, alt((object, list, text, number, flag, nothing)))(i)
}
/// Parses a top-level JSON document: an object, an array or `null`,
/// optionally surrounded by whitespace.
///
/// Strings, numbers and booleans are not accepted at the root.
// Kept for completeness; nothing in the crate calls it, hence the lint allowance.
#[allow(dead_code)]
pub fn root<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
    i: &'a str,
) -> IResult<&'a str, JsonValue, E> {
    let object = map(hash, JsonValue::Object);
    let list = map(array, JsonValue::Array);
    let nothing = map(null, |()| JsonValue::Null);

    let document = alt((object, list, nothing));
    delimited(sp, document, opt(sp))(i)
}

View File

@ -14,7 +14,6 @@
* limitations under the License.
*/
mod json;
pub(crate) mod parser;
use crate::services::JValue;

View File

@ -49,13 +49,13 @@ pub fn parse_kw(inp: &str) -> IResult<&str, ServiceDefinition, ParseError> {
let json_value = || {
cut(context(
"result value has to be a valid JSON",
recognize(super::json::json_value),
recognize(wson::value_parser),
))
};
let json_map = || {
cut(context(
"result value has to be a valid JSON hash",
recognize(super::json::hash),
recognize(wson::object),
))
};