filter, collector 정리

This commit is contained in:
freestrings 2020-03-23 22:50:08 +09:00
parent 7991bba51d
commit c99879cfbb
2 changed files with 340 additions and 288 deletions

View File

@ -61,20 +61,295 @@ impl fmt::Display for JsonPathError {
}
}
#[derive(Debug, Default)]
/// Stack of filter-expression terms built up while a path is evaluated.
///
/// Each slot is either `Some(term)` or `None`. A `None` slot is what
/// `new_filter_context` pushes; `filter` treats such a slot as "no term yet"
/// and seeds the next term from the caller-supplied current values instead.
#[derive(Debug)]
struct FilterTerms<'a>(Vec<Option<ExprTerm<'a>>>);
impl<'a> FilterTerms<'a> {
/// Opens a new filter context by pushing an empty (`None`) slot onto the
/// term stack, then traces the stack at debug level.
fn new_filter_context(&mut self) {
self.0.push(None);
debug!("new_filter_context: {:?}", self.0);
}
/// Returns `true` when the term stack holds no slots at all
/// (a `None` slot still counts as an entry).
fn is_term_empty(&self) -> bool {
self.0.is_empty()
}
/// Pushes `term` (or an empty slot when `None`) onto the term stack.
fn push_term(&mut self, term: Option<ExprTerm<'a>>) {
self.0.push(term);
}
/// Removes and returns the top slot of the term stack.
///
/// The outer `Option` is `None` when the stack is empty; the inner `Option`
/// is `None` when the popped slot was an empty one.
fn pop_term(&mut self) -> Option<Option<ExprTerm<'a>>> {
self.0.pop()
}
/// Runs `fun` over a `Json` term and pushes the resulting `Json` term.
///
/// * `e` - the term to filter; anything other than `ExprTerm::Json` is
///   treated as a programming error and panics via `unreachable!`.
/// * `fun` - receives the candidate values, an output vector for matches and
///   a set in which it records the indices of candidates that did not match;
///   it returns the `FilterKey` stored in the pushed term.
///
/// When the term carries a `FilterKey::String` key, every object that has
/// that key is replaced by the value under the key before `fun` is called.
/// The pushed term keeps the term's existing parent values if it has any,
/// otherwise the parent becomes the input values whose index `fun` did not
/// report as unmatched.
fn filter_json_term<F: Fn(&Vec<&'a Value>, &mut Vec<&'a Value>, &mut HashSet<usize>) -> FilterKey>(
    &mut self,
    e: ExprTerm<'a>,
    fun: F,
) {
    debug!("filter_json_term: {:?}", e);

    let (rel, fk, vec) = match e {
        ExprTerm::Json(rel, fk, vec) => (rel, fk, vec),
        other => unreachable!("unexpected: ExprTerm: {:?}", other),
    };

    let mut tmp = Vec::new();
    let mut not_matched = HashSet::new();

    let filter_key = match fk {
        Some(FilterKey::String(key)) => {
            // Descend into `key` where an object has it; keep the value itself otherwise.
            let key_contained: Vec<&'a Value> = vec
                .iter()
                .map(|&v| match v {
                    Value::Object(map) => map.get(&key).unwrap_or(v),
                    _ => v,
                })
                .collect();
            fun(&key_contained, &mut tmp, &mut not_matched)
        }
        _ => fun(&vec, &mut tmp, &mut not_matched),
    };

    let parent = match rel {
        Some(existing) => existing,
        // No parent recorded yet: keep only the inputs that were not rejected.
        None => vec
            .iter()
            .enumerate()
            .filter(|(idx, _)| !not_matched.contains(idx))
            .map(|(_, v)| *v)
            .collect(),
    };

    self.0.push(Some(ExprTerm::Json(Some(parent), Some(filter_key), tmp)));
}
/// Seeds a new `Json` term from the current values.
///
/// Does nothing (beyond the debug trace) when `current` is `None`.
/// Otherwise `fun` is run over the current values and a term with no parent,
/// the returned `FilterKey` and the collected matches is pushed. The
/// "not matched" indices reported by `fun` are discarded here.
fn push_json_term<F: Fn(&Vec<&'a Value>, &mut Vec<&'a Value>, &mut HashSet<usize>) -> FilterKey>(
    &mut self,
    current: &Option<Vec<&'a Value>>,
    fun: F,
) {
    debug!("push_json_term: {:?}", &current);

    let values = match current {
        Some(values) => values,
        None => return,
    };

    let mut matched = Vec::new();
    let mut not_matched = HashSet::new();
    let filter_key = fun(values, &mut matched, &mut not_matched);
    self.0.push(Some(ExprTerm::Json(None, Some(filter_key), matched)));
}
/// Pops the top slot and applies `fun` to it.
///
/// * A slot holding a term is filtered through `filter_json_term`.
/// * An empty slot is replaced by a term seeded from `current` via
///   `push_json_term`.
/// * An empty stack leaves everything untouched.
fn filter<F: Fn(&Vec<&'a Value>, &mut Vec<&'a Value>, &mut HashSet<usize>) -> FilterKey>(
    &mut self,
    current: &Option<Vec<&'a Value>>,
    fun: F,
) {
    match self.0.pop() {
        Some(Some(term)) => self.filter_json_term(term, fun),
        Some(None) => self.push_json_term(current, fun),
        None => {}
    }
}
/// Filters the top term by handing every candidate to
/// `ValueWalker::all_with_str` for `key` (with its final flag set to `true`)
/// and tags the resulting term with `FilterKey::All`.
fn filter_all_with_str(&mut self, current: &Option<Vec<&'a Value>>, key: &str) {
    self.filter(current, |candidates, matched, _not_matched| {
        ValueWalker::all_with_str(&candidates, matched, key, true);
        FilterKey::All
    });

    debug!("filter_all_with_str : {}, {:?}", key, self.0);
}
/// Filters the top term down to the values that contain `key`.
///
/// For each candidate:
/// * an object containing `key` is kept (once per distinct address);
/// * an array is itself marked as not matched, and its elements are searched
///   with `ValueWalker::walk_dedup`;
/// * anything else, including objects without `key`, is marked as not matched.
///
/// The resulting term is tagged with `FilterKey::String(key)`.
fn filter_next_with_str(&mut self, current: &Option<Vec<&'a Value>>, key: &str) {
    self.filter(current, |candidates, matched, not_matched| {
        // Addresses already emitted, so a value is never pushed twice.
        let mut visited = HashSet::new();

        for (idx, &candidate) in candidates.iter().enumerate() {
            match candidate {
                Value::Object(map) if map.contains_key(key) => {
                    // `insert` reports whether the address was new.
                    if visited.insert(candidate as *const Value) {
                        matched.push(candidate);
                    }
                }
                Value::Array(items) => {
                    not_matched.insert(idx);
                    for item in items {
                        ValueWalker::walk_dedup(item, matched, key, &mut visited);
                    }
                }
                _ => {
                    not_matched.insert(idx);
                }
            }
        }

        FilterKey::String(key.to_owned())
    });

    debug!("filter_next_with_str : {}, {:?}", key, self.0);
}
/// Selects the element at `index` from every array reachable one step below
/// the current values.
///
/// * For an array value, the element at `index` is taken.
/// * For an object value, every member that is an array contributes its
///   element at `index`.
/// * `index` is truncated to `isize` and resolved through `abs_index`.
///
/// Returns `None` when `current` is `None`. When nothing was selected, the
/// top slot of the term stack is discarded and a single `Value::Null` is
/// returned.
///
/// The debug trace now runs on every path and reports the computed result;
/// previously the early returns skipped it whenever `current` was `Some`.
fn collect_next_with_num(&mut self, current: &Option<Vec<&'a Value>>, index: f64) -> Option<Vec<&'a Value>> {
    fn _collect<'a>(tmp: &mut Vec<&'a Value>, vec: &'a [Value], index: f64) {
        let index = abs_index(index as isize, vec.len());
        if let Some(v) = vec.get(index) {
            tmp.push(v);
        }
    }

    let collected = if let Some(current) = current {
        let mut tmp = Vec::new();
        for c in current {
            match c {
                Value::Object(map) => {
                    for member in map.values() {
                        if let Value::Array(vec) = member {
                            _collect(&mut tmp, vec, index);
                        }
                    }
                }
                Value::Array(vec) => {
                    _collect(&mut tmp, vec, index);
                }
                _ => {}
            }
        }

        if tmp.is_empty() {
            self.0.pop();
            Some(vec![&Value::Null])
        } else {
            Some(tmp)
        }
    } else {
        None
    };

    debug!(
        "collect_next_with_num : {:?}, {:?}",
        &index, &collected
    );

    collected
}
/// Collects every direct child of the current values: all member values of
/// objects and all elements of arrays. Other values contribute nothing.
///
/// Returns `None` when `current` is `None`.
///
/// The debug trace now runs on every path and reports the computed result;
/// previously the early return skipped it whenever `current` was `Some`.
fn collect_next_all(&mut self, current: &Option<Vec<&'a Value>>) -> Option<Vec<&'a Value>> {
    let collected = if let Some(current) = current {
        let mut tmp = Vec::new();
        for c in current {
            match c {
                Value::Object(map) => tmp.extend(map.values()),
                Value::Array(vec) => tmp.extend(vec),
                _ => {}
            }
        }
        Some(tmp)
    } else {
        None
    };

    debug!("collect_next_all : {:?}", &collected);

    collected
}
/// Looks up each of `keys` in every object among the current values and
/// collects the values found, in object order and then key order.
///
/// Returns `None` when `current` is `None`. When nothing was found, the top
/// slot of the term stack is discarded and a single `Value::Null` is
/// returned.
///
/// The debug trace now runs on every path and reports the computed result;
/// previously the early returns skipped it whenever `current` was `Some`.
fn collect_next_with_str(&mut self, current: &Option<Vec<&'a Value>>, keys: &[String]) -> Option<Vec<&'a Value>> {
    let collected = if let Some(current) = current {
        let mut tmp = Vec::new();
        for c in current {
            if let Value::Object(map) = c {
                for key in keys {
                    if let Some(v) = map.get(key) {
                        tmp.push(v)
                    }
                }
            }
        }

        if tmp.is_empty() {
            self.0.pop();
            Some(vec![&Value::Null])
        } else {
            Some(tmp)
        }
    } else {
        None
    };

    debug!(
        "collect_next_with_str : {:?}, {:?}",
        keys, &collected
    );

    collected
}
/// Collects whatever `ValueWalker::all` gathers from the current values.
///
/// Returns `None` when `current` is `None`.
///
/// The debug trace now runs on every path and reports the computed result;
/// previously the early return skipped it whenever `current` was `Some`.
fn collect_all(&mut self, current: &Option<Vec<&'a Value>>) -> Option<Vec<&'a Value>> {
    let collected = if let Some(current) = current {
        let mut tmp = Vec::new();
        ValueWalker::all(&current, &mut tmp);
        Some(tmp)
    } else {
        None
    };

    debug!("collect_all: {:?}", &collected);

    collected
}
/// Collects whatever `ValueWalker::all_with_str` gathers for `key` from the
/// current values (its final flag is passed as `false`).
///
/// Returns `None` when `current` is `None`.
///
/// The debug trace now runs on every path and reports the computed result;
/// previously the early return skipped it whenever `current` was `Some`.
fn collect_all_with_str(&mut self, current: &Option<Vec<&'a Value>>, key: &str) -> Option<Vec<&'a Value>> {
    let collected = if let Some(current) = current {
        let mut tmp = Vec::new();
        ValueWalker::all_with_str(&current, &mut tmp, key, false);
        Some(tmp)
    } else {
        None
    };

    debug!("collect_all_with_str: {}, {:?}", key, &collected);

    collected
}
/// Collects whatever `ValueWalker::all_with_num` gathers for `index` from
/// the current values.
///
/// Returns `None` when `current` is `None`.
///
/// The debug trace now runs on every path and reports the computed result;
/// previously the early return skipped it whenever `current` was `Some`.
fn collect_all_with_num(&mut self, current: &Option<Vec<&'a Value>>, index: f64) -> Option<Vec<&'a Value>> {
    let collected = if let Some(current) = current {
        let mut tmp = Vec::new();
        ValueWalker::all_with_num(&current, &mut tmp, index);
        Some(tmp)
    } else {
        None
    };

    debug!("collect_all_with_num: {}, {:?}", index, &collected);

    collected
}
}
/// State carried while a parsed path is evaluated against a JSON value.
///
/// `'a` is the lifetime of the borrowed JSON values, `'b` the lifetime of a
/// borrowed parse tree.
#[derive(Debug)]
pub struct Selector<'a, 'b> {
// Parse tree, either owned (`node`) or borrowed (`node_ref`).
node: Option<Node>,
node_ref: Option<&'b Node>,
// Borrowed JSON value; presumably the document being queried — confirm.
value: Option<&'a Value>,
// Stack of parse tokens consulted and popped by the `visit_*` methods.
tokens: Vec<ParseToken>,
// NOTE(review): `new()` does not initialise this field and the `visit_*`
// code uses `selector_filter` instead; it looks superseded — confirm.
terms: Vec<Option<ExprTerm<'a>>>,
// Values currently selected; `None` until something has been selected.
current: Option<Vec<&'a Value>>,
// Nested selectors; presumably for sub-paths inside filters — confirm.
selectors: Vec<Selector<'a, 'b>>,
// Filter-term stack plus the filter/collect helpers that operate on it.
selector_filter: FilterTerms<'a>,
}
impl<'a, 'b> Selector<'a, 'b> {
/// Returns an empty selector; simply forwards to [`Selector::new`].
pub fn default() -> Self {
Self::new()
}
/// Creates a selector with no parse tree, no value, no current selection and
/// empty token, nested-selector and filter-term stacks.
pub fn new() -> Self {
// NOTE(review): the next line looks like a leftover from the previous
// implementation; `default()` forwards back to `new()` — confirm it is gone
// in the real file.
Selector::default()
Self {
node: None,
node_ref: None,
value: None,
tokens: Vec::new(),
current: None,
selectors: Vec::new(),
selector_filter: FilterTerms(Vec::new()),
}
}
pub fn str_path(&mut self, path: &str) -> Result<&mut Self, JsonPathError> {
@ -168,250 +443,6 @@ impl<'a, 'b> Selector<'a, 'b> {
}
}
/// Opens a new filter context by pushing an empty (`None`) slot onto
/// `self.terms`, then traces the stack at debug level.
fn new_filter_context(&mut self) {
self.terms.push(None);
debug!("new_filter_context: {:?}", self.terms);
}
/// Pops the top slot of `self.terms` and applies `fun` to it.
///
/// * A `Json` term is filtered: when it carries a `FilterKey::String` key,
///   objects holding that key are replaced by the value under it before
///   `fun` runs. The pushed term keeps the existing parent values, or, when
///   there are none, the inputs whose index `fun` did not mark as unmatched.
/// * Any other term is a programming error (`unreachable!`).
/// * An empty slot is replaced by a term seeded from `self.current`, if any.
/// * An empty stack leaves everything untouched.
fn in_filter<F: Fn(&Vec<&'a Value>, &mut Vec<&'a Value>, &mut HashSet<usize>) -> FilterKey>(&mut self, fun: F) {
    /// Keeps `prev` when present, otherwise the values not reported as unmatched.
    fn get_parent<'a>(prev: Option<Vec<&'a Value>>, current_value: &[&'a Value], not_matched: HashSet<usize>) -> Option<Vec<&'a Value>> {
        prev.or_else(|| {
            Some(
                current_value
                    .iter()
                    .enumerate()
                    .filter(|(idx, _)| !not_matched.contains(idx))
                    .map(|(_, v)| *v)
                    .collect(),
            )
        })
    }

    match self.terms.pop() {
        Some(Some(term)) => {
            debug!("in_filter 1.: {:?}", term);

            match term {
                ExprTerm::Json(rel, fk, vec) => {
                    let mut matched = Vec::new();
                    let mut not_matched = HashSet::new();

                    let filter_key = match fk {
                        Some(FilterKey::String(key)) => {
                            let key_contained: Vec<&'a Value> = vec
                                .iter()
                                .map(|&v| match v {
                                    Value::Object(map) => map.get(&key).unwrap_or(v),
                                    _ => v,
                                })
                                .collect();
                            fun(&key_contained, &mut matched, &mut not_matched)
                        }
                        _ => fun(&vec, &mut matched, &mut not_matched),
                    };

                    let parent = get_parent(rel, &vec, not_matched);
                    self.terms.push(Some(ExprTerm::Json(parent, Some(filter_key), matched)));
                }
                _ => unreachable!(),
            }
        }
        Some(None) => {
            debug!("in_filter 2.: {:?}", &self.current);

            if let Some(current) = &self.current {
                let mut matched = Vec::new();
                let mut not_matched = HashSet::new();
                let filter_key = fun(current, &mut matched, &mut not_matched);
                self.terms.push(Some(ExprTerm::Json(None, Some(filter_key), matched)));
            }
        }
        None => {}
    }
}
/// Filters the top term by handing every candidate to
/// `ValueWalker::all_with_str` for `key` (with its final flag set to `true`)
/// and tags the resulting term with `FilterKey::All`.
fn all_in_filter_with_str(&mut self, key: &str) {
    self.in_filter(|candidates, matched, _not_matched| {
        ValueWalker::all_with_str(&candidates, matched, key, true);
        FilterKey::All
    });

    debug!("all_in_filter_with_str : {}, {:?}", key, self.terms);
}
/// Filters the top term down to the values that contain `key`.
///
/// For each candidate:
/// * an object containing `key` is kept (once per distinct address);
/// * an array is itself marked as not matched, and its elements are searched
///   recursively for objects containing `key`;
/// * anything else, including objects without `key`, is marked as not matched.
///
/// The resulting term is tagged with `FilterKey::String(key)`.
fn next_in_filter_with_str(&mut self, key: &str) {
    /// Pushes `v` if it is an object with `key`; recurses into array elements.
    fn collect_keyed<'a>(
        v: &'a Value,
        out: &mut Vec<&'a Value>,
        key: &str,
        visited: &mut HashSet<*const Value>,
    ) {
        match v {
            Value::Object(map) if map.contains_key(key) => {
                if visited.insert(v as *const Value) {
                    out.push(v);
                }
            }
            Value::Array(items) => {
                for item in items {
                    collect_keyed(item, out, key, visited);
                }
            }
            _ => {}
        }
    }

    self.in_filter(|candidates, matched, not_matched| {
        // Addresses already emitted, so a value is never pushed twice.
        let mut visited = HashSet::new();

        for (idx, &candidate) in candidates.iter().enumerate() {
            match candidate {
                Value::Object(map) if map.contains_key(key) => {
                    if visited.insert(candidate as *const Value) {
                        matched.push(candidate);
                    }
                }
                Value::Array(items) => {
                    not_matched.insert(idx);
                    for item in items {
                        collect_keyed(item, matched, key, &mut visited);
                    }
                }
                _ => {
                    not_matched.insert(idx);
                }
            }
        }

        FilterKey::String(key.to_owned())
    });

    debug!("next_in_filter_with_str : {}, {:?}", key, self.terms);
}
/// Replaces the current selection with the element at `index` of every array
/// one step below it.
///
/// Array values contribute their element at `index`; object values
/// contribute the element at `index` of each member that is an array.
/// `index` is truncated to `isize` and resolved through `abs_index`.
/// When nothing is selected, the top slot of `self.terms` is discarded and
/// the selection becomes a single `Value::Null`. Nothing changes when there
/// is no current selection.
fn next_from_current_with_num(&mut self, index: f64) {
    /// Pushes the element of `items` at the resolved `index`, if it exists.
    fn push_indexed<'a>(out: &mut Vec<&'a Value>, items: &'a [Value], index: f64) {
        let resolved = abs_index(index as isize, items.len());
        out.extend(items.get(resolved));
    }

    if let Some(selected) = self.current.take() {
        let mut picked = Vec::new();
        for value in selected {
            match value {
                Value::Object(map) => {
                    for member in map.values() {
                        if let Value::Array(items) = member {
                            push_indexed(&mut picked, items, index);
                        }
                    }
                }
                Value::Array(items) => push_indexed(&mut picked, items, index),
                _ => {}
            }
        }

        self.current = if picked.is_empty() {
            self.terms.pop();
            Some(vec![&Value::Null])
        } else {
            Some(picked)
        };
    }

    debug!(
        "next_from_current_with_num : {:?}, {:?}",
        &index, self.current
    );
}
/// Replaces the current selection with all of its direct children: member
/// values of objects and elements of arrays. Other values contribute
/// nothing. Nothing changes when there is no current selection.
fn next_all_from_current(&mut self) {
    if let Some(selected) = self.current.take() {
        let mut children = Vec::new();
        for value in selected {
            match value {
                Value::Object(map) => children.extend(map.values()),
                Value::Array(items) => children.extend(items),
                _ => {}
            }
        }
        self.current = Some(children);
    }

    debug!("next_all_from_current : {:?}", self.current);
}
/// Replaces the current selection with the values found under `keys` in each
/// selected object, in object order and then key order.
///
/// When nothing is found the selection becomes a single `Value::Null`.
/// Nothing changes when there is no current selection.
fn next_from_current_with_str(&mut self, keys: &[String]) {
    if let Some(selected) = self.current.take() {
        let mut found = Vec::new();
        for value in selected {
            if let Value::Object(map) = value {
                found.extend(keys.iter().filter_map(|key| map.get(key)));
            }
        }

        self.current = Some(if found.is_empty() {
            vec![&Value::Null]
        } else {
            found
        });
    }

    debug!(
        "next_from_current_with_str : {:?}, {:?}",
        keys, self.current
    );
}
/// Replaces the current selection with whatever `ValueWalker::all` gathers
/// from it. Nothing changes when there is no current selection.
fn all_from_current(&mut self) {
    self.current = self.current.take().map(|selected| {
        let mut gathered = Vec::new();
        ValueWalker::all(&selected, &mut gathered);
        gathered
    });

    debug!("all_from_current: {:?}", self.current);
}
/// Replaces the current selection with whatever `ValueWalker::all_with_str`
/// gathers from it for `key` (its final flag is passed as `false`).
/// Nothing changes when there is no current selection.
fn all_from_current_with_str(&mut self, key: &str) {
    self.current = self.current.take().map(|selected| {
        let mut gathered = Vec::new();
        ValueWalker::all_with_str(&selected, &mut gathered, key, false);
        gathered
    });

    debug!("all_from_current_with_str: {}, {:?}", key, self.current);
}
/// Replaces the current selection with whatever `ValueWalker::all_with_num`
/// gathers from it for `index`. Nothing changes when there is no current
/// selection.
fn all_from_current_with_num(&mut self, index: f64) {
    self.current = self.current.take().map(|selected| {
        let mut gathered = Vec::new();
        ValueWalker::all_with_num(&selected, &mut gathered, index);
        gathered
    });

    debug!("all_from_current_with_num: {}, {:?}", index, self.current);
}
fn compute_absolute_path_filter(&mut self, token: &ParseToken) -> bool {
if !self.selectors.is_empty() {
match token {
@ -422,9 +453,9 @@ impl<'a, 'b> Selector<'a, 'b> {
let term = current.into();
if let Some(s) = self.selectors.last_mut() {
s.terms.push(Some(term));
s.selector_filter.push_term(Some(term));
} else {
self.terms.push(Some(term));
self.selector_filter.push_term(Some(term));
}
} else {
unreachable!()
@ -466,47 +497,47 @@ impl<'a, 'b> Selector<'a, 'b> {
let array_token = self.tokens.pop();
if let Some(ParseToken::Leaves) = self.tokens.last() {
self.tokens.pop();
self.all_from_current();
self.current = self.selector_filter.collect_all(&self.current);
}
self.tokens.push(array_token.unwrap());
}
self.new_filter_context();
self.selector_filter.new_filter_context();
}
fn visit_array_eof(&mut self) {
if self.is_last_before_token_match(ParseToken::Array) {
if let Some(Some(e)) = self.terms.pop() {
if let Some(Some(e)) = self.selector_filter.pop_term() {
if let ExprTerm::String(key) = e {
self.next_in_filter_with_str(&key);
self.selector_filter.filter_next_with_str(&self.current, &key);
self.tokens.pop();
return;
}
self.terms.push(Some(e));
self.selector_filter.push_term(Some(e));
}
}
if self.is_last_before_token_match(ParseToken::Leaves) {
self.tokens.pop();
self.tokens.pop();
if let Some(Some(e)) = self.terms.pop() {
if let Some(Some(e)) = self.selector_filter.pop_term() {
if let ExprTerm::Number(n) = &e {
self.all_from_current_with_num(to_f64(n));
self.terms.pop();
self.current = self.selector_filter.collect_all_with_num(&self.current, to_f64(n));
self.selector_filter.pop_term();
return;
}
self.terms.push(Some(e));
self.selector_filter.push_term(Some(e));
}
}
if let Some(Some(e)) = self.terms.pop() {
if let Some(Some(e)) = self.selector_filter.pop_term() {
match e {
ExprTerm::Number(n) => {
self.next_from_current_with_num(to_f64(&n));
self.current = self.selector_filter.collect_next_with_num(&self.current, to_f64(&n));
}
ExprTerm::String(key) => {
self.next_from_current_with_str(&[key]);
self.current = self.selector_filter.collect_next_with_str(&self.current, &[key]);
}
ExprTerm::Json(rel, _, v) => {
if v.is_empty() {
@ -543,38 +574,42 @@ impl<'a, 'b> Selector<'a, 'b> {
match self.tokens.last() {
Some(ParseToken::Leaves) => {
self.tokens.pop();
self.all_from_current();
self.current = self.selector_filter.collect_all(&self.current);
}
Some(ParseToken::In) => {
self.tokens.pop();
self.next_all_from_current();
self.current = self.selector_filter.collect_next_all(&self.current);
}
_ => {
self.next_all_from_current();
self.current = self.selector_filter.collect_next_all(&self.current);
}
}
}
fn visit_key(&mut self, key: &str) {
if let Some(ParseToken::Array) = self.tokens.last() {
self.terms.push(Some(ExprTerm::String(key.to_string())));
self.selector_filter.push_term(Some(ExprTerm::String(key.to_string())));
return;
}
if let Some(t) = self.tokens.pop() {
if self.terms.is_empty() {
if self.selector_filter.is_term_empty() {
match t {
ParseToken::Leaves => self.all_from_current_with_str(key),
ParseToken::In => self.next_from_current_with_str(&[key.to_string()]),
ParseToken::Leaves => {
self.current = self.selector_filter.collect_all_with_str(&self.current, key)
}
ParseToken::In => {
self.current = self.selector_filter.collect_next_with_str(&self.current, &[key.to_string()])
}
_ => {}
}
} else {
match t {
ParseToken::Leaves => {
self.all_in_filter_with_str(key);
self.selector_filter.filter_all_with_str(&self.current, key);
}
ParseToken::In => {
self.next_in_filter_with_str(key);
self.selector_filter.filter_next_with_str(&self.current, key);
}
_ => {}
}
@ -583,19 +618,19 @@ impl<'a, 'b> Selector<'a, 'b> {
}
/// Handles a token carrying several keys.
///
/// Pops the top parse token, which must be `ParseToken::Array` (anything
/// else hits `unreachable!`), and replaces the current selection with the
/// values found under `keys`.
///
/// # Panics
/// Panics with `unimplemented!` when filter terms are pending.
fn visit_keys(&mut self, keys: &[String]) {
if !self.terms.is_empty() {
if !self.selector_filter.is_term_empty() {
// Key lists inside a filter expression are not supported.
unimplemented!("keys in filter");
}
if let Some(ParseToken::Array) = self.tokens.pop() {
self.next_from_current_with_str(keys);
self.current = self.selector_filter.collect_next_with_str(&self.current, keys);
} else {
unreachable!();
}
}
fn visit_filter(&mut self, ft: &FilterToken) {
let right = match self.terms.pop() {
let right = match self.selector_filter.pop_term() {
Some(Some(right)) => right,
Some(None) => ExprTerm::Json(
None,
@ -608,7 +643,7 @@ impl<'a, 'b> Selector<'a, 'b> {
_ => panic!("empty term right"),
};
let left = match self.terms.pop() {
let left = match self.selector_filter.pop_term() {
Some(Some(left)) => left,
Some(None) => ExprTerm::Json(
None,
@ -634,12 +669,12 @@ impl<'a, 'b> Selector<'a, 'b> {
};
if let Some(e) = ret {
self.terms.push(Some(e));
self.selector_filter.push_term(Some(e));
}
}
fn visit_range(&mut self, from: &Option<isize>, to: &Option<isize>, step: &Option<usize>) {
if !self.terms.is_empty() {
if !self.selector_filter.is_term_empty() {
unimplemented!("range syntax in filter");
}
@ -678,7 +713,7 @@ impl<'a, 'b> Selector<'a, 'b> {
}
fn visit_union(&mut self, indices: &[isize]) {
if !self.terms.is_empty() {
if !self.selector_filter.is_term_empty() {
unimplemented!("union syntax in filter");
}
@ -720,13 +755,12 @@ impl<'a, 'b> NodeVisitor for Selector<'a, 'b> {
ParseToken::ArrayEof => self.visit_array_eof(),
ParseToken::All => self.visit_all(),
ParseToken::Bool(b) => {
self.terms.push(Some(ExprTerm::Bool(*b)));
self.selector_filter.push_term(Some(ExprTerm::Bool(*b)));
}
ParseToken::Key(key) => self.visit_key(key),
ParseToken::Keys(keys) => self.visit_keys(keys),
ParseToken::Number(v) => {
self.terms
.push(Some(ExprTerm::Number(Number::from_f64(*v).unwrap())));
self.selector_filter.push_term(Some(ExprTerm::Number(Number::from_f64(*v).unwrap())));
}
ParseToken::Filter(ref ft) => self.visit_filter(ft),
ParseToken::Range(from, to, step) => self.visit_range(from, to, step),

View File

@ -1,4 +1,5 @@
use serde_json::Value;
use std::collections::HashSet;
pub(super) struct ValueWalker;
@ -46,19 +47,13 @@ impl<'a> ValueWalker {
});
}
/// Runs `_walk` on every value in `vec`, accumulating results into `tmp`.
///
/// `fun` is passed through to `_walk` unchanged; it maps a value to the
/// optional list of values to collect for it.
fn walk<F>(vec: &[&'a Value], tmp: &mut Vec<&'a Value>, fun: &F)
where
F: Fn(&Value) -> Option<Vec<&Value>>,
{
fn walk<F>(vec: &[&'a Value], tmp: &mut Vec<&'a Value>, fun: &F) where F: Fn(&Value) -> Option<Vec<&Value>> {
for v in vec {
Self::_walk(v, tmp, fun);
}
}
fn _walk<F>(v: &'a Value, tmp: &mut Vec<&'a Value>, fun: &F)
where
F: Fn(&Value) -> Option<Vec<&Value>>,
{
fn _walk<F>(v: &'a Value, tmp: &mut Vec<&'a Value>, fun: &F) where F: Fn(&Value) -> Option<Vec<&Value>> {
if let Some(mut ret) = fun(v) {
tmp.append(&mut ret);
}
@ -77,5 +72,28 @@ impl<'a> ValueWalker {
_ => {}
}
}
/// Collects objects containing `key`, descending through arrays only.
///
/// * `v` - value to inspect.
/// * `tmp` - receives each matching object.
/// * `key` - member name an object must contain to match.
/// * `visited` - addresses of values already pushed; a value whose address
///   is present is not pushed again.
///
/// Objects are never descended into; arrays are searched recursively;
/// every other value is ignored.
pub fn walk_dedup(
    v: &'a Value,
    tmp: &mut Vec<&'a Value>,
    key: &str,
    visited: &mut HashSet<*const Value>,
) {
    match v {
        Value::Object(map) if map.contains_key(key) => {
            // `insert` reports whether the address was new.
            if visited.insert(v as *const Value) {
                tmp.push(v);
            }
        }
        Value::Array(items) => {
            for item in items {
                Self::walk_dedup(item, tmp, key, visited);
            }
        }
        _ => {}
    }
}
}