From c99879cfbb417e6fe608f56ff94d8356971bd49c Mon Sep 17 00:00:00 2001 From: freestrings Date: Mon, 23 Mar 2020 22:50:08 +0900 Subject: [PATCH] =?UTF-8?q?filter,=20collector=20=EC=A0=95=EB=A6=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/select/mod.rs | 594 ++++++++++++++++++++----------------- src/select/value_walker.rs | 34 ++- 2 files changed, 340 insertions(+), 288 deletions(-) diff --git a/src/select/mod.rs b/src/select/mod.rs index b1758af..d3afefd 100644 --- a/src/select/mod.rs +++ b/src/select/mod.rs @@ -61,20 +61,295 @@ impl fmt::Display for JsonPathError { } } -#[derive(Debug, Default)] +#[derive(Debug)] +struct FilterTerms<'a>(Vec>>); + +impl<'a> FilterTerms<'a> { + fn new_filter_context(&mut self) { + self.0.push(None); + debug!("new_filter_context: {:?}", self.0); + } + + fn is_term_empty(&self) -> bool { + self.0.is_empty() + } + + fn push_term(&mut self, term: Option>) { + self.0.push(term); + } + + fn pop_term(&mut self) -> Option>> { + self.0.pop() + } + + fn filter_json_term, &mut Vec<&'a Value>, &mut HashSet) -> FilterKey>( + &mut self, + e: ExprTerm<'a>, + fun: F, + ) { + debug!("filter_json_term: {:?}", e); + + if let ExprTerm::Json(rel, fk, vec) = e { + let mut tmp = Vec::new(); + let mut not_matched = HashSet::new(); + let filter_key = if let Some(FilterKey::String(key)) = fk { + let key_contained = &vec.iter().map(|v| match v { + Value::Object(map) if map.contains_key(&key) => map.get(&key).unwrap(), + _ => v, + }).collect(); + fun(key_contained, &mut tmp, &mut not_matched) + } else { + fun(&vec, &mut tmp, &mut not_matched) + }; + + if rel.is_some() { + self.0.push(Some(ExprTerm::Json(rel, Some(filter_key), tmp))); + } else { + let filtered: Vec<&Value> = vec.iter().enumerate() + .filter( + |(idx, _)| !not_matched.contains(idx) + ) + .map(|(_, v)| *v) + .collect(); + + self.0.push(Some(ExprTerm::Json(Some(filtered), Some(filter_key), tmp))); + } + } else { + unreachable!("unexpected: ExprTerm: {:?}", e); + } + } + + fn push_json_term, &mut Vec<&'a Value>, &mut HashSet) -> FilterKey>( + &mut self, + current: &Option>, + fun: F, + ) { + debug!("push_json_term: {:?}", ¤t); + + if let Some(current) = ¤t { + let mut tmp = Vec::new(); + let mut not_matched = HashSet::new(); + let filter_key = fun(current, &mut tmp, &mut not_matched); + self.0.push(Some(ExprTerm::Json(None, Some(filter_key), tmp))); + } + } + + fn filter, &mut Vec<&'a Value>, &mut HashSet) -> FilterKey>( + &mut self, + current: &Option>, + fun: F, + ) { + if let Some(peek) = self.0.pop() { + if let Some(e) = peek { + self.filter_json_term(e, fun); + } else { + self.push_json_term(current, fun); + } + } + } + + fn filter_all_with_str(&mut self, current: &Option>, key: &str) { + self.filter(current, |vec, tmp, _| { + ValueWalker::all_with_str(&vec, tmp, key, true); + FilterKey::All + }); + + debug!("filter_all_with_str : {}, {:?}", key, self.0); + } + + fn filter_next_with_str(&mut self, current: &Option>, key: &str) { + self.filter(current, |vec, tmp, not_matched| { + let mut visited = HashSet::new(); + for (idx, v) in vec.iter().enumerate() { + match v { + Value::Object(map) => { + if map.contains_key(key) { + let ptr = *v as *const Value; + if !visited.contains(&ptr) { + visited.insert(ptr); + tmp.push(v) + } + } else { + not_matched.insert(idx); + } + } + Value::Array(vec) => { + not_matched.insert(idx); + for v in vec { + ValueWalker::walk_dedup(v, tmp, key, &mut visited); + } + } + _ => { + not_matched.insert(idx); + } + } + } + + FilterKey::String(key.to_owned()) + }); + + debug!("filter_next_with_str : {}, {:?}", key, self.0); + } + + fn collect_next_with_num(&mut self, current: &Option>, index: f64) -> Option> { + fn _collect<'a>(tmp: &mut Vec<&'a Value>, vec: &'a [Value], index: f64) { + let index = abs_index(index as isize, vec.len()); + if let Some(v) = vec.get(index) { + tmp.push(v); + } + } + + if let Some(current) = current { + let mut tmp = Vec::new(); + for c in current { + match c { + Value::Object(map) => { + for k in map.keys() { + if let Some(Value::Array(vec)) = map.get(k) { + _collect(&mut tmp, vec, index); + } + } + } + Value::Array(vec) => { + _collect(&mut tmp, vec, index); + } + _ => {} + } + } + + if tmp.is_empty() { + self.0.pop(); + return Some(vec![&Value::Null]); + } else { + return Some(tmp); + } + } + + debug!( + "collect_next_with_num : {:?}, {:?}", + &index, ¤t + ); + + None + } + + fn collect_next_all(&mut self, current: &Option>) -> Option> { + if let Some(current) = current { + let mut tmp = Vec::new(); + for c in current { + match c { + Value::Object(map) => { + for (_, v) in map { + tmp.push(v) + } + } + Value::Array(vec) => { + for v in vec { + tmp.push(v); + } + } + _ => {} + } + } + return Some(tmp); + } + + debug!("collect_next_all : {:?}", ¤t); + + None + } + + fn collect_next_with_str(&mut self, current: &Option>, keys: &[String]) -> Option> { + if let Some(current) = current { + let mut tmp = Vec::new(); + for c in current { + if let Value::Object(map) = c { + for key in keys { + if let Some(v) = map.get(key) { + tmp.push(v) + } + } + } + } + + if tmp.is_empty() { + self.0.pop(); + return Some(vec![&Value::Null]); + } else { + return Some(tmp); + } + } + + debug!( + "collect_next_with_str : {:?}, {:?}", + keys, ¤t + ); + + None + } + + fn collect_all(&mut self, current: &Option>) -> Option> { + if let Some(current) = current { + let mut tmp = Vec::new(); + ValueWalker::all(¤t, &mut tmp); + return Some(tmp); + } + debug!("collect_all: {:?}", ¤t); + + None + } + + fn collect_all_with_str(&mut self, current: &Option>, key: &str) -> Option> { + if let Some(current) = current { + let mut tmp = Vec::new(); + ValueWalker::all_with_str(¤t, &mut tmp, key, false); + return Some(tmp); + } + + debug!("collect_all_with_str: {}, {:?}", key, ¤t); + + None + } + + fn collect_all_with_num(&mut self, current: &Option>, index: f64) -> Option> { + if let Some(current) = current { + let mut tmp = Vec::new(); + ValueWalker::all_with_num(¤t, &mut tmp, index); + return Some(tmp); + } + + debug!("collect_all_with_num: {}, {:?}", index, ¤t); + + None + } +} + +#[derive(Debug)] pub struct Selector<'a, 'b> { node: Option, node_ref: Option<&'b Node>, value: Option<&'a Value>, tokens: Vec, - terms: Vec>>, current: Option>, selectors: Vec>, + selector_filter: FilterTerms<'a>, } impl<'a, 'b> Selector<'a, 'b> { + pub fn default() -> Self { + Self::new() + } + pub fn new() -> Self { - Selector::default() + Self { + node: None, + node_ref: None, + value: None, + tokens: Vec::new(), + current: None, + selectors: Vec::new(), + selector_filter: FilterTerms(Vec::new()), + } } pub fn str_path(&mut self, path: &str) -> Result<&mut Self, JsonPathError> { @@ -168,250 +443,6 @@ impl<'a, 'b> Selector<'a, 'b> { } } - fn new_filter_context(&mut self) { - self.terms.push(None); - debug!("new_filter_context: {:?}", self.terms); - } - - fn in_filter, &mut Vec<&'a Value>, &mut HashSet) -> FilterKey>(&mut self, fun: F) { - fn get_parent<'a>(prev: Option>, current_value: &[&'a Value], not_matched: HashSet) -> Option> { - if prev.is_some() { - return prev; - } - - let filtered: Vec<&Value> = current_value.iter().enumerate().filter(|(idx, _)| !not_matched.contains(idx)) - .map(|(_, v)| *v) - .collect(); - - Some(filtered) - } - - - if let Some(peek) = self.terms.pop() { - match peek { - Some(v) => { - debug!("in_filter 1.: {:?}", v); - - match v { - ExprTerm::Json(rel, fk, vec) => { - let mut tmp = Vec::new(); - let mut not_matched = HashSet::new(); - let filter_key = if let Some(FilterKey::String(key)) = fk { - let key_contained = &vec.iter().map(|v| match v { - Value::Object(map) if map.contains_key(&key) => map.get(&key).unwrap(), - _ => v, - }).collect(); - fun(key_contained, &mut tmp, &mut not_matched) - } else { - fun(&vec, &mut tmp, &mut not_matched) - }; - - let parent = get_parent(rel, &vec, not_matched); - self.terms.push(Some(ExprTerm::Json(parent, Some(filter_key), tmp))); - } - _ => unreachable!(), - }; - } - _ => { - debug!("in_filter 2.: {:?}", &self.current); - - if let Some(current) = &self.current { - let mut tmp = Vec::new(); - let mut not_matched = HashSet::new(); - let filter_key = fun(current, &mut tmp, &mut not_matched); - self.terms.push(Some(ExprTerm::Json(None, Some(filter_key), tmp))); - } - } - } - } - } - - fn all_in_filter_with_str(&mut self, key: &str) { - self.in_filter(|vec, tmp, _| { - ValueWalker::all_with_str(&vec, tmp, key, true); - FilterKey::All - }); - - debug!("all_in_filter_with_str : {}, {:?}", key, self.terms); - } - - fn next_in_filter_with_str(&mut self, key: &str) { - fn _collect<'a>( - v: &'a Value, - tmp: &mut Vec<&'a Value>, - key: &str, - visited: &mut HashSet<*const Value>, - not_matched: &mut HashSet, - ) { - match v { - Value::Object(map) => { - if map.contains_key(key) { - let ptr = v as *const Value; - if !visited.contains(&ptr) { - visited.insert(ptr); - tmp.push(v) - } - } - } - Value::Array(vec) => { - for v in vec { - _collect(v, tmp, key, visited, not_matched); - } - } - _ => {} - } - } - - self.in_filter(|vec, tmp, not_matched| { - let mut visited = HashSet::new(); - for (idx, v) in vec.iter().enumerate() { - match v { - Value::Object(map) => { - if map.contains_key(key) { - let ptr = *v as *const Value; - if !visited.contains(&ptr) { - visited.insert(ptr); - tmp.push(v) - } - } else { - not_matched.insert(idx); - } - } - Value::Array(vec) => { - not_matched.insert(idx); - for v in vec { - _collect(v, tmp, key, &mut visited, not_matched); - } - } - _ => { - not_matched.insert(idx); - } - } - } - - FilterKey::String(key.to_owned()) - }); - - debug!("next_in_filter_with_str : {}, {:?}", key, self.terms); - } - - fn next_from_current_with_num(&mut self, index: f64) { - fn _collect<'a>(tmp: &mut Vec<&'a Value>, vec: &'a [Value], index: f64) { - let index = abs_index(index as isize, vec.len()); - if let Some(v) = vec.get(index) { - tmp.push(v); - } - } - - if let Some(current) = self.current.take() { - let mut tmp = Vec::new(); - for c in current { - match c { - Value::Object(map) => { - for k in map.keys() { - if let Some(Value::Array(vec)) = map.get(k) { - _collect(&mut tmp, vec, index); - } - } - } - Value::Array(vec) => { - _collect(&mut tmp, vec, index); - } - _ => {} - } - } - - if tmp.is_empty() { - self.terms.pop(); - self.current = Some(vec![&Value::Null]); - } else { - self.current = Some(tmp); - } - } - - debug!( - "next_from_current_with_num : {:?}, {:?}", - &index, self.current - ); - } - - fn next_all_from_current(&mut self) { - if let Some(current) = self.current.take() { - let mut tmp = Vec::new(); - for c in current { - match c { - Value::Object(map) => { - for (_, v) in map { - tmp.push(v) - } - } - Value::Array(vec) => { - for v in vec { - tmp.push(v); - } - } - _ => {} - } - } - self.current = Some(tmp); - } - - debug!("next_all_from_current : {:?}", self.current); - } - - fn next_from_current_with_str(&mut self, keys: &[String]) { - if let Some(current) = self.current.take() { - let mut tmp = Vec::new(); - for c in current { - if let Value::Object(map) = c { - for key in keys { - if let Some(v) = map.get(key) { - tmp.push(v) - } - } - } - } - - if tmp.is_empty() { - self.current = Some(vec![&Value::Null]); - } else { - self.current = Some(tmp); - } - } - - debug!( - "next_from_current_with_str : {:?}, {:?}", - keys, self.current - ); - } - - fn all_from_current(&mut self) { - if let Some(current) = self.current.take() { - let mut tmp = Vec::new(); - ValueWalker::all(¤t, &mut tmp); - self.current = Some(tmp); - } - debug!("all_from_current: {:?}", self.current); - } - - fn all_from_current_with_str(&mut self, key: &str) { - if let Some(current) = self.current.take() { - let mut tmp = Vec::new(); - ValueWalker::all_with_str(¤t, &mut tmp, key, false); - self.current = Some(tmp); - } - debug!("all_from_current_with_str: {}, {:?}", key, self.current); - } - - fn all_from_current_with_num(&mut self, index: f64) { - if let Some(current) = self.current.take() { - let mut tmp = Vec::new(); - ValueWalker::all_with_num(¤t, &mut tmp, index); - self.current = Some(tmp); - } - debug!("all_from_current_with_num: {}, {:?}", index, self.current); - } - fn compute_absolute_path_filter(&mut self, token: &ParseToken) -> bool { if !self.selectors.is_empty() { match token { @@ -422,9 +453,9 @@ impl<'a, 'b> Selector<'a, 'b> { let term = current.into(); if let Some(s) = self.selectors.last_mut() { - s.terms.push(Some(term)); + s.selector_filter.push_term(Some(term)); } else { - self.terms.push(Some(term)); + self.selector_filter.push_term(Some(term)); } } else { unreachable!() @@ -466,47 +497,47 @@ impl<'a, 'b> Selector<'a, 'b> { let array_token = self.tokens.pop(); if let Some(ParseToken::Leaves) = self.tokens.last() { self.tokens.pop(); - self.all_from_current(); + self.current = self.selector_filter.collect_all(&self.current); } self.tokens.push(array_token.unwrap()); } - self.new_filter_context(); + self.selector_filter.new_filter_context(); } fn visit_array_eof(&mut self) { if self.is_last_before_token_match(ParseToken::Array) { - if let Some(Some(e)) = self.terms.pop() { + if let Some(Some(e)) = self.selector_filter.pop_term() { if let ExprTerm::String(key) = e { - self.next_in_filter_with_str(&key); + self.selector_filter.filter_next_with_str(&self.current, &key); self.tokens.pop(); return; } - self.terms.push(Some(e)); + self.selector_filter.push_term(Some(e)); } } if self.is_last_before_token_match(ParseToken::Leaves) { self.tokens.pop(); self.tokens.pop(); - if let Some(Some(e)) = self.terms.pop() { + if let Some(Some(e)) = self.selector_filter.pop_term() { if let ExprTerm::Number(n) = &e { - self.all_from_current_with_num(to_f64(n)); - self.terms.pop(); + self.current = self.selector_filter.collect_all_with_num(&self.current, to_f64(n)); + self.selector_filter.pop_term(); return; } - self.terms.push(Some(e)); + self.selector_filter.push_term(Some(e)); } } - if let Some(Some(e)) = self.terms.pop() { + if let Some(Some(e)) = self.selector_filter.pop_term() { match e { ExprTerm::Number(n) => { - self.next_from_current_with_num(to_f64(&n)); + self.current = self.selector_filter.collect_next_with_num(&self.current, to_f64(&n)); } ExprTerm::String(key) => { - self.next_from_current_with_str(&[key]); + self.current = self.selector_filter.collect_next_with_str(&self.current, &[key]); } ExprTerm::Json(rel, _, v) => { if v.is_empty() { @@ -543,38 +574,42 @@ impl<'a, 'b> Selector<'a, 'b> { match self.tokens.last() { Some(ParseToken::Leaves) => { self.tokens.pop(); - self.all_from_current(); + self.current = self.selector_filter.collect_all(&self.current); } Some(ParseToken::In) => { self.tokens.pop(); - self.next_all_from_current(); + self.current = self.selector_filter.collect_next_all(&self.current); } _ => { - self.next_all_from_current(); + self.current = self.selector_filter.collect_next_all(&self.current); } } } fn visit_key(&mut self, key: &str) { if let Some(ParseToken::Array) = self.tokens.last() { - self.terms.push(Some(ExprTerm::String(key.to_string()))); + self.selector_filter.push_term(Some(ExprTerm::String(key.to_string()))); return; } if let Some(t) = self.tokens.pop() { - if self.terms.is_empty() { + if self.selector_filter.is_term_empty() { match t { - ParseToken::Leaves => self.all_from_current_with_str(key), - ParseToken::In => self.next_from_current_with_str(&[key.to_string()]), + ParseToken::Leaves => { + self.current = self.selector_filter.collect_all_with_str(&self.current, key) + } + ParseToken::In => { + self.current = self.selector_filter.collect_next_with_str(&self.current, &[key.to_string()]) + } _ => {} } } else { match t { ParseToken::Leaves => { - self.all_in_filter_with_str(key); + self.selector_filter.filter_all_with_str(&self.current, key); } ParseToken::In => { - self.next_in_filter_with_str(key); + self.selector_filter.filter_next_with_str(&self.current, key); } _ => {} } @@ -583,19 +618,19 @@ impl<'a, 'b> Selector<'a, 'b> { } fn visit_keys(&mut self, keys: &[String]) { - if !self.terms.is_empty() { + if !self.selector_filter.is_term_empty() { unimplemented!("keys in filter"); } if let Some(ParseToken::Array) = self.tokens.pop() { - self.next_from_current_with_str(keys); + self.current = self.selector_filter.collect_next_with_str(&self.current, keys); } else { unreachable!(); } } fn visit_filter(&mut self, ft: &FilterToken) { - let right = match self.terms.pop() { + let right = match self.selector_filter.pop_term() { Some(Some(right)) => right, Some(None) => ExprTerm::Json( None, @@ -608,7 +643,7 @@ impl<'a, 'b> Selector<'a, 'b> { _ => panic!("empty term right"), }; - let left = match self.terms.pop() { + let left = match self.selector_filter.pop_term() { Some(Some(left)) => left, Some(None) => ExprTerm::Json( None, @@ -634,12 +669,12 @@ impl<'a, 'b> Selector<'a, 'b> { }; if let Some(e) = ret { - self.terms.push(Some(e)); + self.selector_filter.push_term(Some(e)); } } fn visit_range(&mut self, from: &Option, to: &Option, step: &Option) { - if !self.terms.is_empty() { + if !self.selector_filter.is_term_empty() { unimplemented!("range syntax in filter"); } @@ -678,7 +713,7 @@ impl<'a, 'b> Selector<'a, 'b> { } fn visit_union(&mut self, indices: &[isize]) { - if !self.terms.is_empty() { + if !self.selector_filter.is_term_empty() { unimplemented!("union syntax in filter"); } @@ -720,13 +755,12 @@ impl<'a, 'b> NodeVisitor for Selector<'a, 'b> { ParseToken::ArrayEof => self.visit_array_eof(), ParseToken::All => self.visit_all(), ParseToken::Bool(b) => { - self.terms.push(Some(ExprTerm::Bool(*b))); + self.selector_filter.push_term(Some(ExprTerm::Bool(*b))); } ParseToken::Key(key) => self.visit_key(key), ParseToken::Keys(keys) => self.visit_keys(keys), ParseToken::Number(v) => { - self.terms - .push(Some(ExprTerm::Number(Number::from_f64(*v).unwrap()))); + self.selector_filter.push_term(Some(ExprTerm::Number(Number::from_f64(*v).unwrap()))); } ParseToken::Filter(ref ft) => self.visit_filter(ft), ParseToken::Range(from, to, step) => self.visit_range(from, to, step), diff --git a/src/select/value_walker.rs b/src/select/value_walker.rs index 55f3852..e7b4de0 100644 --- a/src/select/value_walker.rs +++ b/src/select/value_walker.rs @@ -1,4 +1,5 @@ use serde_json::Value; +use std::collections::HashSet; pub(super) struct ValueWalker; @@ -46,19 +47,13 @@ impl<'a> ValueWalker { }); } - fn walk(vec: &[&'a Value], tmp: &mut Vec<&'a Value>, fun: &F) - where - F: Fn(&Value) -> Option>, - { + fn walk(vec: &[&'a Value], tmp: &mut Vec<&'a Value>, fun: &F) where F: Fn(&Value) -> Option> { for v in vec { Self::_walk(v, tmp, fun); } } - fn _walk(v: &'a Value, tmp: &mut Vec<&'a Value>, fun: &F) - where - F: Fn(&Value) -> Option>, - { + fn _walk(v: &'a Value, tmp: &mut Vec<&'a Value>, fun: &F) where F: Fn(&Value) -> Option> { if let Some(mut ret) = fun(v) { tmp.append(&mut ret); } @@ -77,5 +72,28 @@ impl<'a> ValueWalker { _ => {} } } + + pub fn walk_dedup(v: &'a Value, + tmp: &mut Vec<&'a Value>, + key: &str, + visited: &mut HashSet<*const Value>, ) { + match v { + Value::Object(map) => { + if map.contains_key(key) { + let ptr = v as *const Value; + if !visited.contains(&ptr) { + visited.insert(ptr); + tmp.push(v) + } + } + } + Value::Array(vec) => { + for v in vec { + Self::walk_dedup(v, tmp, key, visited); + } + } + _ => {} + } + } }