~eanyanwu/jsn

035f5434d98b4cd2766cd4ea3a547ec97c3c9da5 — Eze 3 months ago c6743d2
Flatten the match statement for the `index` mask function
1 files changed, 68 insertions(+), 108 deletions(-)

M src/mask.rs
M src/mask.rs => src/mask.rs +68 -108
@@ 21,8 21,8 @@
//!
//! Token masks can be combined using the [`and`](Mask::and) and [`or`](Mask::or) functions:
//!
//! You can think of using `and()` to express the intersection between two masks.  
//! You can think of using `or()` to express the union between two masks.  
//! Think of using `and()` to express the intersection between two masks.  
//! Think of using `or()` to express the union between two masks.  
//!
//! ```text
//!                                      { "a" : false , "b" : true }


@@ 86,7 86,7 @@
//!
//! ## Gotchas
//!
//! - Be careful using masks when your json has nested arrays. Token masks match the input, in
//! - Be careful using masks when the JSON input has nested arrays. Token masks match the input, in
//! order, as many times as possible without overlapping:
//!
//! ```text


@@ 95,17 95,16 @@
//! ```
//!
//! This result looks "unbalanced" because the first match is an entire array but the second &
//! third matches are single elements. But it _is_ correct. Your mask is looking for things at
//! index 0. Well that first array is at index 0.  _Though this be madness, yet there is method
//! third matches are single elements. But it _is_ correct. The mask is looking for things at
//! index 0, and that first array is at index 0.  _Though this be madness, yet there is method
//! in't._  
//!
//! - More to come as I discover them ...
//!
//!
//! ## Real Example
//! ## Example
//!
//! For a more complex & realistic example, let's find the top 250 most popular packages on NPM, the
//! JavaScript package registry.
//! Finding the top 250 most popular packages on NPM, the JavaScript package registry.
//!  
//! The request[^1] will return JSON. Try it in your browser to get a feel for the schema:
//! ```text


@@ 121,11 120,11 @@
//!     let file = File::open("tests/files/npm.json")?;
//!     let file = BufReader::new(file);
//!
//!     // We want to select the tokens under the `package.name` key.
//!     // But using `key("package").and(key("name"))` is not enough because
//!     // Select the tokens under the `package.name` key.
//!     // Using `key("package").and(key("name"))` is not enough because
//!     // it will also match tokens under `package.author.name`.
//!     //
//!     // So we use the depth mask to narrow down which level to look at
//!     // So use the depth mask instead to narrow down which level to look at
//!     let name_mask = depth(4).and(key("name"));
//!     let link_mask = key("links").and(key("npm"));
//!     let mask = name_mask.or(link_mask);


@@ 334,7 333,7 @@ pub fn all() -> All {
pub fn key(name: impl AsRef<str>) -> Key {
    Key {
        name: name.as_ref().to_string(),
        state: KeyMatchState::None,
        state: KeyState::None,
        container_count: 0,
    }
}


@@ 367,9 366,9 @@ pub fn key(name: impl AsRef<str>) -> Key {
pub fn index(idx: usize) -> Index {
    Index {
        idx,
        matched_structural_token: false,
        container_count: 0,
        structure: vec![],
        state: IndexState::None,
    }
}



@@ 519,31 518,24 @@ pub fn values() -> Values {
// matching process across invocations.

#[derive(Debug, Clone, PartialEq, Eq)]
enum KeyMatchState {
    // We are yet to find an object key that matches what we are looking for
enum KeyState {
    None,
    // We've seen an object key that matches what we are looking for
    Key,
    // We've seen the colon after the object key that matches what we are looking for
    Colon,
}

/// See [`key()`](crate::mask::key)
#[derive(Debug, Clone)]
pub struct Key {
    // The name of the key to look for
    name: String,
    state: KeyMatchState,
    // If the value of an object key is an array or object, it might contain further nested
    // objects or arrays which are still part of the parent key. We need to know when the outer
    // container ends so we can stop emitting tokens for the key
    state: KeyState,
    container_count: usize,
}

impl Mask for Key {
    #[inline]
    fn match_token(&mut self, token: &RawToken) -> bool {
        use KeyMatchState::*;
        use KeyState::*;
        let token_matches = self.state == Colon;
        match (&self.state, token) {
            (None, RawToken::ObjectKey(s)) if s == &self.name => {


@@ 570,31 562,6 @@ impl Mask for Key {
    }
}

// Matching an array index is a bit tougher than matching an object key.
// We need to keep track of the current index in all the arrays we see. For example, given the
// following json:
//
// ```
// [1, [2, 3, 4], 5]
// ```
//
// Matching values at index `2` means matching numbers 4 & 5. Here is a visualization:
//
// ```
//   0         1         2
//   |   +-----------+   |
//   |   v           v   |
//   v     0   1   2     v
// [ 1 , [ 2 , 3 , 4 ] , 5 ]
// _ _ _ _ _ _ _ _ x _ _ x _
// ```
//
// When we exit from the inner array, we need to know that the outer array was at index 1 and the
// next element is at index 2 to match it.
//
// I do this using a stack of `Container` enums to represent depth. At a given depth in the stack,
// the `Container::Array` variant stores the current index of the array.

#[derive(Debug, Clone)]
enum Container {
    Array(usize),


@@ 604,79 571,71 @@ enum Container {
#[derive(Debug, Clone)]
/// See [`index()`](crate::mask::index)
pub struct Index {
    // The index we are trying to match
    idx: usize,
    // If we found a structural token (i.e. `[` or `{`) at the array index we were looking for.
    // This would mean there are multiple tokens at that index.
    matched_structural_token: bool,
    container_count: usize,
    structure: Vec<Container>,
    state: IndexState,
}

// Matching state of an `Index` mask, carried across `match_token` invocations.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
enum IndexState {
    // Not inside a matched container: array/object nesting is being tracked and only
    // primitive values sitting at the target index match.
    None,
    // A `[` or `{` was found at the target index: every token matches until that
    // container is closed again.
    StructuralMatch,
}

impl Mask for Index {
    fn match_token(&mut self, token: &RawToken) -> bool {
        if !self.matched_structural_token {
            match token {
                RawToken::ArrayStart => {
                    if let Some(Container::Array(current)) = self.structure.last() {
                        if current == &self.idx {
                            self.matched_structural_token = true;
                            self.container_count = 1;
                            return true;
                        }
                    }
                    self.structure.push(Container::Array(0));
                    false
                }
                RawToken::ObjectStart => {
                    if let Some(Container::Array(current)) = self.structure.last() {
                        if current == &self.idx {
                            self.matched_structural_token = true;
                            self.container_count = 1;
                            return true;
                        }
                    }
                    self.structure.push(Container::Object);
                    false
                }
                RawToken::ArrayEnd | RawToken::ObjectEnd => {
                    self.structure.pop();
                    false
                }
                RawToken::Comma => {
                    if let Some(Container::Array(current)) = self.structure.last_mut() {
                        *current += 1;
                    }
                    false
                }
        let matched_index = || {
            if let Some(Container::Array(current)) = self.structure.last() {
                current == &self.idx
            } else {
                false
            }
        };

                t if t.is_primitive_value() => {
                    if let Some(Container::Array(current)) = self.structure.last() {
                        return current == &self.idx;
                    }
                    false
                }
                _ => false,
        use IndexState::*;
        let token_matches = match (self.state, token) {
            (None, RawToken::ArrayStart | RawToken::ObjectStart) if matched_index() => {
                self.state = StructuralMatch;
                self.container_count += 1;
                true
            }
            (None, RawToken::ArrayStart) => {
                self.structure.push(Container::Array(0));
                false
            }
            (None, RawToken::ObjectStart) => {
                self.structure.push(Container::Object);
                false
            }
            (None, RawToken::ObjectEnd | RawToken::ArrayEnd) => {
                self.structure.pop();
                false
            }
        } else {
            match token {
                RawToken::ArrayStart | RawToken::ObjectStart => {
                    self.container_count += 1;
                    true
            (None, RawToken::Comma) => {
                if let Some(Container::Array(current)) = self.structure.last_mut() {
                    *current += 1;
                }
                RawToken::ArrayEnd | RawToken::ObjectEnd => {
                    self.container_count -= 1;

                    if self.container_count == 0 {
                        self.matched_structural_token = false;
                    }
                false
            }
            (None, t) if t.is_primitive_value() && matched_index() => true,
            (None, _) => false,

                    true
            (StructuralMatch, RawToken::ArrayStart | RawToken::ObjectStart) => {
                self.container_count += 1;
                true
            }
            (StructuralMatch, RawToken::ArrayEnd | RawToken::ObjectEnd) => {
                self.container_count -= 1;
                if self.container_count == 0 {
                    self.state = None;
                }

                _ => true,
                true
            }
        }
            (StructuralMatch, _) => true,
        };

        token_matches
    }
}



@@ 689,6 648,7 @@ pub struct Depth {

impl Mask for Depth {
    fn match_token(&mut self, token: &RawToken) -> bool {
        // Brackets and braces "straddle" depths, so handle them differently
        match token {
            RawToken::ArrayStart | RawToken::ObjectStart => {
                self.current_depth += 1;