~asayers/inukshuk

9b973131e38aa016247417b49002e9dccecb281f — Alex Sayers 5 years ago 46ec7a2 optimizer
WIP minimize
2 files changed, 263 insertions(+), 27 deletions(-)

M src/main.rs
A src/minimize.rs
M src/main.rs => src/main.rs +11 -27
@@ 1,4 1,5 @@
pub mod flat_vec;
pub mod minimize;

use bytesize::ByteSize;
use chrono::*;


@@ 7,6 8,7 @@ use humantime::format_duration;
use json_patch::*;
use listenfd::ListenFd;
use log::*;
use minimize::*;
use querystring::querify;
use serde_derive::*;
use serde_json::{json, Value};


@@ 237,8 239,8 @@ fn handle_diff(
struct Item {
    path: PathBuf,
    file: BufReader<File>,
    forward_patches: FlatVec<PatchOperation>,
    reverse_patches: FlatVec<PatchOperation>,
    forward_patches: FlatVec<PatchOp>,
    reverse_patches: FlatVec<PatchOp>,
    timestamps: Vec<Option<DateTime<Utc>>>,
    head_val: Value,
    buf: String,


@@ 268,13 270,9 @@ impl Item {
                return Ok(());
            }
            let line = serde_json::from_str::<Line>(&self.buf).unwrap();
            let mut p = line.patch;
            let old_head = self.head_val.clone();
            patch(&mut self.head_val, &p).map_err(|e| Error::BadPatch(e))?;
            // FIXME: This is a naive implementation and can surely be improved in
            // terms of performance.
            let r = diff(&self.head_val, &old_head);
            self.forward_patches.push(&p);
            let (f, mut r) = patch_and_compute(line.patch, &mut self.head_val);
            r.reverse();
            self.forward_patches.push(&f);
            self.reverse_patches.push(&r);
            self.timestamps.push(line.ts);
        }


@@ 291,20 289,18 @@ impl Item {
                Ordering::Equal => Vec::new(),
                Ordering::Greater => {
                    let ps = self.forward_patches.get(from, to).unwrap();
                    let mut p = Vec::new();
                    p.extend_from_slice(ps);
                    p
                    ps.iter().map(|op| op.clone().into()).collect()
                }
                Ordering::Less => {
                    let ps = self.reverse_patches.get(to, from).unwrap();
                    let mut p = Vec::new();
                    p.extend_from_slice(ps);
                    let mut p: Vec<PatchOperation> =
                        ps.iter().map(|op| op.clone().into()).collect();
                    p.reverse();
                    p
                }
            })
        };
        let patch = minimize(patch);
        // let patch = minimize(patch);
        Ok((from, to, patch))
    }



@@ 313,15 309,3 @@ impl Item {
        self.forward_patches.len()
    }
}

/// Placeholder for the patch optimiser: currently hands `patch` back unchanged.
fn minimize(patch: Patch) -> Patch {
    // Planned optimisations (none of them are implemented yet):
    //
    // * When we see an "add" or "replace" or "delete" at a path which
    //   doesn't end with '-', we can ignore all preceding patches at or
    //   below that path.
    // * When all of the fields below a path are set within a patchset,
    //   these can be replaced by a single set.
    //
    // These optimisations are probably good enough for now.
    // TODO: Implement them
    patch
}

A src/minimize.rs => src/minimize.rs +252 -0
@@ 0,0 1,252 @@
/*! Patch optimizer

We simplify the patching model down to just two operations: "add" and "remove".
`{add, remove}` is spanning, in the sense that any patch can be expressed
as a combination of these two.  (N.B.: This reduction means that, if your
patches rely heavily on "move" or "copy" operations, the optimizer may in
fact pessimize them.)

Add and Remove have a single path, which points to the subtree which will
be modified.  In both cases, once the operation has been applied to its
target subtree, all previous modifications to that subtree are irrelevant.
This is the basis of our optimizer.

When a path refers to some key of an object, this is a stable reference:
other patches can't change its referent.  However, when a path refers to
some index of an array, this is unstable: other patches may alter the referent.
*/
use log::*;
use serde_json::{json, Value};
use std::collections::BTreeMap;

// #[derive(Default)]
// struct Foo(BTreeMap<JSONPtr, Vec<Action>>);

// impl Foo {
//     fn add(&mut self, p: Patch) {
//         for (path, op) in p {
//             match op {
//                 PatchOp::Add(_) => self.0.entry(path).or_insert(vec![]).push(op),
//                 PatchOp::Remove => self.0.entry(path).or_insert(vec![]).push(op),
//                 PatchOp::Replace(_) => self.0.entry(path).or_insert(vec![]).push(op),
//             }
//         }
//     }
// }

// impl Into<Patch> for Foo {
//     fn into(&self) -> Patch {}
// }

// pub fn minimize(patch: Patch) -> Patch {
//     // * When we see an "add" or "remove" at a path which
//     //   doesn't end with '-', we can ignore all preceeding patches at or
//     //   below that path.
//     // * When all of the fields below a path are set within a patchset,
//     //   these can be replaced by a single set.
//     //
//     // These optimisations are probably good enough for now.
//     // TODO: Implement them
//     patch
// }

/// One primitive step of a reduced patch: a single `Action` applied at `target`.
#[derive(Clone, Debug)]
pub struct PatchOp {
    /// Location in the document that this step modifies.
    target: JSONPtr,
    /// What happens at `target` (add a value or remove what is there).
    action: Action,
}

/// Looks up `path` inside `val` and returns an owned copy of the value found
/// there, or `None` when the path does not resolve to anything.
fn ptr_get(path: &str, val: &Value) -> Option<Value> {
    match JSONPtr::val(path, val) {
        Some(found) => Some(found.clone()),
        None => None,
    }
}

/// Applies `p` to `val` one operation at a time and returns the reduced
/// `(forward, reverse)` op lists describing the change.
///
/// Both lists are built in the order the operations appear in `p`; each
/// operation's ops are computed against the document as it was just before
/// that operation was applied.
///
/// # Panics
///
/// Panics if any operation of `p` cannot be applied to `val`.
pub fn patch_and_compute(p: json_patch::Patch, val: &mut Value) -> (Vec<PatchOp>, Vec<PatchOp>) {
    let mut forward = Vec::new();
    let mut backward = Vec::new();
    for step in p.0.into_iter() {
        // Both translations must look at the document *before* `step` mutates it.
        forward.extend(PatchOp::new(&step, val));
        backward.extend(PatchOp::new_inverse(&step, val));
        json_patch::patch(val, &json_patch::Patch(vec![step])).unwrap();
    }
    (forward, backward)
}

impl PatchOp {
    fn new(p: &json_patch::PatchOperation, val: &Value) -> Vec<PatchOp> {
        match p {
            json_patch::PatchOperation::Add(op) => vec![PatchOp {
                target: JSONPtr::new(&op.path, val),
                action: Action::Add(op.value.clone()),
            }],
            json_patch::PatchOperation::Remove(op) => vec![PatchOp {
                target: JSONPtr::new(&op.path, val),
                action: Action::Remove,
            }],
            json_patch::PatchOperation::Replace(op) => vec![PatchOp {
                target: JSONPtr::new(&op.path, val),
                action: Action::Add(op.value.clone()),
            }],
            json_patch::PatchOperation::Copy(op) => vec![PatchOp {
                target: JSONPtr::new(&op.path, val),
                action: Action::Add(ptr_get(&op.from, val).unwrap()),
            }],
            json_patch::PatchOperation::Move(op) => vec![
                PatchOp {
                    target: JSONPtr::new(&op.path, val),
                    action: Action::Add(ptr_get(&op.from, val).unwrap()),
                },
                PatchOp {
                    target: JSONPtr::new(&op.from, val),
                    action: Action::Remove,
                },
            ],
            json_patch::PatchOperation::Test(_) => {
                error!("test operations are not supported yet");
                vec![]
            }
        }
    }

    fn new_inverse(p: &json_patch::PatchOperation, val: &Value) -> Vec<PatchOp> {
        match p {
            json_patch::PatchOperation::Add(op) => match ptr_get(&op.path, val) {
                None => vec![PatchOp {
                    target: JSONPtr::new(&op.path, val),
                    action: Action::Remove,
                }],
                Some(oldval) => vec![PatchOp {
                    target: JSONPtr::new(&op.path, val),
                    action: Action::Add(oldval), /* really, Replace */
                }],
            },
            json_patch::PatchOperation::Remove(op) => vec![PatchOp {
                target: JSONPtr::new(&op.path, val),
                action: Action::Add(ptr_get(&op.path, val).unwrap()),
            }],
            json_patch::PatchOperation::Replace(op) => vec![PatchOp {
                target: JSONPtr::new(&op.path, val),
                action: Action::Add(ptr_get(&op.path, val).unwrap()),
            }],
            json_patch::PatchOperation::Copy(op) => match ptr_get(&op.path, val) {
                None => vec![PatchOp {
                    target: JSONPtr::new(&op.path, val),
                    action: Action::Remove,
                }],
                Some(oldval) => vec![PatchOp {
                    target: JSONPtr::new(&op.path, val),
                    action: Action::Add(oldval), /* really, Replace */
                }],
            },
            json_patch::PatchOperation::Move(op) => match ptr_get(&op.path, val) {
                None => vec![
                    PatchOp {
                        target: JSONPtr::new(&op.from, val),
                        action: Action::Add(ptr_get(&op.from, val).unwrap()),
                    },
                    PatchOp {
                        target: JSONPtr::new(&op.path, val),
                        action: Action::Remove,
                    },
                ],
                Some(oldval) => vec![
                    PatchOp {
                        target: JSONPtr::new(&op.from, val),
                        action: Action::Add(ptr_get(&op.from, val).unwrap()),
                    },
                    PatchOp {
                        target: JSONPtr::new(&op.path, val),
                        action: Action::Add(oldval), /* really, Replace */
                    },
                ],
            },
            json_patch::PatchOperation::Test(_) => {
                error!("test operations are not supported yet");
                vec![]
            }
        }
    }
}

/// Converts a reduced op back into the JSON Patch operation it stands for.
///
/// Implemented as `From` so that both `PatchOperation::from(op)` and
/// `op.into()` are available.
impl From<PatchOp> for json_patch::PatchOperation {
    fn from(op: PatchOp) -> json_patch::PatchOperation {
        // Render the pointer once; both variants carry it as their path.
        let path = op.target.to_string();
        match op.action {
            Action::Add(value) => {
                json_patch::PatchOperation::Add(json_patch::AddOperation { path, value })
            }
            Action::Remove => {
                json_patch::PatchOperation::Remove(json_patch::RemoveOperation { path })
            }
        }
    }
}

/// The reduced set of operations that a `PatchOp` can perform at its target.
#[derive(Clone, Debug)]
enum Action {
    /// Converted to a JSON Patch `add` carrying this value.
    Add(Value),
    /// Converted to a JSON Patch `remove`.
    Remove,
    // Replace is gone, because it's basically the same as Add (could bring it back later...)
    // Move is gone, because the optimizer can't handle patches which affect multiple paths.
    // Copy is gone too, since it can't be optimized into an Add.
    // Test is gone, because... well maybe there's no good reason.  TODO: think about Test.
}

/// A parsed JSON pointer: the sequence of segments leading from the document
/// root to a location.  An empty sequence is produced for the empty path.
#[derive(Clone, Debug)]
struct JSONPtr(Vec<PtrSeg>);

/// One segment of a `JSONPtr`.
#[derive(Clone, Debug)]
enum PtrSeg {
    /// A key into an object, stored exactly as it appeared in the path text.
    ObjKey(String), // FIXME: Replace with &'a str
    /// A position in an array.  The `-` token is stored as the length the
    /// array had when the pointer was resolved.
    ArrIdx(usize),
}

impl JSONPtr {
    /// Parses `path`, using `value` to decide whether each segment is an
    /// object key or an array index, and returns the parsed pointer.
    fn new(path: &str, value: &Value) -> JSONPtr {
        JSONPtr::new_with_val(path, value).0
    }

    /// Returns the value that `path` refers to inside `value`, if there is one.
    fn val<'a>(path: &str, value: &'a Value) -> Option<&'a Value> {
        JSONPtr::new_with_val(path, value).1
    }

    /// Walks `path` through `value`, returning both the parsed pointer and
    /// the value it refers to (`None` if the path does not resolve).
    ///
    /// Every segment of `path` ends up in the returned pointer, including
    /// ones that cannot be resolved: those are kept as `ObjKey` so that the
    /// pointer still renders back to the path it was parsed from.  Object
    /// keys are stored in their escaped form for the same reason; the `~1`
    /// and `~0` escapes are only decoded for the lookup itself.
    fn new_with_val<'a>(path: &str, value: &'a Value) -> (JSONPtr, Option<&'a Value>) {
        let mut ret = Vec::new();
        let mut value = Some(value);
        // FIXME: What about paths that don't start with a '/'?
        for seg in path.split('/').skip(1) {
            match value {
                Some(Value::Object(obj)) => {
                    // RFC 6901: decode "~1" before "~0", so "~01" becomes "~1".
                    let key = seg.replace("~1", "/").replace("~0", "~");
                    value = obj.get(key.as_str());
                    ret.push(PtrSeg::ObjKey(seg.into()));
                }
                Some(Value::Array(arr)) => {
                    if seg == "-" {
                        value = None;
                        ret.push(PtrSeg::ArrIdx(arr.len()));
                    } else if let Ok(idx) = seg.parse::<usize>() {
                        // `idx == arr.len()` is a legal target for an add, so
                        // an out-of-range index must not panic here.
                        value = arr.get(idx);
                        ret.push(PtrSeg::ArrIdx(idx));
                    } else {
                        // Not a usable index: keep the raw token so whoever
                        // applies the patch can report the bad path.
                        value = None;
                        ret.push(PtrSeg::ObjKey(seg.into()));
                    }
                }
                _ => {
                    // The parent is missing or a scalar; nothing can resolve
                    // below it, but the segment is still part of the path.
                    value = None;
                    ret.push(PtrSeg::ObjKey(seg.into()));
                }
            }
        }
        (JSONPtr(ret), value)
    }
}

/// Renders the pointer as path text: each segment is written with a leading
/// `/`, so the empty pointer renders as the empty string.
///
/// Implemented as `Display` rather than `ToString`; `to_string()` remains
/// available through the standard blanket implementation.
impl std::fmt::Display for JSONPtr {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for seg in &self.0 {
            f.write_str("/")?;
            match seg {
                PtrSeg::ObjKey(key) => f.write_str(key)?,
                PtrSeg::ArrIdx(idx) => write!(f, "{}", idx)?,
            }
        }
        Ok(())
    }
}