~mht/cra

175154e06c774bfb6c2010c44a9b764b43eacf1f — Martin Hafskjold Thoresen 5 months ago 4596b31
Begin on rewriting stats and stats output.
6 files changed, 636 insertions(+), 437 deletions(-)

M cra/Cargo.toml
A cra/README.md
A cra/src/log.rs
M cra/src/main.rs
M cra/src/output.rs
M cra/src/persistence.rs
M cra/Cargo.toml => cra/Cargo.toml +3 -0
@@ 7,3 7,6 @@ description = "blablabla"
[dependencies]
time = "0.1.42"
clap = "2.33"
lazy_static = "1.4"
serde = { version = "1.0.104", features = ["derive"] }
serde_json = "1.0.48"

A cra/README.md => cra/README.md +73 -0
@@ 0,0 1,73 @@
# CRA

This program implements the matrix reduction algorithm for computing the persistence of a complex, in two variants: regular and exhaustive.



## Notes on the algorithm

The implementation of the algorithms is not really aligned all that well with the common descriptions of it, namely as a matrix reduction algorithm.
Instead, we think of the algorithm as working with sets, which corresponds to the boundary of simplices, or matrix columns in the traditional description.
Modulo 2 addition of matrix columns becomes symmetric difference of the sets.

Here's pseudocode that reflects how the algorithms are implemented.

```
reduced = [0; length(simplices)]
for s in simplices
	if s is empty
		continue
	end
	m = max(s)
	if reduced[m] is empty
		reduced[m] = simplex
		continue
	else
		s = symdiff(s, reduced[m])
	end
end
```



## Usage

Run with `--help` to get information on the arguments the program takes.

```sh
cargo run -q -- -help
```
or 
```sh
# this once:
cargo build --release
./target/release/cra --help
```



This will run with `debug` output, which right now prints a comma separated table of which simplices are added to which.
To display this nicely in the terminal, you can pipe it to `column` as follows (only tested on Linux):

```sh
cargo run -- --boundary data/tet.boundary -d | column -ts';'
j   faces             adds          reduced
0   []                []            [0]
0   [0]               []            [0]
1   [0]               [0]           []
2   [0]               [0]           []
3   [0]               [0]           []
5   [1, 2]            []            [1, 2]
6   [1, 3]            []            [1, 3]
7   [1, 4]            []            [1, 4]
8   [2, 3]            [6, 5]        []
9   [2, 4]            [7, 5]        []
10  [3, 4]            [7, 6]        []
11  [5, 6, 8]         []            [5, 6, 8]
12  [5, 7, 9]         []            [5, 7, 9]
13  [6, 7, 10]        []            [6, 7, 10]
14  [8, 9, 10]        [13, 12, 11]  []
15  [11, 12, 13, 14]  []            [11, 12, 13, 14]
```



A cra/src/log.rs => cra/src/log.rs +200 -0
@@ 0,0 1,200 @@
//! This module contains a log of all actions that happens during the run of the algorithms.
//! It is meant to be used by constructing an `Entry`, and operate on it during the course of the algorithm.

use std::io::Write;
use std::collections::HashMap;

use crate::serde::Serialize;

use crate::persistence::Simplex;

/// Stats for a single `simplex_reduce` call.
#[derive(Debug, Copy, Clone)]
pub struct Add {
    /// The dimension of the simplices added together
    pub dim: isize,
    /// The size of the boundary of the simplex that is being reduced.
    pub this_size: usize,
    /// The size of the boundary of the other simplex.
    pub other_size: usize,
    /// This is true when this add was an exhaustive add, namely that low(this) \notin other.
    pub exhaustive: bool,
    /// The ID of the reduced simplex
   	this_j: usize,
   	/// The ID of the other simplex
   	other_j: usize,
}

/// Stats for reducting one simplex.
#[derive(Debug)]
pub struct SimplexReduction {
    /// All adds before the simplex was reduced
    pub adds: Vec<Add>,
    /// This simplex's lowest boundary element after reduced, if any.
    pub place_index: Option<usize>,
    /// Dimension of the simplex
    pub dim: isize,
}

/// Stats for a whole run of the reduction algorithm.
#[derive(Debug)]
pub struct Reduction {
	pub reductions: Vec<SimplexReduction>
}

/// This is the full data that we output.
#[derive(Debug)]
#[derive(Serialize)]
pub struct Aggregate {
    /// Simplex IDs of all additions.
    adds: Vec<(usize, usize)>,
    /// The average number of additions required to reduce a simplex, by dimension.
    avg_adds_required: HashMap<isize, f64>,

	/// The size of a simplex when we add them to reduce the first.
    add_size_by_dim: HashMap<isize, Vec<(usize, usize)>>,
    /// Average of the sizes.
    avg_add_size_by_dim: HashMap<isize, (f64, f64)>,
    /// Histogram of the sizes
    histogram_add_size_by_dim: HashMap<isize, HashMap<usize, (usize, usize)>>,

	/// The sum of sizes of simplices when we reduce
    add_cost_by_dim: HashMap<isize, Vec<usize>>,
    avg_add_cost_by_dim: HashMap<isize, f64>,
    histogram_add_cost_by_dim: HashMap<isize, HashMap<usize, usize>>,
}

impl Reduction {
    pub fn new() -> Self {
        Self {
            reductions: Vec::new()
        }
    }

    pub fn add(&mut self, s: SimplexReduction) {
        self.reductions.push(s);
    }

    pub fn aggregate(&self) -> Aggregate {
        let mut adds = Vec::new();
        for r in self.reductions.iter() {
            for a in r.adds.iter() {
                adds.push((a.this_j, a.other_j));
            }
        }

        let mut avg_adds_required = HashMap::new();
        let mut count = HashMap::new();
        for r in self.reductions.iter() {
            *avg_adds_required.entry(r.dim).or_insert(0.0) += r.adds.len() as f64;
            *count.entry(r.dim).or_insert(0) += 1;
        }
        for (k, &c) in count.iter() {
            *avg_adds_required.get_mut(k).unwrap() /= c as f64;
        }


        let mut add_size_by_dim = HashMap::<isize, Vec<(usize, usize)>>::new();
        for r in self.reductions.iter() {
            if r.adds.len() == 0 { continue; }
            let mut v = add_size_by_dim.entry(r.dim).or_insert(Vec::new());
            for a in r.adds.iter() {
                v.push((a.this_size, a.other_size));
            }
        }

        let mut avg_add_size_by_dim = HashMap::new();
        for (&dim, adds) in add_size_by_dim.iter() {
            if adds.len() == 0 { continue; }
            let avg_f = adds.iter().map(|(f, s)| *f).sum::<usize>() as f64 / adds.len() as f64;
            let avg_s = adds.iter().map(|(f, s)| *s).sum::<usize>() as f64 / adds.len() as f64;
            avg_add_size_by_dim.insert(dim, (avg_f, avg_s));
       	}

       	let mut histogram_add_size_by_dim = HashMap::new();
        for r in self.reductions.iter() {
            if r.adds.len() == 0 { continue; }
            let mut histogram: &mut HashMap<usize, (usize, usize)> = histogram_add_size_by_dim.entry(r.dim).or_insert(HashMap::new());
            for add in r.adds.iter() {
                histogram.entry(add.this_size).or_insert((0, 0)).0 += 1;
                histogram.entry(add.other_size).or_insert((0, 0)).1 += 1;
            }
       	}

        let mut add_cost_by_dim = HashMap::<isize, Vec<usize>>::new();
        for r in self.reductions.iter() {
            if r.adds.len() == 0 { continue; }
            let mut v = add_cost_by_dim.entry(r.dim).or_insert(Vec::new());
            for a in r.adds.iter() {
                v.push(a.this_size + a.other_size);
            }
        }

        let mut avg_add_cost_by_dim = HashMap::new();
        for (&dim, adds) in add_cost_by_dim.iter() {
            if adds.len() == 0 { continue; }
            let avg = adds.iter().map(|f| *f).sum::<usize>() as f64 / adds.len() as f64;
            avg_add_cost_by_dim.insert(dim, avg);
       	}

       	let mut histogram_add_cost_by_dim = HashMap::new();
        for r in self.reductions.iter() {
            if r.adds.len() == 0 { continue; }
            let mut histogram: &mut HashMap<usize, usize> = histogram_add_cost_by_dim.entry(r.dim).or_insert(HashMap::new());
            for add in r.adds.iter() {
                *histogram.entry(add.this_size + add.other_size).or_insert(0) += 1;
            }
       	}


        Aggregate {
            adds,
            avg_adds_required,
            add_size_by_dim,
            avg_add_size_by_dim,
            histogram_add_size_by_dim,
            add_cost_by_dim,
            avg_add_cost_by_dim,
            histogram_add_cost_by_dim,
        }
    }
}

impl Add {
    pub fn regular(this: &Simplex, other: &Simplex) -> Self {
		Self {
    		dim: this.dim,
    		this_size: this.faces.len(),
    		other_size: other.faces.len(),
    		exhaustive: false,
    		this_j: this.j,
    		other_j: other.j,
		}
    }

    pub fn exhaustive(this: &Simplex, other: &Simplex) -> Self {
		Self {
    		dim: this.dim,
    		this_size: this.faces.len(),
    		other_size: other.faces.len(),
    		exhaustive: true,
    		this_j: this.j,
    		other_j: other.j,
		}
    }
}

impl SimplexReduction {
    pub fn new(s: &Simplex) -> Self {
        Self {
            dim: s.dim,
            adds: Vec::new(),
            place_index: None,
        }
    }

	pub fn add(self: &mut Self, add: Add) {
    	self.adds.push(add)
	}
}


M cra/src/main.rs => cra/src/main.rs +33 -123
@@ 1,10 1,15 @@
extern crate clap;
extern crate lazy_static;
extern crate serde;
extern crate serde_json;

// std
use std::fs::File;
use std::io::{BufRead, BufReader, Read, Write};
use std::path::Path;

// mods
mod log;
mod output;
mod persistence;



@@ 14,117 19,6 @@ use persistence::{compute_alpha_values_2d, reduce, Persistence, Simplex};

// consts
const R_CUTOFF: f64 = 250.0;
// TODO: remove this
const MAX_DIM: usize = 10;

/// All the statistics we want to measure for performance reasoning.
#[derive(Debug)]
pub struct Statistics {
    /// Count the number of times we add two simplices
    col_adds: usize,
    /// Count the number of times we add two simplices of a specific dimension
    col_add_dimens: Vec<usize>,
    /// Count the sizes of the simplices we add
    add_size: Vec<(usize, usize)>,
    /// Count the sum of the sizes.
    /// TODO: this is redundant
    add_size_sum: Vec<usize>,
    /// Count the number of iterations for reducing one simplex
    num_iters: Vec<usize>,
    /// Count the number of extra reductions we do when the exhaustive variant is ran.
    ex_reductions: Vec<usize>,
    ex_searches: Vec<usize>,
    /// Count the number of simplices that are reduced to the empty set.
    zeroed: usize,
    /// Count the number of simplices that are reduced to the empty set, but for each dimension.
    zeroed_d: [usize; MAX_DIM],
    placed_single: usize,
    placed_full: usize,
    placed_full_d: [usize; MAX_DIM],
    simplex_with_low_queries: usize,

    // Number of simlpices in all dimensions
    num_of_dimens: [usize; MAX_DIM],
    skip_d: [usize; MAX_DIM],
    // Record the wall-clock time spent.
    time: u64,
}

impl Statistics {
    pub fn new() -> Self {
        Self {
            col_adds: 0,
            col_add_dimens: Vec::new(),
            add_size: Vec::new(),
            add_size_sum: Vec::new(),
            num_iters: Vec::new(),
            ex_reductions: Vec::new(),
            ex_searches: Vec::new(),
            zeroed: 0,
            zeroed_d: [0; MAX_DIM],
            placed_single: 0,
            placed_full: 0,
            placed_full_d: [0; MAX_DIM],
            simplex_with_low_queries: 0,

            num_of_dimens: [0; MAX_DIM],
            skip_d: [0; MAX_DIM],

            time: 0,
        }
    }

    /// Record that there was a column addition, and the dimension of the simplices that were added
    /// together.
    pub fn col_add(&mut self, dim: usize) {
        self.col_adds += 1;
        self.col_add_dimens.push(dim);
    }

    /// Record the number of iterations we needed to find a index containing a low in another
    /// column, while in the exhaustive variant.
    pub fn ex_search(&mut self, c: usize) {
        self.ex_searches.push(c);
    }

    /// Record the number of iterations we needed to exhaustively reduce a column after its `low`
    /// was found.
    pub fn ex_reductions(&mut self, c: usize) {
        self.ex_reductions.push(c);
    }

    /// Record the number of times a reduction ended up removing the simplex.
    pub fn zeroed(&mut self, d: usize) {
        self.zeroed += 1;
        self.zeroed_d[d] += 1;
    }

    /// Record the number of times a reduction ended up with storing a list of indices.
    pub fn placed_full(&mut self, d: usize) {
        self.placed_full += 1;
        self.placed_full_d[d] += 1;
    }

    /// Record the number of times we looked for another column with the same low, and added it to
    /// the current column. This is at least 1.
    pub fn reductions(&mut self, c: usize) {
        self.num_iters.push(c);
    }
    /// Record the sizes of the columns that are summed together
    pub fn add_sizes(&mut self, a: usize, b: usize) {
        self.add_size.push((a, b));
        self.add_size_sum.push(a + b);
    }

    pub fn skip(&mut self, d: usize) {
        self.skip_d[d] += 1;
    }

    /// Record the nubmer of times we're queried the `simplex_with_low` array.
    pub fn swl(&mut self) {
        self.simplex_with_low_queries += 1;
    }
}

#[derive(Copy, Clone, Debug)]
pub struct Point(pub [f64; 2]);


@@ 202,6 96,7 @@ pub fn read_boundary<R: Read>(r: R) -> Result<Vec<Simplex>, Box<dyn std::error::
        simplices.push(Simplex {
            j: i,
            faces: vec![0],
            dim: 0,
        });
    }



@@ 222,7 117,8 @@ pub fn read_boundary<R: Read>(r: R) -> Result<Vec<Simplex>, Box<dyn std::error::
        for f in line.split_whitespace() {
            v.push(f.parse::<usize>()?);
        }
        simplices.push(Simplex { j, faces: v });
        let dim = v.len() as isize - 1;
        simplices.push(Simplex { j, faces: v, dim });
        j += 1;
    }



@@ 340,7 236,7 @@ pub fn read_triangulation<R: Read>(
                [face[2] + 1, face[0] + 1],
            ];
            let mut buf = [0usize; 3];
            for (i, &edge) in current_edges.into_iter().enumerate() {
            for (i, &edge) in current_edges.iter().enumerate() {
                let edge_i = *edge_to_index.entry(sorted_a2(edge)).or_insert_with(|| {
                    let index = simplices.len();
                    simplices.push(edge.to_vec());


@@ 357,7 253,10 @@ pub fn read_triangulation<R: Read>(
    let simplices = simplices
        .into_iter()
        .enumerate()
        .map(|(j, v)| Simplex { j, faces: v })
        .map(|(j, v)| {
            let dim = v.len() as isize - 1;
            Simplex { j, faces: v, dim }
        })
        .collect::<Vec<_>>();

    Ok((points, simplices))


@@ 402,14 301,13 @@ fn main() {
        (@arg diagram: --diagram[FILE] "Use `gnuplot` to print out the persistence diagram.")
        (@arg textable: --("tex-table")[FILE] "Output a `tex` formatted table of statistics.")
        (@arg points: --points[FILE] "Input the location data for the 1-simplices.")
        (@arg debug: -d "print debug information to `stdout`")
        (@arg debug: -d "Print debug information to `stdout`")
    ).arg(Arg::from_usage(
            "<boundary> -b, --boundary <FILE> 'The boundary of all simplices in the complex.'",
        ).long_help("\
This file should contain the boundaries of your simplices.
The boundary of the empty complex and the vertices are implied, and so
the first boundaries in the list should be that of the 1-simplices.
"),
the first boundaries in the list should be that of the 1-simplices.\n "),
    ).get_matches();

    let debug = matches.is_present("debug");


@@ 442,14 340,23 @@ the first boundaries in the list should be that of the 1-simplices.
        alphas: alphas.unwrap_or(Vec::new()),
    };

    let mut stats = Statistics::new();

    let variant = if matches.is_present("exhaustive") {
        Exhaustive
    } else {
        Standard
    };
    let reduction = reduce(&persistence, variant, &mut stats);
    let reduction = reduce(&persistence, variant);



	// println!("{:?}", reduction);
	let aggr = reduction.log.aggregate();
    // println!("{:?}", aggr);
    println!("{}", serde_json::to_string(&aggr).unwrap());
    return;




    if debug {
        let mut out = std::io::stdout();


@@ 460,13 367,16 @@ the first boundaries in the list should be that of the 1-simplices.

    if let Some(path) = matches.value_of("stats") {
        let mut stats_file = open_file_or_stdout(path).unwrap();
        write!(stats_file, "# Standard\n").unwrap();
        output::stats(&stats, &mut stats_file).unwrap();
        write!(stats_file, "# STATISTICS\n").unwrap();
        // TODO: FIXME
        // output::stats(&stats, &mut stats_file).unwrap();
        write!(stats_file, "# STATISTICS end\n").unwrap();
    }

    if let Some(path) = matches.value_of("textable") {
        let mut tex_file = File::create(Path::new(path)).unwrap();
        output::tex_table(&stats, &stats, &mut tex_file).unwrap();
        // TODO: FIXME
        // output::tex_table(&stats, &stats, &mut tex_file).unwrap();
    }

    if let Some(path) = matches.value_of("graphviz") {

M cra/src/output.rs => cra/src/output.rs +307 -308
@@ 4,8 4,8 @@ use std::io::Write;
use std::path::Path;
use std::process::{Command, Stdio};

use crate::{Point, Statistics, MAX_DIM, R_CUTOFF};
use persistence::{Persistence, Reduction, Simplex};
use crate::{Point, R_CUTOFF};
use persistence::{Persistence, Reduction};

/// Format the given `usize` to a `String` with underscores in between each third digit.
///


@@ 390,225 390,225 @@ plot '{}' using 1:2 pt 7 ps 0.25 title "pairs", x"#,
// }

/// Output a `tex` formatted table of statistics to the `writer`.
pub fn tex_table<W: Write>(
    reg: &Statistics,
    exh: &Statistics,
    writer: &mut W,
) -> Result<(), std::io::Error> {
    fn sum(s: &[usize]) -> f64 {
        s.iter().sum::<usize>() as f64
    }

    fn avg(s: &[usize]) -> f64 {
        sum(s) / s.len() as f64
    }

    fn percent(hundred: f64, ours: f64) -> String {
        if hundred == 0.0 {
            format!("")
        } else {
            format!("{:.2}\\%", 100.0 * ours / hundred)
        }
    }

    let header_align = std::iter::repeat("r l")
        .take(MAX_DIM)
        .collect::<Vec<&str>>()
        .join(" ");
    let header_top = (0..MAX_DIM)
        .map(|d| format!("\\multicolumn{{2}}{{c}}{{$d={}$}} ", d))
        .collect::<Vec<_>>()
        .join(" & ");
    write!(
        writer,
        r#"\begin{{tabular}}{{r r {}}}
  Name & Total & {} \\
  \toprule
"#,
        header_align, header_top
    )?;

    let total = reg.num_of_dimens.iter().sum::<usize>();
    let mut line = format!("Simplices & {}", total);
    for &n in reg.num_of_dimens.iter() {
        line.push_str(&format!(
            "& \\hspace{{0.5cm}}{} & {}",
            thousand_split(n),
            percent(total as f64, n as f64)
        ));
    }
    writeln!(writer, "{}\\\\", line)?;

    let total = reg.skip_d.iter().sum::<usize>();
    let mut line = format!("Skipped & {}", total);
    for &n in reg.skip_d.iter() {
        line.push_str(&format!(
            "& {} & {}",
            thousand_split(n),
            percent(total as f64, n as f64)
        ));
    }
    writeln!(writer, "{}\\\\", line)?;

    let total = reg.zeroed_d.iter().sum::<usize>();
    let mut line = format!("Zeroed & {}", total);
    for &n in reg.zeroed_d.iter() {
        line.push_str(&format!(
            "& {} & {}",
            thousand_split(n),
            percent(total as f64, n as f64)
        ));
    }
    writeln!(writer, "{}\\\\", line)?;

    let total = reg.placed_full_d.iter().sum::<usize>();
    let mut line = format!("Stored & {}", total);
    for &n in reg.placed_full_d.iter() {
        line.push_str(&format!(
            "& {} & {}",
            thousand_split(n),
            percent(total as f64, n as f64)
        ));
    }
    writeln!(writer, "{}\\\\", line)?;

    writeln!(writer, "\\end{{tabular}}\n")?;
    writeln!(writer, "\\vspace{{1cm}}")?;

    let mut v: Vec<(String, String, String, String)> = Vec::new();

    v.push((
        format!("Column additions"),
        thousand_split(reg.col_adds),
        thousand_split(exh.col_adds),
        percent(reg.col_adds as f64, exh.col_adds as f64),
    ));

    v.push((
        format!("Estimate \\texttt{{column\\_add}} cost"),
        thousand_split(sum(&reg.add_size_sum) as usize),
        thousand_split(sum(&exh.add_size_sum) as usize),
        percent(sum(&reg.add_size_sum), sum(&exh.add_size_sum)),
    ));

    v.push((
        format!("\\texttt{{simplex\\_with\\_low}} queries"),
        thousand_split(reg.simplex_with_low_queries),
        thousand_split(exh.simplex_with_low_queries),
        percent(
            reg.simplex_with_low_queries as f64,
            exh.simplex_with_low_queries as f64,
        ),
    ));

    v.push((
        format!("Time spent (ns)"),
        thousand_split(reg.time as usize),
        thousand_split(exh.time as usize),
        percent(reg.time as f64, exh.time as f64),
    ));

    let as0 = |s: &[(usize, usize)]| s.iter().map(|(a, _)| a).cloned().collect::<Vec<_>>();
    v.push((
        format!("Size of first operand"),
        format!("{:.4}", avg(&as0(&reg.add_size))),
        format!("{:.4}", avg(&as0(&exh.add_size))),
        percent(avg(&as0(&reg.add_size)), avg(&as0(&exh.add_size))),
    ));

    let as1 = |s: &[(usize, usize)]| s.iter().map(|(_, b)| b).cloned().collect::<Vec<_>>();
    v.push((
        format!("Size of second operand"),
        format!("{:.4}", avg(&as1(&reg.add_size))),
        format!("{:.4}", avg(&as1(&exh.add_size))),
        percent(avg(&as1(&reg.add_size)), avg(&as1(&exh.add_size))),
    ));

    v.push((
        format!("Iters for reducing a column"),
        format!("{:.4}", avg(&reg.num_iters)),
        format!("{:.4}", avg(&exh.num_iters)),
        percent(avg(&reg.num_iters), avg(&exh.num_iters)),
    ));

    v.push((
        format!("Cost of one column addition"),
        format!("{:.4}", avg(&reg.add_size_sum)),
        format!("{:.4}", avg(&exh.add_size_sum)),
        percent(avg(&reg.add_size_sum), avg(&exh.add_size_sum)),
    ));

    v.push((
        format!("Iters to exh.\\ reduce a col"),
        format!(""),
        format!("{:.4}", avg(&exh.ex_reductions)),
        format!(""),
    ));

    v.push((
        format!("Searches to find \\texttt{{j=low(k)}}"),
        format!(""),
        format!("{:.4}", avg(&exh.ex_searches)),
        format!(""),
    ));

    // TODO: 4 could be bigger!
    let mut reg_counts = [0; 4];
    for &d in &reg.col_add_dimens {
        reg_counts[(d + 1) as usize] += 1;
    }
    let mut exh_counts = [0; 4];
    for &d in &exh.col_add_dimens {
        exh_counts[(d + 1) as usize] += 1;
    }

    for (d1, (&r, &e)) in reg_counts.iter().zip(exh_counts.iter()).enumerate().skip(1) {
        v.push((
            format!("Column adds with $d={}$", d1 as isize - 1),
            thousand_split(r),
            thousand_split(e),
            percent(r as f64, e as f64),
        ));
    }

    write!(
        writer,
        r#"\begin{{tabular}}{{r r r r}}
  Counter & Regular & Exhausitve & Difference \\
  \toprule
"#
    )?;
    for (l, a, b, p) in v {
        write!(writer, "  {} & {} & {} & {}\\\\\n", l, a, b, p)?;
    }
    writeln!(writer, "\\end{{tabular}}")?;

    Ok(())
}
// pub fn tex_table<W: Write>(
//     reg: &Statistics,
//     exh: &Statistics,
//     writer: &mut W,
// ) -> Result<(), std::io::Error> {
//     fn sum(s: &[usize]) -> f64 {
//         s.iter().sum::<usize>() as f64
//     }
// 
//     fn avg(s: &[usize]) -> f64 {
//         sum(s) / s.len() as f64
//     }
// 
//     fn percent(hundred: f64, ours: f64) -> String {
//         if hundred == 0.0 {
//             format!("")
//         } else {
//             format!("{:.2}\\%", 100.0 * ours / hundred)
//         }
//     }
// 
//     let header_align = std::iter::repeat("r l")
//         .take(MAX_DIM)
//         .collect::<Vec<&str>>()
//         .join(" ");
//     let header_top = (0..MAX_DIM)
//         .map(|d| format!("\\multicolumn{{2}}{{c}}{{$d={}$}} ", d))
//         .collect::<Vec<_>>()
//         .join(" & ");
//     write!(
//         writer,
//         r#"\begin{{tabular}}{{r r {}}}
//   Name & Total & {} \\
//   \toprule
// "#,
//         header_align, header_top
//     )?;
// 
//     let total = reg.num_of_dimens.iter().sum::<usize>();
//     let mut line = format!("Simplices & {}", total);
//     for &n in reg.num_of_dimens.iter() {
//         line.push_str(&format!(
//             "& \\hspace{{0.5cm}}{} & {}",
//             thousand_split(n),
//             percent(total as f64, n as f64)
//         ));
//     }
//     writeln!(writer, "{}\\\\", line)?;
// 
//     let total = reg.skip_d.iter().sum::<usize>();
//     let mut line = format!("Skipped & {}", total);
//     for &n in reg.skip_d.iter() {
//         line.push_str(&format!(
//             "& {} & {}",
//             thousand_split(n),
//             percent(total as f64, n as f64)
//         ));
//     }
//     writeln!(writer, "{}\\\\", line)?;
// 
//     let total = reg.zeroed_d.iter().sum::<usize>();
//     let mut line = format!("Zeroed & {}", total);
//     for &n in reg.zeroed_d.iter() {
//         line.push_str(&format!(
//             "& {} & {}",
//             thousand_split(n),
//             percent(total as f64, n as f64)
//         ));
//     }
//     writeln!(writer, "{}\\\\", line)?;
// 
//     let total = reg.placed_full_d.iter().sum::<usize>();
//     let mut line = format!("Stored & {}", total);
//     for &n in reg.placed_full_d.iter() {
//         line.push_str(&format!(
//             "& {} & {}",
//             thousand_split(n),
//             percent(total as f64, n as f64)
//         ));
//     }
//     writeln!(writer, "{}\\\\", line)?;
// 
//     writeln!(writer, "\\end{{tabular}}\n")?;
//     writeln!(writer, "\\vspace{{1cm}}")?;
// 
//     let mut v: Vec<(String, String, String, String)> = Vec::new();
// 
//     v.push((
//         format!("Column additions"),
//         thousand_split(reg.col_adds),
//         thousand_split(exh.col_adds),
//         percent(reg.col_adds as f64, exh.col_adds as f64),
//     ));
// 
//     v.push((
//         format!("Estimate \\texttt{{column\\_add}} cost"),
//         thousand_split(sum(&reg.add_size_sum) as usize),
//         thousand_split(sum(&exh.add_size_sum) as usize),
//         percent(sum(&reg.add_size_sum), sum(&exh.add_size_sum)),
//     ));
// 
//     v.push((
//         format!("\\texttt{{simplex\\_with\\_low}} queries"),
//         thousand_split(reg.simplex_with_low_queries),
//         thousand_split(exh.simplex_with_low_queries),
//         percent(
//             reg.simplex_with_low_queries as f64,
//             exh.simplex_with_low_queries as f64,
//         ),
//     ));
// 
//     v.push((
//         format!("Time spent (ns)"),
//         thousand_split(reg.time as usize),
//         thousand_split(exh.time as usize),
//         percent(reg.time as f64, exh.time as f64),
//     ));
// 
//     let as0 = |s: &[(usize, usize)]| s.iter().map(|(a, _)| a).cloned().collect::<Vec<_>>();
//     v.push((
//         format!("Size of first operand"),
//         format!("{:.4}", avg(&as0(&reg.add_size))),
//         format!("{:.4}", avg(&as0(&exh.add_size))),
//         percent(avg(&as0(&reg.add_size)), avg(&as0(&exh.add_size))),
//     ));
// 
//     let as1 = |s: &[(usize, usize)]| s.iter().map(|(_, b)| b).cloned().collect::<Vec<_>>();
//     v.push((
//         format!("Size of second operand"),
//         format!("{:.4}", avg(&as1(&reg.add_size))),
//         format!("{:.4}", avg(&as1(&exh.add_size))),
//         percent(avg(&as1(&reg.add_size)), avg(&as1(&exh.add_size))),
//     ));
// 
//     v.push((
//         format!("Iters for reducing a column"),
//         format!("{:.4}", avg(&reg.num_iters)),
//         format!("{:.4}", avg(&exh.num_iters)),
//         percent(avg(&reg.num_iters), avg(&exh.num_iters)),
//     ));
// 
//     v.push((
//         format!("Cost of one column addition"),
//         format!("{:.4}", avg(&reg.add_size_sum)),
//         format!("{:.4}", avg(&exh.add_size_sum)),
//         percent(avg(&reg.add_size_sum), avg(&exh.add_size_sum)),
//     ));
// 
//     v.push((
//         format!("Iters to exh.\\ reduce a col"),
//         format!(""),
//         format!("{:.4}", avg(&exh.ex_reductions)),
//         format!(""),
//     ));
// 
//     v.push((
//         format!("Searches to find \\texttt{{j=low(k)}}"),
//         format!(""),
//         format!("{:.4}", avg(&exh.ex_searches)),
//         format!(""),
//     ));
// 
//     // TODO: 4 could be bigger!
//     let mut reg_counts = [0; 4];
//     for &d in &reg.col_add_dimens {
//         reg_counts[(d + 1) as usize] += 1;
//     }
//     let mut exh_counts = [0; 4];
//     for &d in &exh.col_add_dimens {
//         exh_counts[(d + 1) as usize] += 1;
//     }
// 
//     for (d1, (&r, &e)) in reg_counts.iter().zip(exh_counts.iter()).enumerate().skip(1) {
//         v.push((
//             format!("Column adds with $d={}$", d1 as isize - 1),
//             thousand_split(r),
//             thousand_split(e),
//             percent(r as f64, e as f64),
//         ));
//     }
// 
//     write!(
//         writer,
//         r#"\begin{{tabular}}{{r r r r}}
//   Counter & Regular & Exhausitve & Difference \\
//   \toprule
// "#
//     )?;
//     for (l, a, b, p) in v {
//         write!(writer, "  {} & {} & {} & {}\\\\\n", l, a, b, p)?;
//     }
//     writeln!(writer, "\\end{{tabular}}")?;
// 
//     Ok(())
// }

// Get the three vertices of a triangle. This isn't super trivial, since the triangle has
// three edges, and so we have to eliminate the duplicates. Still pretty straight forward.
fn get_triangle_xyz(simplex: &Simplex, simplices: &[Simplex]) -> (usize, usize, usize) {
    let xy_i = simplex.faces[0];
    let xy = &simplices[xy_i];
    let zx_i = simplex.faces[2];
    let zx = &simplices[zx_i];
    // The labels here may be messed up due to the sorting when making the edges. Get it
    // right.
    if xy.faces[0] == zx.faces[0] {
        // xy, xz
        (xy.faces[0], xy.faces[1], zx.faces[1])
    } else if xy.faces[0] == zx.faces[1] {
        // xy, zx
        (xy.faces[0], xy.faces[1], zx.faces[0])
    } else if xy.faces[1] == zx.faces[0] {
        // yx, xz
        (xy.faces[1], xy.faces[0], zx.faces[1])
    } else {
        // yx, zx
        (xy.faces[1], xy.faces[0], zx.faces[0])
    }
}
// // Get the three vertices of a triangle. This isn't super trivial, since the triangle has
// // three edges, and so we have to eliminate the duplicates. Still pretty straight forward.
// fn get_triangle_xyz(simplex: &Simplex, simplices: &[Simplex]) -> (usize, usize, usize) {
//     let xy_i = simplex.faces[0];
//     let xy = &simplices[xy_i];
//     let zx_i = simplex.faces[2];
//     let zx = &simplices[zx_i];
//     // The labels here may be messed up due to the sorting when making the edges. Get it
//     // right.
//     if xy.faces[0] == zx.faces[0] {
//         // xy, xz
//         (xy.faces[0], xy.faces[1], zx.faces[1])
//     } else if xy.faces[0] == zx.faces[1] {
//         // xy, zx
//         (xy.faces[0], xy.faces[1], zx.faces[0])
//     } else if xy.faces[1] == zx.faces[0] {
//         // yx, xz
//         (xy.faces[1], xy.faces[0], zx.faces[1])
//     } else {
//         // yx, zx
//         (xy.faces[1], xy.faces[0], zx.faces[0])
//     }
// }

/// Write out a dotfile with the directed edges given by `simplex_with_low`
pub fn graphviz<W: Write>(


@@ 617,7 617,6 @@ pub fn graphviz<W: Write>(
    exh: &Reduction,
    w: &mut W,
) -> Result<(), std::io::Error> {
    const SCALE: f64 = 5.0;
    writeln!(w, "digraph {{")?;
    writeln!(w, "  rankdir=LR;")?;



@@ 693,90 692,90 @@ pub fn run_gnuplot(script: &str, out_file: &Path) {
    f.write_all(&output.stdout)
        .expect("Failed to write to gnuplot output file");
}

pub fn stats<W: Write>(st: &Statistics, writer: &mut W) -> Result<(), std::io::Error> {
    let mut print_pairs = Vec::new();

    print_pairs.push(("Column additions", thousand_split(st.col_adds)));

    let adds = st.add_size_sum.iter().sum::<usize>() as usize;
    print_pairs.push(("Estimate total 'column_add' cost", thousand_split(adds)));

    print_pairs.push((
        "`simplex_with_low` queries",
        thousand_split(st.simplex_with_low_queries),
    ));

    print_pairs.push(("Time spent (ns)", thousand_split(st.time as usize)));

    // ~10 cycles per L2 hit seems reasonable?
    let instr_cost_estimate = st.simplex_with_low_queries * 10 + adds;
    print_pairs.push((
        "Instruction cost estimate",
        thousand_split(instr_cost_estimate),
    ));

    let add_size0 =
        st.add_size.iter().map(|(a, _)| a).sum::<usize>() as f64 / st.add_size.len() as f64;
    print_pairs.push(("Average size of first operand", format!("{}", add_size0)));

    let add_size1 =
        st.add_size.iter().map(|(_, b)| b).sum::<usize>() as f64 / st.add_size.len() as f64;
    print_pairs.push(("Average size of second operand", format!("{}", add_size1)));

    let num_iters = st.num_iters.iter().sum::<usize>() as f64 / st.num_iters.len() as f64;
    print_pairs.push((
        "Average #iterations before reducing a column",
        format!("{}", num_iters),
    ));

    let column_add_cost =
        st.add_size_sum.iter().sum::<usize>() as f64 / st.add_size_sum.len() as f64;
    print_pairs.push((
        "Average cost of one column addition",
        format!("{}", column_add_cost),
    ));

    if st.ex_reductions.len() > 0 {
        let ex_reductions =
            st.ex_reductions.iter().sum::<usize>() as f64 / st.ex_reductions.len() as f64;
        print_pairs.push((
            "Average iterations to exh. reduce a col",
            format!("{}", ex_reductions),
        ));
    }

    if st.ex_searches.len() > 0 {
        let ex_searches = st.ex_searches.iter().sum::<usize>() as f64 / st.ex_searches.len() as f64;
        print_pairs.push((
            "Average searches to find `j=low(k)`",
            format!("{}", ex_searches),
        ));
    }

    print_pairs.push(("Number of zeroed columns", thousand_split(st.zeroed)));
    print_pairs.push(("Number of single columns", thousand_split(st.placed_single)));
    print_pairs.push((
        "Number of remaining columns",
        thousand_split(st.placed_full),
    ));

    let mut last_pos = 0;
    for (i, &n) in st.num_of_dimens.iter().enumerate() {
        if n > 0 {
            last_pos = i;
        }
    }
    let strs = (0..last_pos + 1)
        .map(|d| format!("Simplices with d={}", d))
        .collect::<Vec<_>>();
    for d in 0..last_pos + 1 {
        print_pairs.push((&strs[d], thousand_split(st.num_of_dimens[d])));
    }

    let max_str = print_pairs.iter().map(|(s, _)| s.len()).max().unwrap();
    for (left, right) in print_pairs {
        write!(writer, "{:>2$}: {}\n", left, right, max_str)?;
    }
    Ok(())
}
//
// pub fn stats<W: Write>(st: &Statistics, writer: &mut W) -> Result<(), std::io::Error> {
//     let mut print_pairs = Vec::new();
// 
//     print_pairs.push(("Column additions", thousand_split(st.col_adds)));
// 
//     let adds = st.add_size_sum.iter().sum::<usize>() as usize;
//     print_pairs.push(("Estimate total 'column_add' cost", thousand_split(adds)));
// 
//     print_pairs.push((
//         "`simplex_with_low` queries",
//         thousand_split(st.simplex_with_low_queries),
//     ));
// 
//     print_pairs.push(("Time spent (ns)", thousand_split(st.time as usize)));
// 
//     // ~10 cycles per L2 hit seems reasonable?
//     let instr_cost_estimate = st.simplex_with_low_queries * 10 + adds;
//     print_pairs.push((
//         "Instruction cost estimate",
//         thousand_split(instr_cost_estimate),
//     ));
// 
//     let add_size0 =
//         st.add_size.iter().map(|(a, _)| a).sum::<usize>() as f64 / st.add_size.len() as f64;
//     print_pairs.push(("Average size of first operand", format!("{}", add_size0)));
// 
//     let add_size1 =
//         st.add_size.iter().map(|(_, b)| b).sum::<usize>() as f64 / st.add_size.len() as f64;
//     print_pairs.push(("Average size of second operand", format!("{}", add_size1)));
// 
//     let num_iters = st.num_iters.iter().sum::<usize>() as f64 / st.num_iters.len() as f64;
//     print_pairs.push((
//         "Average #iterations before reducing a column",
//         format!("{}", num_iters),
//     ));
// 
//     let column_add_cost =
//         st.add_size_sum.iter().sum::<usize>() as f64 / st.add_size_sum.len() as f64;
//     print_pairs.push((
//         "Average cost of one column addition",
//         format!("{}", column_add_cost),
//     ));
// 
//     if st.ex_reductions.len() > 0 {
//         let ex_reductions =
//             st.ex_reductions.iter().sum::<usize>() as f64 / st.ex_reductions.len() as f64;
//         print_pairs.push((
//             "Average iterations to exh. reduce a col",
//             format!("{}", ex_reductions),
//         ));
//     }
// 
//     if st.ex_searches.len() > 0 {
//         let ex_searches = st.ex_searches.iter().sum::<usize>() as f64 / st.ex_searches.len() as f64;
//         print_pairs.push((
//             "Average searches to find `j=low(k)`",
//             format!("{}", ex_searches),
//         ));
//     }
// 
//     print_pairs.push(("Number of zeroed columns", thousand_split(st.zeroed)));
//     print_pairs.push(("Number of single columns", thousand_split(st.placed_single)));
//     print_pairs.push((
//         "Number of remaining columns",
//         thousand_split(st.placed_full),
//     ));
// 
//     let mut last_pos = 0;
//     for (i, &n) in st.num_of_dimens.iter().enumerate() {
//         if n > 0 {
//             last_pos = i;
//         }
//     }
//     let strs = (0..last_pos + 1)
//         .map(|d| format!("Simplices with d={}", d))
//         .collect::<Vec<_>>();
//     for d in 0..last_pos + 1 {
//         print_pairs.push((&strs[d], thousand_split(st.num_of_dimens[d])));
//     }
// 
//     let max_str = print_pairs.iter().map(|(s, _)| s.len()).max().unwrap();
//     for (left, right) in print_pairs {
//         write!(writer, "{:>2$}: {}\n", left, right, max_str)?;
//     }
//     Ok(())
// }

M cra/src/persistence.rs => cra/src/persistence.rs +20 -6
@@ 1,8 1,8 @@
use crate::{Point, Statistics};
use crate::{log, Point};
use std::io::Write;

/// The three variants of the algorithm.
#[derive(PartialEq)]
#[derive(PartialEq, Debug, Clone, Copy)]
pub enum Variant {
    /// The starndard version terminates a simplex reduction when no other reduced simplex shares it's
    /// highest boundary simplex.


@@ 22,18 22,20 @@ pub struct Simplex {
    pub faces: Vec<usize>,
    // The ball radius when this simplex is born.
    // pub r_when_born: f64,
    pub dim: isize,
}

impl Simplex {
    /// Get the dimension of this simplex.
    pub fn dim(&self) -> isize {
        self.faces.len() as isize - 1
        self.dim
    }
    /// Make the empty simplex
    pub fn empty() -> Self {
        Simplex {
            j: 0,
            faces: Vec::new(),
            dim: -1,
        }
    }
}


@@ 60,6 62,8 @@ pub struct Reduction {
    /// Here we track which elements are added to which columns.
    /// This was probably used for the output?
    pub additions: Vec<(usize, usize)>,

    pub log: log::Reduction,
}

impl Reduction {


@@ 81,7 85,6 @@ impl Reduction {
            killed.insert(killer, victim);
        }

        let n = simplices.len();
        writeln!(w, "j;faces;adds;reduced")?;
        for s in simplices {
            let empty_vec = Vec::new();


@@ 109,10 112,13 @@ impl Reduction {

/// Run the reduction algorithm on the given `Persistence`. The `variant` is ran,
/// and stats are recorded in `stats`.
pub fn reduce(p: &Persistence, variant: Variant, _stats: &mut Statistics) -> Reduction {
pub fn reduce(p: &Persistence, variant: Variant) -> Reduction {
    let mut pairings = Vec::new();
    let mut additions = Vec::new();
    let mut reduced_with_low: Vec<Option<Simplex>> = vec![None; p.simplices.len()];

    let mut reduction_log = log::Reduction::new();

    // We're reducing each simplex.
    for simplex in &p.simplices {
        // we'll store an index here, if we don't fully reduce the simplex


@@ 124,14 130,18 @@ pub fn reduce(p: &Persistence, variant: Variant, _stats: &mut Statistics) -> Red
        let mut current_reduced = simplex.clone();
        current_reduced.faces.sort();

        let mut simplex_log = log::SimplexReduction::new(&simplex);

        while current_reduced.faces.len() > 0 {
            let low = *current_reduced.faces.last().unwrap();
            if let Some(ref other) = reduced_with_low[low] {
                additions.push((current_reduced.j, other.j));
                simplex_reduce(&mut current_reduced, other);
                simplex_log.add(log::Add::regular(&current_reduced, other));
            } else {
                place_index = Some(low);
                pairings.push((low, simplex.j));
                simplex_log.place_index = place_index;
                break;
            }
        }


@@ 148,6 158,7 @@ pub fn reduce(p: &Persistence, variant: Variant, _stats: &mut Statistics) -> Red
                if let Some(ref cand) = reduced_with_low[cand_i] {
                    additions.push((current_reduced.j, cand.j));
                    simplex_reduce(&mut current_reduced, cand);
                    simplex_log.add(log::Add::exhaustive(&current_reduced, cand));
                    n = current_reduced.faces.len();
                } else {
                    safe_simplices += 1;


@@ 161,16 172,19 @@ pub fn reduce(p: &Persistence, variant: Variant, _stats: &mut Statistics) -> Red
            );
            reduced_with_low[i] = Some(current_reduced);
        }

        reduction_log.add(simplex_log);
    }

    Reduction {
        pairings,
        simplex_with_low: reduced_with_low,
        additions,
        log: reduction_log,
    }
}

/// Reduce `this` by joinint it with `other`.
/// Reduce `this` by joining it with `other`.
fn simplex_reduce(this: &mut Simplex, other: &Simplex) {
    // TODO: This is terrible!
    let mut slow_vec = Vec::new();