~mht/cra

0ae09d4ccc2fc00e0591c85d8885f2cc724679be — Martin Hafskjold Thoresen 2 years ago a3f4797
Fix plot bug, and improve stats printing
4 files changed, 147 insertions(+), 102 deletions(-)

M cra/Cargo.toml
M cra/Makefile
M cra/generate-points.py
M cra/src/main.rs
M cra/Cargo.toml => cra/Cargo.toml +1 -0
@@ 4,3 4,4 @@ version = "0.1.0"
authors = ["Martin Hafskjold Thoresen <git@mht.technology>"]

[dependencies]
time = "0.1.42"

M cra/Makefile => cra/Makefile +1 -1
@@ 8,7 8,7 @@
	python3 svg.py < $^ > $@

%.out: %.tri target/debug/cra src/main.rs
	cargo run -- < $< | grep "pair" | sed -e "s/[^0-9 .-]//g"  > $@
	cargo run --release -- < $< | grep "pair" | sed -e "s/[^0-9 .-]//g"  > $@

%.pdf: %.out
	@echo "\

M cra/generate-points.py => cra/generate-points.py +3 -3
@@ 3,11 3,11 @@
from random import seed, random

min_x = 0
max_x = 800
max_x = 10000
min_y = 0
max_y = 800
max_y = 10000

num_points = 10000
num_points = int(1e6)

seed(0)


M cra/src/main.rs => cra/src/main.rs +142 -98
@@ 1,5 1,6 @@
#![allow(dead_code)]

use std::fmt::Write as fmtWrite;
use std::cmp::Ordering::*;
use std::collections::HashSet;
use std::fs::File;


@@ 7,11 8,15 @@ use std::io::{BufRead, BufReader, Write};
use std::iter;
use std::process::{Command, Stdio};

const R_CUTOFF: f32 = 300.0;
const R_CUTOFF: f64 = 300.0;

fn f32_eq(a: f32, b: f32) -> bool {
fn f64_eq(a: f64, b: f64) -> bool {
    let u = 1_000_000.0;
    let a = (a * u) as usize;
    let b = (b * u) as usize;
    a == b
    // limit is ~0.00119
    (a - b).abs() < 10_000.0 * std::f32::EPSILON
    // (a - b).abs() < 10_000.0 * std::f64::EPSILON
}

/// All the statistics we want to measure for performance reasoning.


@@ 28,6 33,8 @@ pub struct Statistics {
    placed_single: usize,
    placed_full: usize,
    pops: usize,

    time: u64,
}

impl Statistics {


@@ 44,6 51,8 @@ impl Statistics {
            placed_single: 0,
            placed_full: 0,
            pops: 0,

            time: 0,
        }
    }



@@ 79,7 88,8 @@ impl Statistics {
        self.placed_full += 1;
    }

    /// Record the number of iterations we used before finishing reducing a column
    /// Record the number of times we looked for another column with the same low, and added it to
    /// the current column. This is at least 1.
    pub fn reductions(&mut self, c: usize) {
        self.num_iters.push(c);
    }


@@ 94,51 104,85 @@ impl Statistics {
        self.add_iters.push(c);
    }

    pub fn time(&mut self) {
        self.time = time::precise_time_ns();

    }

    pub fn eprint_time(&self) {
        let now = time::precise_time_ns();
        let diff_ns = (now - self.time) as f64;
        let diff_ms = diff_ns / 1_000_000.0;
        eprintln!("Time spent: {}ms", diff_ms.round());
    }

    /// Print out stats to stderr.
    pub fn eprint_avg(&self) {
        eprint!("Column additions: ");
        eprint_thousand_split(self.col_adds);
        eprintln!();
        let mut print_pairs = Vec::new();

        print_pairs.push(("Column additions", thousand_split(self.col_adds)));

        let add_size0 =
            self.add_size.iter().map(|(a, _)| a).sum::<usize>() as f32 / self.add_size.len() as f32;
        eprintln!("  Average size of first operand:  {}", add_size0);
            self.add_size.iter().map(|(a, _)| a).sum::<usize>() as f64 / self.add_size.len() as f64;
        print_pairs.push(("Average size of first operand", format!("{}", add_size0)));

        let add_size1 =
            self.add_size.iter().map(|(_, b)| b).sum::<usize>() as f32 / self.add_size.len() as f32;
        eprintln!("  Average size of second operand: {}", add_size1);
            self.add_size.iter().map(|(_, b)| b).sum::<usize>() as f64 / self.add_size.len() as f64;
        print_pairs.push(("Average size of second operand", format!("{}", add_size1)));

        let num_iters = self.num_iters.iter().sum::<usize>() as f32 / self.num_iters.len() as f32;
        eprintln!(
            "Average #iterations before reducing a column: {}",
            num_iters
        );
        let num_iters = self.num_iters.iter().sum::<usize>() as f64 / self.num_iters.len() as f64;
        print_pairs.push(("Average #iterations before reducing a column", format!("{}", num_iters)));

        let xor_cost =
            self.add_size_sum.iter().sum::<usize>() as f32 / self.add_size_sum.len() as f32;
        eprintln!("Average cost of one column addition: {}", xor_cost);
            self.add_size_sum.iter().sum::<usize>() as f64 / self.add_size_sum.len() as f64;
        print_pairs.push(("Average cost of one column addition", format!("{}", xor_cost)));

        let adds = xor_cost * self.col_adds as f32 + self.pops as f32;
        eprint!("Estimate of number of adds: ");
        eprint_thousand_split(adds as usize);
        eprintln!();
        let adds = (xor_cost * self.col_adds as f64 + self.pops as f64) as usize;
        print_pairs.push(("Estimate of number of adds", thousand_split(adds)));

        if self.ex_reductions.len() > 0 {
            let ex_reductions =
                self.ex_reductions.iter().sum::<usize>() as f32 / self.ex_reductions.len() as f32;
            eprintln!("Average iterations to exh. reduce a col: {}", ex_reductions);
                self.ex_reductions.iter().sum::<usize>() as f64 / self.ex_reductions.len() as f64;
            print_pairs.push(("Average iterations to exh. reduce a col", format!("{}", ex_reductions)));
        }

        if self.ex_searches.len() > 0 {
            let ex_searches =
                self.ex_searches.iter().sum::<usize>() as f32 / self.ex_searches.len() as f32;
            eprintln!("Average searches to find `j=low(k)`: {}", ex_searches);
                self.ex_searches.iter().sum::<usize>() as f64 / self.ex_searches.len() as f64;
            print_pairs.push(("Average searches to find `j=low(k)`", format!("{}", ex_searches)));
        }

        eprintln!("Number of zeroed columns:    {}", self.zeroed);
        eprintln!("Number of single columns:    {}", self.placed_single);
        eprintln!("Number of remaining columns: {}", self.placed_full);
        print_pairs.push(("Number of zeroed columns", thousand_split(self.zeroed)));
        print_pairs.push(("Number of single columns", thousand_split(self.placed_single)));
        print_pairs.push(("Number of remaining columns", thousand_split(self.placed_full)));

        let max_str = print_pairs.iter().map(|(s, _)| s.len()).max().unwrap();
        for (left, right) in print_pairs {
            eprintln!("{:>2$}: {}", left, right, max_str);
        }
    }
}

fn thousand_split(mut num: usize) -> String {
    let mut out = String::new();
    let mut nums = vec![];
    while num > 0 {
        nums.push(num % 1_000);
        num /= 1_000;
    }
    for i in (1..nums.len()).rev() {
        if i == nums.len() - 1 {
            write!(out, "{},", nums[i]);
        } else {
            write!(out, "{:03},", nums[i]);
        }
    }
    if nums.len() == 1 {
        write!(out, "{}", nums[0]);
    } else {
        write!(out, "{:03}", nums[0]);
    }
    out
}

fn eprint_thousand_split(mut num: usize) {


@@ 169,7 213,7 @@ pub struct Simplex {
    /// Indices of the faces of this simplex
    faces: Vec<usize>,
    /// The ball radius when this simplex is born.
    r_when_born: f32,
    r_when_born: f64,
}

impl Simplex {


@@ 184,7 228,7 @@ pub struct Persistence {
    /// All simplices in the data set
    simplices: Vec<Simplex>,
    /// Point location data, for rendering.
    points: Vec<[f32; 2]>,
    points: Vec<[f64; 2]>,
}

type Error = Box<std::error::Error>;


@@ 205,13 249,13 @@ pub fn read_input_stdin2() -> Result<Persistence, Error> {
        .take(number_of_vertices)
        .map(|line| {
            let line = line.unwrap();
            let mut buf = [0f32; 2];
            let mut buf = [0f64; 2];
            for (i, string) in line.split(' ').enumerate() {
                buf[i] = string.parse::<f32>().unwrap();
                buf[i] = string.parse::<f64>().unwrap();
            }
            buf
        })
        .collect::<Vec<[f32; 2]>>();
        .collect::<Vec<[f64; 2]>>();

    assert_eq!(lines.next().unwrap().unwrap(), ""); // the empty line



@@ 346,7 390,7 @@ pub fn read_input_stdin2() -> Result<Persistence, Error> {
                let circumradius = (xy * yz * xz)
                    / ((xy + yz + xz) * (xy + yz - xz) * (xy - yz + xz) * (-xy + yz + xz)).sqrt();

                if am * 180.0 / std::f32::consts::PI >= 90.0 {
                if am * 180.0 / std::f64::consts::PI >= 90.0 {
                    // When one angle is > 90.0, the circumcenter of the triangle is outside.  Then
                    // the balls from the edges that's closest to the circumcenter doesn't meet
                    // before they are at the circumcenter. Thus we must ajust the `r_when_born` on


@@ 420,6 464,7 @@ pub fn reduce(p: &Persistence, exhaustive: bool, stats: &mut Statistics) -> Vec<

        let mut iter = 0;
        loop {
            iter += 1;
            let low = current_simplex.last().unwrap();

            if simplex_with_low[low].is_null() {


@@ 498,7 543,6 @@ pub fn reduce(p: &Persistence, exhaustive: bool, stats: &mut Statistics) -> Vec<
                zeroed.insert(j);
                break;
            }
            iter += 1;
        }
        stats.reductions(iter);
        // Exhaustive goes down here! Go through remaining simplex numbers, and find entries with


@@ 663,7 707,7 @@ impl Index<usize> for IndexList {
}

#[derive(Copy, Clone, Debug)]
pub struct Point(pub [f32; 2]);
pub struct Point(pub [f64; 2]);

impl std::ops::Add<Point> for Point {
    type Output = Point;


@@ 679,23 723,23 @@ impl std::ops::Sub<Point> for Point {
    }
}

impl std::ops::Div<f32> for Point {
impl std::ops::Div<f64> for Point {
    type Output = Point;
    fn div(self, f: f32) -> Point {
    fn div(self, f: f64) -> Point {
        Point([self.0[0] / f, self.0[1] / f])
    }
}

impl Point {
    fn len(self) -> f32 {
    fn len(self) -> f64 {
        (self.0[0].powi(2) + self.0[1].powi(2)).sqrt()
    }

    fn dot(self, other: Self) -> f32 {
    fn dot(self, other: Self) -> f64 {
        self.0[0] * other.0[0] + self.0[1] * other.0[1]
    }

    fn angle(self, other: Point) -> f32 {
    fn angle(self, other: Point) -> f64 {
        let n = self.dot(other) / (self.len() * other.len());
        n.acos()
    }


@@ 713,7 757,7 @@ fn main() {
            Less
        } else if a.faces.contains(&b.j) {
            Greater
        } else if f32_eq(a.r_when_born, b.r_when_born) {
        } else if f64_eq(a.r_when_born, b.r_when_born) {
            a.dim().cmp(&b.dim())
        } else {
            a.r_when_born.partial_cmp(&b.r_when_born).unwrap()


@@ 738,24 782,32 @@ fn main() {
    }

    for (j, s) in persistence.simplices.iter().enumerate() {
        if let Some(&max) = s.faces.iter().max() {
            if max >= j {
                panic!();
        if let Some(&face) = s.faces.iter().max() {
            if face >= j {
                eprintln!("face is after simplex! {} >= {}", face, j);
                eprintln!("{:?}", persistence.simplices[face]);
                eprintln!("{:?}", persistence.simplices[j]);
                eprintln!("{:#?}", &persistence.simplices[(j.saturating_sub(3)..((face + 4).min(persistence.simplices.len())))]);
                panic!("The ordering is not right!");
            }
        }
    }

    let mut r_stats = Statistics::new();
    r_stats.time();
    let _pairings = reduce(&persistence, false, &mut r_stats);

    eprintln!("## Statistics for the =Regular= variant ##");
    r_stats.eprint_time();
    r_stats.eprint_avg();
    eprintln!("\n");

    let mut e_stats = Statistics::new();
    e_stats.time();
    let pairings = reduce(&persistence, true, &mut e_stats);

    eprintln!("## Statistics for the =Exhaustive= variant ##");
    e_stats.eprint_time();
    e_stats.eprint_avg();
    eprintln!("\n");



@@ 783,7 835,7 @@ fn main() {
        let birth = persistence.simplices[a].r_when_born;
        let death = persistence.simplices[b].r_when_born;
        // eprintln!("{}", birth);
        if birth > death && !f32_eq(death, birth) {
        if birth > death && !f64_eq(death, birth) {
            panic!(
                "Birth cannot be _after_ death! {} {} diff={} {:?} {:?}",
                death,


@@ 800,46 852,29 @@ fn main() {
}

fn output_svg(persistence: &Persistence) {
    let mut f = std::fs::File::create("lol.svg").unwrap();

    const PADDING: f32 = 40f32;

    let min_x = persistence
        .points
        .iter()
        .map(|a| a[0])
        .min_by(|a, b| a.partial_cmp(b).unwrap())
        .unwrap();
    let max_x = persistence
        .points
        .iter()
        .map(|a| a[0])
        .max_by(|a, b| a.partial_cmp(b).unwrap())
        .unwrap();
    let min_y = persistence
        .points
        .iter()
        .map(|a| a[1])
        .min_by(|a, b| a.partial_cmp(b).unwrap())
        .unwrap();
    let max_y = persistence
        .points
        .iter()
        .map(|a| a[1])
        .max_by(|a, b| a.partial_cmp(b).unwrap())
        .unwrap();
    let mut f = std::io::BufWriter::new(std::fs::File::create("lol.svg").unwrap());

    const PADDING: f64 = 40f64;

    let mut min_x = persistence.points[0][0];
    let mut max_x = persistence.points[0][0];
    let mut min_y = persistence.points[0][1];
    let mut max_y = persistence.points[0][1];
    for &point in &persistence.points {
        min_x = min_x.min(point[0]);
        min_y = min_y.min(point[1]);
        max_x = max_x.max(point[0]);
        max_y = max_y.max(point[1]);
    }
    let xr = max_x - min_x;
    let yr = max_y - min_y;

    f.write_fmt(format_args!(
        r#"<svg xmlns="http://www.w3.org/2000/svg"
                xmlns:xlink="http://www.w3.org/1999/xlink"
                height="{}" width="{}">"#,
        r#"<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
                height="{}" width="{}">\n"#,
        yr + PADDING * 2.0,
        xr + PADDING * 2.0
    ))
    .unwrap();
    f.write(&[b'\n']).unwrap();
    )).unwrap();

    let xo = PADDING - min_x;
    let yo = PADDING - min_y;


@@ 923,22 958,21 @@ fn output_svg(persistence: &Persistence) {
            }
        }
    }
    println!("{:?}", persistence.points);

    for (_i, v) in persistence.points.iter().enumerate() {
        f.write_fmt(format_args!(
            r#"<circle cx="{}" cy="{}" r="2" fill="red"/>"#,
            r#"<circle cx="{}" cy="{}" r="2" fill="red"/>\n"#,
            v[0] + PADDING - min_x,
            v[1] + PADDING - min_y
        ))
        .unwrap();
        f.write_fmt(format_args!(
            r#"<text x="{}" y="{}" >{}</text>"#,
            v[0] + PADDING - min_x,
            v[1] + PADDING - min_y,
            _i + 1
        ))
        .unwrap();
        f.write(&[b'\n']).unwrap();
        //f.write_fmt(format_args!(
        //    r#"<text x="{}" y="{}" >{}</text>\n"#,
        //    v[0] + PADDING - min_x,
        //    v[1] + PADDING - min_y,
        //    _i + 1
        //))
        //.unwrap();
    }
    f.write_all(b"</svg>").unwrap();
}


@@ 997,7 1031,7 @@ fn output_histogram(data: &[usize], label: &str, out_file: &str) {

    let plot_script = format!(
        r#"
stats 'kek.freq' 
stats 'kek.freq' nooutput
set grid
set terminal pdf size 12cm,10cm
set logscale y


@@ 1007,7 1041,7 @@ set xrange [-0.5:STATS_max_x + 0.5]
set xlabel "{}"
set ylabel "Frequency"
set style fill solid 1.0 noborder
set boxwidth 1.1
set boxwidth 1
plot 'kek.freq' using ($1):($2 + 1) with boxes lc rgb"gray40" notitle
"#,
        label


@@ 1021,22 1055,32 @@ fn output_2histogram(data_a: &[usize], data_b: &[usize], label: &str, out_file: 
        panic!("output_histogram: `data` cannot be empty!");
    }

    let a_hist = histogram(data_a);
    let b_hist = histogram(data_b);
    let mut a_hist = histogram(data_a);
    let mut b_hist = histogram(data_b);
    if a_hist.len() < b_hist.len() {
        for _ in 0..(b_hist.len() - a_hist.len()) {
            a_hist.push(0);
        }
    } else if a_hist.len() > b_hist.len() {
        for _ in 0..(a_hist.len() - b_hist.len()) {
            b_hist.push(0);
        }
    }

    let mut file = File::create("kek.freq").unwrap();

    for (a, b) in a_hist.iter().zip(b_hist.iter()) {
    for (a, b) in a_hist.iter().zip(b_hist.iter()).skip_while(|(&a, &b)| a == 0 && b == 0) {
        file.write_fmt(format_args!("{} {}\n", a, b)).unwrap();
    }

    let plot_script = format!(
        r#"
set terminal pdf size 18cm,14cm
stats 'kek.freq' nooutput
set terminal pdf size 18cm,10cm
set logscale y
set xtics out
set ytics out
set xrange [-1:]
set xrange [STATS_min_x - 0.5:]
set xlabel "{}"
set ylabel "Frequency"
set boxwidth 0.5