use std::fs::File; use std::io::Write; use std::path::Path; use std::process::{Command, Stdio}; use crate::{Point, R_CUTOFF}; use persistence::{Persistence, Reduction}; /// Format the given `usize` to a `String` with underscores in between each third digit. /// /// ## Example /// /// ``` /// assert_eq!(thousand_split(1234567), "1_234_567"); /// ``` // fn thousand_split(mut num: usize) -> String { // let mut out = String::new(); // let mut nums = vec![]; // while num > 0 { // nums.push(num % 1_000); // num /= 1_000; // } // for i in (1..nums.len()).rev() { // if i == nums.len() - 1 { // write!(out, "{},", nums[i]).unwrap(); // } else { // write!(out, "{:03},", nums[i]).unwrap(); // } // } // if nums.len() == 0 { // write!(out, "0").unwrap(); // } else if nums.len() == 1 { // write!(out, "{}", nums[0]).unwrap(); // } else { // write!(out, "{:03}", nums[0]).unwrap(); // } // out // } /// Write an `svg` file to the file `out_path` showing the simplicial complex. /// pub fn svg(persistence: &Persistence, std: &Reduction, exh: &Reduction, out_path: &Path) { // Whether to draw text on all simplices outputted. const DRAW_TEXT: bool = true; // The padding we use for the .svg, so that we know text will be in the image. const PADDING: f64 = 50f64; let mut _f = std::io::BufWriter::new(std::fs::File::create(out_path).unwrap()); let f = &mut _f; let min_x = persistence .points .iter() .fold(std::f64::MAX, |m, a| m.min(a[0])); let min_y = persistence .points .iter() .fold(std::f64::MAX, |m, a| m.min(a[1])); let max_x = persistence .points .iter() .fold(std::f64::MIN, |m, a| m.max(a[0])); let max_y = persistence .points .iter() .fold(std::f64::MIN, |m, a| m.max(a[1])); let xr = max_x - min_x; let yr = max_y - min_y; write!( f, r#"\n"#, yr + PADDING * 2.0, xr + PADDING * 2.0 ) .unwrap(); let xo = PADDING - min_x; let yo = PADDING - min_y; let off = Point([xo, yo]); let alpha = &persistence.alphas; for (simplex_i, simplex) in persistence.simplices.iter().enumerate() { if simplex.dim() == 2 { let xy_i = simplex.faces[0]; let xy = &persistence.simplices[xy_i]; let yz_i = simplex.faces[1]; let yz = &persistence.simplices[yz_i]; let zx_i = simplex.faces[2]; let zx = &persistence.simplices[zx_i]; // The labels here may be messed up due to the sorting when making the edges. Get it // right. let (x_i, y_i, z_i) = if xy.faces[0] == zx.faces[0] { // xy, xz (xy.faces[0], xy.faces[1], zx.faces[1]) } else if xy.faces[0] == zx.faces[1] { // xy, zx (xy.faces[0], xy.faces[1], zx.faces[0]) } else if xy.faces[1] == zx.faces[0] { // yx, xz (xy.faces[1], xy.faces[0], zx.faces[1]) } else { // yx, zx (xy.faces[1], xy.faces[0], zx.faces[0]) }; let p0 = Point(persistence.points[x_i - 1]); let p1 = Point(persistence.points[y_i - 1]); let p2 = Point(persistence.points[z_i - 1]); let draw_line = |f: &mut std::io::BufWriter, x0, y0, x1, y1| { writeln!( f, r#""#, x0 + xo, y0 + yo, x1 + xo, y1 + yo, ) }; let draw_i = |f: &mut std::io::BufWriter, x, y, i| { writeln!( f, "{}", x + xo, y + yo, i ) }; let draw_if = |f: &mut std::io::BufWriter, x, y, i, ff| { writeln!( f, "{} ({:4.1})", x + xo, y + yo, i, ff ) }; // See whether we can fill in the triangle or not. if alpha[simplex_i] >= R_CUTOFF { // Cannot fill triangle. Draw edges, maybe? if alpha[xy_i] < R_CUTOFF { draw_line(f, p0.0[0], p0.0[1], p1.0[0], p1.0[1]).unwrap(); // with text if DRAW_TEXT { let xy_middle = (p1 - p0) / 2.0 + p0; draw_i(f, xy_middle.0[0], xy_middle.0[1], xy.j).unwrap(); } } if alpha[zx_i] < R_CUTOFF { draw_line(f, p0.0[0], p0.0[1], p2.0[0], p2.0[1]).unwrap(); if DRAW_TEXT { let zx_middle = (p0 - p2) / 2.0 + p2; draw_i(f, zx_middle.0[0], zx_middle.0[1], zx.j).unwrap(); } } if alpha[yz_i] < R_CUTOFF { draw_line(f, p1.0[0], p1.0[1], p2.0[0], p2.0[1]).unwrap(); if DRAW_TEXT { let yz_middle = (p2 - p1) / 2.0 + p1; draw_i(f, yz_middle.0[0], yz_middle.0[1], yz.j).unwrap(); } } } else { writeln!(f, r#""#, p0.0[0] + xo, p0.0[1] + yo, p1.0[0] + xo, p1.0[1] + yo, p2.0[0] + xo, p2.0[1] + yo ).unwrap(); let center = (p0 + p1 + p2) / 3.0 + off; writeln!( f, "{} ({:4.1})", center.0[0], center.0[1], simplex_i, alpha[simplex_i] ) .unwrap(); // Draw all edges unconditionally. if DRAW_TEXT { let xy_middle = (p1 - p0) / 2.0 + p0; draw_if(f, xy_middle.0[0], xy_middle.0[1], xy.j, alpha[xy_i]).unwrap(); let yz_middle = (p2 - p1) / 2.0 + p1; draw_if(f, yz_middle.0[0], yz_middle.0[1], yz.j, alpha[yz_i]).unwrap(); let zx_middle = (p0 - p2) / 2.0 + p2; draw_if(f, zx_middle.0[0], zx_middle.0[1], zx.j, alpha[zx_i]).unwrap(); } } } else if simplex.dim() == 1 { // Edges } else if simplex.dim() == 0 { // points } } // TODO: what is this? let n = persistence.simplices.len(); let mut std_jumps = vec![0; n]; let mut exh_jumps = vec![0; n]; for i in 0..n { if std.simplex_with_low[i].is_some() { let s = std.simplex_with_low[i].as_ref().unwrap(); let e = exh.simplex_with_low[i].as_ref().unwrap(); if s.faces.len() == 2 { std_jumps[i] = s.faces[0]; exh_jumps[i] = e.faces[0]; } } } for (i, v) in persistence.points.iter().enumerate() { write!( f, r#"\n"#, v[0] + PADDING - min_x, v[1] + PADDING - min_y ) .unwrap(); if DRAW_TEXT { write!( f, r#"{}[{}/{}]\n"#, v[0] + PADDING - min_x, v[1] + PADDING - min_y, i + 1, std_jumps[i + 1], exh_jumps[i + 1], ) .unwrap(); } } write!(f, "\n").unwrap(); } /// Use `gnuplot` to make a persistence diagram with the given pairs. /// This is really just a point plot with labeled axes. pub fn persistence(pairs: &[(f64, f64)], out_file: &Path) { let in_file_path = "temp-persistence"; let mut file = File::create(in_file_path).unwrap(); for (a, b) in pairs { write!(file, "{} {}\n", a, b).unwrap(); } let plot_script = format!( r#" set terminal pdf size 12cm,10cm set xlabel "Birth" set ylabel "Death" set key outside center top horizontal plot '{}' using 1:2 pt 7 ps 0.25 title "pairs", x"#, in_file_path ); run_gnuplot(&plot_script, out_file); std::fs::remove_file(in_file_path).unwrap(); } // fn make_histogram(data: &[usize]) -> Vec { // if data.len() == 0 { // panic!("Trying to create histogram of empty data set"); // } // let max = data.iter().max().unwrap(); // let mut freq = iter::repeat(0).take(max + 1).collect::>(); // for &d in data.iter() { // freq[d] += 1; // } // freq // } // // pub fn histogram(data: &[usize], label: &str, out_file: &Path) { // if data.len() == 0 { // panic!("output_histogram: `data` cannot be empty!"); // } // // let mut frequency = make_histogram(data); // // Remove single outliers, and don't give out trailing zeroes to gnuplot. // let mut last_i = 0; // for (i, f) in frequency.iter_mut().enumerate() { // if *f == 1 { // *f = 0; // } // if *f > 1 { // last_i = i; // } // } // // let file_path = Path::new("tmp-histogram"); // let mut file = File::create(&file_path).unwrap(); // for (ind, freq) in frequency.iter().enumerate().take(last_i + 1) { // write!(file, "{} {}\n", ind, freq).unwrap(); // } // // let plot_script = format!( // r#" // stats '{file_path}' nooutput // set grid // set terminal pdf size 18cm,10cm // set logscale y // set xtics out // set ytics out // set xrange [STATS_min_x - 0.5:] // set xlabel "{}" // set ylabel "Frequency" // set style fill solid 1.0 noborder // set boxwidth 0.5 // plot '{file_path}' using ($1):($2) with boxes lc rgb"gray40" notitle // "#, // label, // file_path = file_path.display() // ); // // run_gnuplot(&plot_script, out_file); // std::fs::remove_file(file_path).unwrap(); // } // // pub fn histogram2(data_a: &[usize], data_b: &[usize], label: &str, out_file: &Path) { // if data_a.len() == 0 || data_b.len() == 0 { // panic!("output_histogram: `data` cannot be empty!"); // } // // let mut a_hist = make_histogram(data_a); // let mut b_hist = make_histogram(data_b); // if a_hist.len() < b_hist.len() { // for _ in 0..(b_hist.len() - a_hist.len()) { // a_hist.push(0); // } // } else if a_hist.len() > b_hist.len() { // for _ in 0..(a_hist.len() - b_hist.len()) { // b_hist.push(0); // } // } // // let file_path = Path::new("tmp-2histogram"); // let mut file = File::create(&file_path).unwrap(); // // for (a, b) in a_hist // .iter() // .zip(b_hist.iter()) // .skip_while(|(&a, &b)| a == 0 && b == 0) // { // write!(file, "{} {}\n", a, b).unwrap(); // } // // let plot_script = format!( // r#" // stats '{file_path}' nooutput // set terminal pdf size 18cm,10cm // set logscale y // set xtics out // set ytics out // set xrange [STATS_min_x - 0.5:] // set xlabel "{}" // set ylabel "Frequency" // set boxwidth 0.5 // set style fill solid 1.0 noborder // set key outside center top horizontal // a = 0 // cum_a(x, i) = (a=a + x * i, a) // b = 0 // cum_b(x, i) = (b=b + x * i, b) // set grid // plot '{file_path}' using ($0):(cum_a($1, $0)) with lines lc rgb"gray20" title "Regular",\ // '{file_path}' using ($0):(cum_b($2, $0)) with lines lc rgb"0x88bb88" title "Exhaustive",\ // '{file_path}' using ($0 - 0.25):($1+1) with boxes lc rgb"gray20" title "Regular",\ // '{file_path}' using ($0 + 0.25):($2+1) with boxes lc rgb"0x88bb88" title "Exhaustive" // "#, // label, // file_path = file_path.display() // ); // // run_gnuplot(&plot_script, out_file); // std::fs::remove_file(file_path).unwrap(); // } /// Output a `tex` formatted table of statistics to the `writer`. // pub fn tex_table( // reg: &Statistics, // exh: &Statistics, // writer: &mut W, // ) -> Result<(), std::io::Error> { // fn sum(s: &[usize]) -> f64 { // s.iter().sum::() as f64 // } // // fn avg(s: &[usize]) -> f64 { // sum(s) / s.len() as f64 // } // // fn percent(hundred: f64, ours: f64) -> String { // if hundred == 0.0 { // format!("") // } else { // format!("{:.2}\\%", 100.0 * ours / hundred) // } // } // // let header_align = std::iter::repeat("r l") // .take(MAX_DIM) // .collect::>() // .join(" "); // let header_top = (0..MAX_DIM) // .map(|d| format!("\\multicolumn{{2}}{{c}}{{$d={}$}} ", d)) // .collect::>() // .join(" & "); // write!( // writer, // r#"\begin{{tabular}}{{r r {}}} // Name & Total & {} \\ // \toprule // "#, // header_align, header_top // )?; // // let total = reg.num_of_dimens.iter().sum::(); // let mut line = format!("Simplices & {}", total); // for &n in reg.num_of_dimens.iter() { // line.push_str(&format!( // "& \\hspace{{0.5cm}}{} & {}", // thousand_split(n), // percent(total as f64, n as f64) // )); // } // writeln!(writer, "{}\\\\", line)?; // // let total = reg.skip_d.iter().sum::(); // let mut line = format!("Skipped & {}", total); // for &n in reg.skip_d.iter() { // line.push_str(&format!( // "& {} & {}", // thousand_split(n), // percent(total as f64, n as f64) // )); // } // writeln!(writer, "{}\\\\", line)?; // // let total = reg.zeroed_d.iter().sum::(); // let mut line = format!("Zeroed & {}", total); // for &n in reg.zeroed_d.iter() { // line.push_str(&format!( // "& {} & {}", // thousand_split(n), // percent(total as f64, n as f64) // )); // } // writeln!(writer, "{}\\\\", line)?; // // let total = reg.placed_full_d.iter().sum::(); // let mut line = format!("Stored & {}", total); // for &n in reg.placed_full_d.iter() { // line.push_str(&format!( // "& {} & {}", // thousand_split(n), // percent(total as f64, n as f64) // )); // } // writeln!(writer, "{}\\\\", line)?; // // writeln!(writer, "\\end{{tabular}}\n")?; // writeln!(writer, "\\vspace{{1cm}}")?; // // let mut v: Vec<(String, String, String, String)> = Vec::new(); // // v.push(( // format!("Column additions"), // thousand_split(reg.col_adds), // thousand_split(exh.col_adds), // percent(reg.col_adds as f64, exh.col_adds as f64), // )); // // v.push(( // format!("Estimate \\texttt{{column\\_add}} cost"), // thousand_split(sum(®.add_size_sum) as usize), // thousand_split(sum(&exh.add_size_sum) as usize), // percent(sum(®.add_size_sum), sum(&exh.add_size_sum)), // )); // // v.push(( // format!("\\texttt{{simplex\\_with\\_low}} queries"), // thousand_split(reg.simplex_with_low_queries), // thousand_split(exh.simplex_with_low_queries), // percent( // reg.simplex_with_low_queries as f64, // exh.simplex_with_low_queries as f64, // ), // )); // // v.push(( // format!("Time spent (ns)"), // thousand_split(reg.time as usize), // thousand_split(exh.time as usize), // percent(reg.time as f64, exh.time as f64), // )); // // let as0 = |s: &[(usize, usize)]| s.iter().map(|(a, _)| a).cloned().collect::>(); // v.push(( // format!("Size of first operand"), // format!("{:.4}", avg(&as0(®.add_size))), // format!("{:.4}", avg(&as0(&exh.add_size))), // percent(avg(&as0(®.add_size)), avg(&as0(&exh.add_size))), // )); // // let as1 = |s: &[(usize, usize)]| s.iter().map(|(_, b)| b).cloned().collect::>(); // v.push(( // format!("Size of second operand"), // format!("{:.4}", avg(&as1(®.add_size))), // format!("{:.4}", avg(&as1(&exh.add_size))), // percent(avg(&as1(®.add_size)), avg(&as1(&exh.add_size))), // )); // // v.push(( // format!("Iters for reducing a column"), // format!("{:.4}", avg(®.num_iters)), // format!("{:.4}", avg(&exh.num_iters)), // percent(avg(®.num_iters), avg(&exh.num_iters)), // )); // // v.push(( // format!("Cost of one column addition"), // format!("{:.4}", avg(®.add_size_sum)), // format!("{:.4}", avg(&exh.add_size_sum)), // percent(avg(®.add_size_sum), avg(&exh.add_size_sum)), // )); // // v.push(( // format!("Iters to exh.\\ reduce a col"), // format!(""), // format!("{:.4}", avg(&exh.ex_reductions)), // format!(""), // )); // // v.push(( // format!("Searches to find \\texttt{{j=low(k)}}"), // format!(""), // format!("{:.4}", avg(&exh.ex_searches)), // format!(""), // )); // // // TODO: 4 could be bigger! // let mut reg_counts = [0; 4]; // for &d in ®.col_add_dimens { // reg_counts[(d + 1) as usize] += 1; // } // let mut exh_counts = [0; 4]; // for &d in &exh.col_add_dimens { // exh_counts[(d + 1) as usize] += 1; // } // // for (d1, (&r, &e)) in reg_counts.iter().zip(exh_counts.iter()).enumerate().skip(1) { // v.push(( // format!("Column adds with $d={}$", d1 as isize - 1), // thousand_split(r), // thousand_split(e), // percent(r as f64, e as f64), // )); // } // // write!( // writer, // r#"\begin{{tabular}}{{r r r r}} // Counter & Regular & Exhausitve & Difference \\ // \toprule // "# // )?; // for (l, a, b, p) in v { // write!(writer, " {} & {} & {} & {}\\\\\n", l, a, b, p)?; // } // writeln!(writer, "\\end{{tabular}}")?; // // Ok(()) // } // // Get the three vertices of a triangle. This isn't super trivial, since the triangle has // // three edges, and so we have to eliminate the duplicates. Still pretty straight forward. // fn get_triangle_xyz(simplex: &Simplex, simplices: &[Simplex]) -> (usize, usize, usize) { // let xy_i = simplex.faces[0]; // let xy = &simplices[xy_i]; // let zx_i = simplex.faces[2]; // let zx = &simplices[zx_i]; // // The labels here may be messed up due to the sorting when making the edges. Get it // // right. // if xy.faces[0] == zx.faces[0] { // // xy, xz // (xy.faces[0], xy.faces[1], zx.faces[1]) // } else if xy.faces[0] == zx.faces[1] { // // xy, zx // (xy.faces[0], xy.faces[1], zx.faces[0]) // } else if xy.faces[1] == zx.faces[0] { // // yx, xz // (xy.faces[1], xy.faces[0], zx.faces[1]) // } else { // // yx, zx // (xy.faces[1], xy.faces[0], zx.faces[0]) // } // } /// Write out a dotfile with the directed edges given by `simplex_with_low` pub fn graphviz( p: &Persistence, std: &Reduction, exh: &Reduction, w: &mut W, ) -> Result<(), std::io::Error> { writeln!(w, "digraph {{")?; writeln!(w, " rankdir=LR;")?; // TODO: Rethink what we want here for i in 0..p.simplices.len() { if std.simplex_with_low[i].is_some() { let s = std.simplex_with_low[i].as_ref().unwrap(); let e = exh.simplex_with_low[i].as_ref().unwrap(); assert_eq!(s.j, e.j); if s.faces.len() == 2 { writeln!(w, " {} -> {} [color=\"red\"]", s.faces[1], s.faces[0])?; writeln!(w, " {} -> {} [color=\"green\"]", e.faces[1], e.faces[0])?; } } } for &(from, to) in &exh.additions { if to > 80 { continue; } let d = p.simplices[from].dim(); if d == 1 { writeln!(w, " {} -> {} [color=\"green\"]", to, from)?; } if d == 2 { writeln!(w, " {} -> {} [color=\"green\"]", to, from)?; } } for &(from, to) in &std.additions { if to > 80 { continue; } let d = p.simplices[from].dim(); if d == 1 { writeln!(w, " {} -> {} [color=\"red\"]", to, from)?; } if d == 2 { writeln!(w, " {} -> {} [color=\"red\"]", to, from)?; } } writeln!(w, "}}")?; Ok(()) } // Run `gnuplot` with the given script and out file for output. pub fn run_gnuplot(script: &str, out_file: &Path) { let mut gnuplot = Command::new("gnuplot") .stdin(Stdio::piped()) .stdout(Stdio::piped()) .spawn() .expect("Failed to run `gnuplot`"); { let stdin = gnuplot .stdin .as_mut() .expect("Failed to open stdin for `gnuplot`"); stdin .write_all(script.as_bytes()) .expect("Failed to write to stdin"); } let output = gnuplot .wait_with_output() .expect("`wait_for_output` failed!"); match output.status.code() { Some(0) => {} other => panic!("`gnuplot` process returned error: {:?}", other), } println!("{:?}", out_file); let mut f = File::create(out_file).expect("Failed to create gnuplot output file"); f.write_all(&output.stdout) .expect("Failed to write to gnuplot output file"); } // // pub fn stats(st: &Statistics, writer: &mut W) -> Result<(), std::io::Error> { // let mut print_pairs = Vec::new(); // // print_pairs.push(("Column additions", thousand_split(st.col_adds))); // // let adds = st.add_size_sum.iter().sum::() as usize; // print_pairs.push(("Estimate total 'column_add' cost", thousand_split(adds))); // // print_pairs.push(( // "`simplex_with_low` queries", // thousand_split(st.simplex_with_low_queries), // )); // // print_pairs.push(("Time spent (ns)", thousand_split(st.time as usize))); // // // ~10 cycles per L2 hit seems reasonable? // let instr_cost_estimate = st.simplex_with_low_queries * 10 + adds; // print_pairs.push(( // "Instruction cost estimate", // thousand_split(instr_cost_estimate), // )); // // let add_size0 = // st.add_size.iter().map(|(a, _)| a).sum::() as f64 / st.add_size.len() as f64; // print_pairs.push(("Average size of first operand", format!("{}", add_size0))); // // let add_size1 = // st.add_size.iter().map(|(_, b)| b).sum::() as f64 / st.add_size.len() as f64; // print_pairs.push(("Average size of second operand", format!("{}", add_size1))); // // let num_iters = st.num_iters.iter().sum::() as f64 / st.num_iters.len() as f64; // print_pairs.push(( // "Average #iterations before reducing a column", // format!("{}", num_iters), // )); // // let column_add_cost = // st.add_size_sum.iter().sum::() as f64 / st.add_size_sum.len() as f64; // print_pairs.push(( // "Average cost of one column addition", // format!("{}", column_add_cost), // )); // // if st.ex_reductions.len() > 0 { // let ex_reductions = // st.ex_reductions.iter().sum::() as f64 / st.ex_reductions.len() as f64; // print_pairs.push(( // "Average iterations to exh. reduce a col", // format!("{}", ex_reductions), // )); // } // // if st.ex_searches.len() > 0 { // let ex_searches = st.ex_searches.iter().sum::() as f64 / st.ex_searches.len() as f64; // print_pairs.push(( // "Average searches to find `j=low(k)`", // format!("{}", ex_searches), // )); // } // // print_pairs.push(("Number of zeroed columns", thousand_split(st.zeroed))); // print_pairs.push(("Number of single columns", thousand_split(st.placed_single))); // print_pairs.push(( // "Number of remaining columns", // thousand_split(st.placed_full), // )); // // let mut last_pos = 0; // for (i, &n) in st.num_of_dimens.iter().enumerate() { // if n > 0 { // last_pos = i; // } // } // let strs = (0..last_pos + 1) // .map(|d| format!("Simplices with d={}", d)) // .collect::>(); // for d in 0..last_pos + 1 { // print_pairs.push((&strs[d], thousand_split(st.num_of_dimens[d]))); // } // // let max_str = print_pairs.iter().map(|(s, _)| s.len()).max().unwrap(); // for (left, right) in print_pairs { // write!(writer, "{:>2$}: {}\n", left, right, max_str)?; // } // Ok(()) // }