3c83006649f46cdefb037dd38b26d4ee04102ec0 — axect 3 months ago 1661464
Ver 0.15.1 - More BLAS for Vector
8 files changed, 157 insertions(+), 43 deletions(-)

M Cargo.toml
M RELEASES.md
M example_data/lm_test.png
A example_data/ode_test.png
D src/bin/lu.rs
M src/numerical/ode.rs
M src/structure/dual.rs
M src/structure/vector.rs
M Cargo.toml => Cargo.toml +1 -1
@@ 1,6 1,6 @@
 [package]
 name = "peroxide"
-version = "0.15.0"
+version = "0.15.1"
 authors = ["axect <edeftg@gmail.com>"]
 description = "Rust comprehensive scientific computation library contains linear algebra, numerical analysis, statistics and machine learning tools with farmiliar syntax"
 repository = "https://github.com/Axect/Peroxide"

M RELEASES.md => RELEASES.md +5 -1
@@ 1,3 1,8 @@
+# Release 0.15.1 (2019-08-29)
+
+* More optimization of `vector.rs`
+* Can use `openblas` feature in `ode.rs`
+
 # Release 0.15.0 (2019-08-27)
 
 * **[Important]** More features


@@ 230,7 235,6 @@
         let x_dual = dual(2, 1);
         let x_hyper = hyper_dual(2, 1, 0);
     
-    
         f(x_f64).print();
         f(x_dual).print();
         f(x_hyper).print();

M example_data/lm_test.png => example_data/lm_test.png +0 -0

A example_data/ode_test.png => example_data/ode_test.png +0 -0

D src/bin/lu.rs => src/bin/lu.rs +0 -19
@@ 1,19 0,0 @@
-extern crate peroxide;
-use peroxide::*;
-
-fn main() {
-    #[cfg(feature = "openblas")]
-    {
-        let a = ml_matrix("1 2;3 4");
-        let opt_dgrf = lapack_dgetrf(&a);
-        match opt_dgrf {
-            None => println!("None"),
-            Some(dgrf) => {
-                let l = dgrf.get_L();
-                let u = dgrf.get_U();
-                l.print();
-                u.print();
-            }
-        }
-    }
-}>
\ No newline at end of file

M src/numerical/ode.rs => src/numerical/ode.rs +50 -7
@@ 213,6 213,9 @@ use structure::matrix::{Matrix, Row, FP};
 use structure::vector::FPVector;
 use util::non_macro::{cat, zeros};
 use util::print::Printable;
+#[cfg(feature = "openblas")]
+use ::{blas_daxpy, blas_daxpy_return};
+use ::VecOps;
 
 /// Explicit ODE Methods
 ///


@@ 393,9 396,18 @@ impl ODE for ExplicitODE {
                 // Set Derivative from state
                 (self.func)(&mut self.state);
                 let dt = self.step_size;
-                self.state
-                    .value
-                    .mut_zip_with(|x, y| x + y * dt, &self.state.deriv);
+
+                match () {
+                    #[cfg(feature = "openblas")]
+                    () => {
+                        blas_daxpy(dt, &self.state.deriv, &mut self.state.value);
+                    }
+                    _ => {
+                        self.state
+                            .value
+                            .mut_zip_with(|x, y| x + y * dt, &self.state.deriv);
+                    }
+                }
                 self.state.param += dt;
             }
             RK4 => {


@@ 407,20 419,51 @@ impl ODE for ExplicitODE {
                 (self.func)(&mut self.state);
 
                 let k1 = self.state.deriv.clone();
-                let k1_add = k1.fmap(|x| x * h2);
+                let k1_add = k1.s_mul(h2);
                 self.state.param += h2;
-                self.state.value.mut_zip_with(|x, y| x + y, &k1_add);
+
+                match () {
+                    #[cfg(feature = "openblas")]
+                    () => {
+                        blas_daxpy(1f64, &k1_add, &mut self.state.value);
+                    }
+                    _ => {
+                        self.state.value.mut_zip_with(|x, y| x + y, &k1_add);
+                    }
+                }
+
                 (self.func)(&mut self.state);
 
                 let k2 = self.state.deriv.clone();
                 let k2_add = k2.zip_with(|x, y| h2 * x - y, &k1_add);
-                self.state.value.mut_zip_with(|x, y| x + y, &k2_add);
+
+                match () {
+                    #[cfg(feature = "openblas")]
+                    () => {
+                        blas_daxpy(1f64, &k2_add, &mut self.state.value);
+                    }
+                    _ => {
+                        self.state.value.mut_zip_with(|x, y| x + y, &k2_add);
+                    }
+                }
+
                 (self.func)(&mut self.state);
 
                 let k3 = self.state.deriv.clone();
                 let k3_add = k3.zip_with(|x, y| h * x - y, &k2_add);
                 self.state.param += h2;
-                self.state.value.mut_zip_with(|x, y| x + y, &k3_add);
+
+                match () {
+                    #[cfg(feature = "openblas")]
+                    () => {
+                        blas_daxpy(1f64, &k3_add, &mut self.state.value);
+                    }
+                    _ => {
+                        self.state.value.mut_zip_with(|x, y| x + y, &k3_add);
+                    }
+                }
+
+
                 (self.func)(&mut self.state);
 
                 let k4 = self.state.deriv.clone();

M src/structure/dual.rs => src/structure/dual.rs +16 -4
@@ 609,10 609,6 @@ impl VecOps for Vec<Dual> {
         self.zip_with(|x, y| x * y, other)
     }
 
-    fn s_mul(&self, scala: f64) -> Self {
-        self.fmap(|x| x * scala)
-    }
-
     fn div(&self, other: &Self) -> Self {
         self.zip_with(|x, y| x / y, other)
     }


@@ 633,6 629,22 @@ impl VecOps for Vec<Dual> {
     fn normalize(&self) -> Self {
         unimplemented!()
     }
+
+    fn s_add(&self, scala: f64) -> Self {
+        self.fmap(|x| x + scala)
+    }
+
+    fn s_sub(&self, scala: f64) -> Self {
+        self.fmap(|x| x - scala)
+    }
+
+    fn s_mul(&self, scala: f64) -> Self {
+        self.fmap(|x| x * scala)
+    }
+
+    fn s_div(&self, scala: f64) -> Self {
+        self.fmap(|x| x / scala)
+    }
 }
 
 // =============================================================================

M src/structure/vector.rs => src/structure/vector.rs +85 -11
@@ 262,9 262,9 @@
 //!     }
 //!     ```
 
-#[cfg(feature = "native")]
+#[cfg(feature = "openblas")]
 extern crate blas;
-#[cfg(feature = "native")]
+#[cfg(feature = "openblas")]
 use blas::{daxpy, ddot, dnrm2, dscal, idamax};
 
 use operation::extra_ops::Real;


@@ 546,8 546,11 @@ pub trait VecOps {
     fn add(&self, other: &Self) -> Self;
     fn sub(&self, other: &Self) -> Self;
     fn mul(&self, other: &Self) -> Self;
-    fn s_mul(&self, scala: f64) -> Self;
     fn div(&self, other: &Self) -> Self;
+    fn s_add(&self, scala: f64) -> Self;
+    fn s_sub(&self, scala: f64) -> Self;
+    fn s_mul(&self, scala: f64) -> Self;
+    fn s_div(&self, scala: f64) -> Self;
     fn dot(&self, other: &Self) -> Self::Scalar;
     fn norm(&self) -> Self::Scalar;
     fn normalize(&self) -> Self;


@@ 560,7 563,7 @@ impl VecOps for Vector {
     /// Addition
     fn add(&self, other: &Self) -> Self {
         match () {
-            #[cfg(feature = "native")]
+            #[cfg(feature = "openblas")]
             () => {
                 let n_i32 = self.len() as i32;
                 let mut y = other.clone();


@@ 578,7 581,7 @@ impl VecOps for Vector {
     /// Subtraction
     fn sub(&self, other: &Self) -> Self {
         match () {
-            #[cfg(feature = "native")]
+            #[cfg(feature = "openblas")]
             () => {
                 let n_i32 = self.len() as i32;
                 let mut y = self.clone();


@@ 598,9 601,48 @@ impl VecOps for Vector {
         self.zip_with(|x, y| x * y, other)
     }
 
+    /// Division
+    fn div(&self, other: &Self) -> Self {
+        self.zip_with(|x, y| x / y, other)
+    }
+
+    fn s_add(&self, scala: f64) -> Self {
+        match () {
+            #[cfg(feature = "openblas")]
+            () => {
+                let n_i32 = self.len() as i32;
+                let mut y = self.clone();
+                unsafe {
+                    daxpy(n_i32, 1f64, &vec![scala; self.len()], 1, &mut y, 1);
+                }
+                y
+            }
+            _ => {
+                self.fmap(|x| x + scala)
+            }
+        }
+    }
+
+    fn s_sub(&self, scala: f64) -> Self {
+        match () {
+            #[cfg(feature = "openblas")]
+            () => {
+                let n_i32 = self.len() as i32;
+                let mut y = self.clone();
+                unsafe {
+                    daxpy(n_i32, -1f64, &vec![scala; self.len()], 1, &mut y, 1);
+                }
+                y
+            }
+            _ => {
+                self.fmap(|x| x - scala)
+            }
+        }
+    }
+
     fn s_mul(&self, scala: f64) -> Self {
         match () {
-            #[cfg(feature = "native")]
+            #[cfg(feature = "openblas")]
             () => {
                 let mut x = self.clone();
                 let n_i32 = self.len() as i32;


@@ 615,15 657,27 @@ impl VecOps for Vector {
         }
     }
 
-    /// Division
-    fn div(&self, other: &Self) -> Self {
-        self.zip_with(|x, y| x / y, other)
+    fn s_div(&self, scala: f64) -> Self {
+        match () {
+            #[cfg(feature = "openblas")]
+            () => {
+                let mut x = self.clone();
+                let n_i32 = self.len() as i32;
+                unsafe {
+                    dscal(n_i32, 1f64/scala, &mut x, 1);
+                }
+                x
+            }
+            _ => {
+                self.fmap(|x| x / scala)
+            }
+        }
     }
 
     /// Dot product
     fn dot(&self, other: &Self) -> f64 {
         match () {
-            #[cfg(feature = "native")]
+            #[cfg(feature = "openblas")]
             () => {
                 let n_i32 = self.len() as i32;
                 let res: f64;


@@ 641,7 695,7 @@ impl VecOps for Vector {
     /// Norm
     fn norm(&self) -> f64 {
         match () {
-            #[cfg(feature = "native")]
+            #[cfg(feature = "openblas")]
             () => {
                 let n_i32 = self.len() as i32;
                 let res: f64;


@@ 662,3 716,23 @@ impl VecOps for Vector {
         self.s_mul(1f64 / alpha)
     }
 }
+
+#[cfg(feature = "openblas")]
+pub fn blas_daxpy(a: f64, x: &Vec<f64>, y: &mut Vec<f64>) {
+    assert_eq!(x.len(), y.len());
+    unsafe {
+        let n = x.len() as i32;
+        daxpy(n, a, x, 1, y, 1)
+    }
+}
+
+#[cfg(feature = "openblas")]
+pub fn blas_daxpy_return(a: f64, x: &Vec<f64>, y: &Vec<f64>) -> Vec<f64> {
+    assert_eq!(x.len(), y.len());
+    let mut result = y.clone();
+    let n = x.len() as i32;
+    unsafe {
+        daxpy(n, a, x, 1, &mut result, 1);
+    }
+    result
+}<
\ No newline at end of file