A => .gitignore +1 -0
A => CMakeLists.txt +29 -0
@@ 1,29 @@
+project(particle-mesh-ewald CXX)
+
+cmake_minimum_required(VERSION 2.8)
+
+find_package(Armadillo REQUIRED)
+
+find_package(PkgConfig REQUIRED)
+pkg_check_modules(FFTW REQUIRED fftw3)
+
+find_package(OpenMP)
+
+include_directories(${ARMADILLO_INCLUDE_DIRS} ${FFTW_INCLUDE_DIRS})
+add_definitions(${FFTW_CFLAGS})
+# set(CXX_WARNING_FLAGS "-W -Wall -Wextra -Wconversion -Wshadow -Wpointer-arith -Wcast-qual -Wwrite-strings")
+set(CXX_WARNING_FLAGS "-W -Wall")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -Wall ${OpenMP_CXX_FLAGS} ${CXX_WARNING_FLAGS} -std=c++11")
+set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0")
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG -O3 -finline-functions -funroll-loops")
+
+add_library(pme pme.cpp pme.h cube.cpp cube.h linear-algebra.cpp linear-algebra.h spline.cpp spline.h)
+
+add_executable(make-box make-box.cpp)
+target_link_libraries(make-box ${ARMADILLO_LIBRARIES})
+
+add_executable(dispersive dispersive.cpp)
+target_link_libraries(dispersive pme ${ARMADILLO_LIBRARIES} ${FFTW_LDFLAGS} ${FFTW_LDFLAGS_OTHER} -lfftw3_omp)
+
+add_executable(run-pme run-pme.cpp)
+target_link_libraries(run-pme pme ${ARMADILLO_LIBRARIES} ${FFTW_LDFLAGS} ${FFTW_LDFLAGS_OTHER} -lfftw3_omp)
A => abort_unless.h +21 -0
@@ 1,21 @@
+#ifndef ABORT_UNLESS_H
+#define ABORT_UNLESS_H
+
+#include <cstdio>
+#include <cstdlib>
+
+/*
+ * Unlike the assert() macro, abort_unless() is not disabled by
+ * defining the preprocessor macro NDEBUG.
+ */
+
+#define abort_unless(expr) do { \
+ if (!(expr)) { \
+ std::fprintf(stderr, "%s:%u (%s): Assertion `%s' failed.\n", \
+ __FILE__, __LINE__, __func__, __STRING(expr)); \
+ std::fflush(stderr); \
+ abort(); \
+ } \
+ } while (0)
+
+#endif
A => cube.cpp +64 -0
@@ 1,64 @@
+#include <cassert>
+
+#include <algorithm>
+
+#include "cube.h"
+
+namespace ewald {
+
+using std::complex;
+
+template <typename T>
+cube<T>::cube(int d0, int d1, int d2)
+ : dim0(d0), dim1(d1), dim2(d2) {
+ memory = new T[dim0 * dim1 * dim2];
+}
+
+template <typename T>
+cube<T>::~cube() {
+ delete [] memory;
+}
+
+template <typename T>
+void cube<T>::zeros() {
+ std::fill_n(memory, dim0 * dim1 * dim2, T(0));
+}
+
+template <typename T>
+T cube<T>::operator()(int k0, int k1, int k2) const {
+ assert(0 <= k0 && k0 < dim0);
+ assert(0 <= k1 && k1 < dim1);
+ assert(0 <= k2 && k2 < dim2);
+
+ return memory[k0 * dim2 * dim1 + k1 * dim2 + k2];
+}
+
+template <typename T>
+T& cube<T>::operator()(int k0, int k1, int k2) {
+ assert(0 <= k0 && k0 < dim0);
+ assert(0 <= k1 && k1 < dim1);
+ assert(0 <= k2 && k2 < dim2);
+
+ return memory[k0 * dim2 * dim1 + k1 * dim2 + k2];
+}
+
+template <typename T>
+cube<T>& cube<T>::operator*=(cube const& rhs) {
+ assert(rhs.dim0 == dim0);
+ assert(rhs.dim1 == dim1);
+ assert(rhs.dim2 == dim2);
+
+ for (int k0 = 0; k0 < dim0; ++k0)
+ for (int k1 = 0; k1 < dim1; ++k1)
+ for (int k2 = 0; k2 < dim2; ++k2)
+ operator()(k0, k1, k2) *= rhs(k0, k1, k2);
+
+ return *this;
+}
+
+template <typename T>
+T* cube<T>::memptr() {
+ return memory;
+}
+
+}
A => cube.h +33 -0
@@ 1,33 @@
+#ifndef CUBE_H
+#define CUBE_H
+
+#include <complex>
+
+namespace ewald {
+
+template <typename T>
+class cube {
+ public:
+ cube(int d0, int d1, int d2);
+ ~cube();
+
+ void zeros();
+
+ T operator()(int k0, int k1, int k2) const;
+ T& operator()(int k0, int k1, int k2);
+
+ cube<T>& operator*=(cube const& rhs);
+
+ T* memptr();
+
+ private:
+ const int dim0, dim1, dim2;
+ T* memory;
+};
+
+typedef cube<double> real_cube;
+typedef cube<std::complex<double> > complex_cube;
+
+}
+
+#endif
A => dispersive.cpp +332 -0
@@ 1,332 @@
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cassert>
+#include <cfenv>
+
+#include <complex>
+#include <iomanip>
+#include <iostream>
+#include <vector>
+
+#include <armadillo>
+
+#ifdef HAVE_OMP
+#include <omp.h>
+#endif
+
+#include "pme.h"
+#include "abort_unless.h"
+#include "energies.h"
+
+using namespace std;
+using namespace arma;
+
+const complex<double> I(0.0, 1.0);
+
+ewald::exclusion_vector exclusions;
+
+static bool excluded(size_t i, size_t j);
+
+int main(int argc, char* argv[]) {
+ if (argc < 8) {
+ cout << "Usage: " << argv[0]
+ << " dimension cell-file positions-file charges-file cut-off tolerance num-shells\n";
+ return EXIT_FAILURE;
+ }
+
+ const long dim0 = atoi(argv[1]);
+ const long dim1 = dim0;
+ const long dim2 = dim0;
+
+ mat::fixed<3, 3> L;
+ L.load(argv[2]);
+ const mat::fixed<3, 3> LinvT = trans(inv(L));
+ L.print();
+
+ mat r;
+ r.load(argv[3]);
+ abort_unless(r.n_rows == 3);
+ const mat x = inv(L) * r;
+
+ vec qq;
+ qq.load(argv[4]);
+ std::vector<double> q(r.n_cols);
+ {
+ for (size_t k = 0; k < q.size(); ++k)
+ q[k] = qq(k);
+ }
+
+ const size_t num_particles = q.size();
+ clog << "Read " << num_particles << " particles into memory.\n";
+
+ std::vector<double> coor_x(num_particles);
+ std::vector<double> coor_y(num_particles);
+ std::vector<double> coor_z(num_particles);
+ {
+ for (size_t k = 0; k < num_particles; ++k) {
+ coor_x[k] = r(0, k);
+ coor_y[k] = r(1, k);
+ coor_z[k] = r(2, k);
+ }
+ }
+
+ const double cut_off = atof(argv[5]);
+ const double cut_off2 = cut_off * cut_off;
+ const double tolerance = atof(argv[6]);
+ clog << "Real space cut-off: " << cut_off << "\n"
+ << "Real space tolerance: " << tolerance << "\n\n";
+
+
+ exclusions = ewald::exclusion_vector(num_particles);
+ { // Populate exclusion vector.
+/*
+ for (size_t i = 0; i < num_particles; ++i)
+ for (size_t j = 0; j < num_particles; ++j)
+ // if (i != j && ((i % 2) != (j % 2))) {
+ if (abs(int(i) - int(j)) <= 2) {
+ exclusions[i].push_back(j);
+ }
+*/
+ /*
+ for (size_t i = 0; i < num_particles; ++i) {
+ std::cout << i << ": ";
+ ewald::index_vector const& is = exclusions[i];
+ for (size_t j = 0; j < is.size(); ++j)
+ std::cout << is[j] << " ";
+ std::cout << std::endl;
+ }
+ */
+ }
+
+ const double max_shell = int(atof(argv[7]));
+
+ mat forces = zeros<mat>(3, num_particles);
+ mat force_cutoff = zeros<mat>(3, num_particles);
+ mat force_direct = zeros<mat>(3, num_particles);
+ mat force_recip = zeros<mat>(3, num_particles);
+ mat force_excluded = zeros<mat>(3, num_particles);
+ mat force_excluded_pme = zeros<mat>(3, num_particles);
+
+ std::vector<double> force_x(num_particles, 0.0);
+ std::vector<double> force_y(num_particles, 0.0);
+ std::vector<double> force_z(num_particles, 0.0);
+ mat force_recip_pme = zeros<mat>(3, num_particles);
+ double LL[3 * 3];
+ for (size_t i = 0; i < 3; ++i)
+ for (size_t j = 0; j < 3; ++j)
+ LL[3*i+j] = L(i, j);
+
+ energies energy_pme, energy_ewald;
+
+ wall_clock timer;
+ timer.tic();
+
+#ifdef HAVE_OMP
+ ewald::pme p(dim0, dim1, dim2, LL, num_particles, &q[0], cut_off, tolerance, omp_get_max_threads());
+#else
+ ewald::pme p(dim0, dim1, dim2, LL, num_particles, &q[0], cut_off, tolerance, 1);
+#endif
+ double seconds = timer.toc();
+
+ clog << "PME initialization took " << seconds << " seconds.\n\n";
+
+ const double t = p.get_threshold();
+ if (std::isnan(t)) {
+ cerr << "Unable to run PME for the prescribed values of cut off and tolerance.\n";
+ return EXIT_FAILURE;
+ }
+
+ timer.tic();
+
+ energy_pme.recip = p.energy(&coor_x[0], &coor_y[0], &coor_z[0],
+ &force_x[0], &force_y[0], &force_z[0]);
+
+ for (size_t k = 0; k < num_particles; ++k) {
+ force_recip_pme(0, k) = force_x[k];
+ force_recip_pme(1, k) = force_y[k];
+ force_recip_pme(2, k) = force_z[k];
+ }
+
+ seconds = timer.toc();
+ clog << "PME took " << seconds << " seconds.\n\n";
+
+ std::vector<double> force_excl_x(num_particles, 0.0);
+ std::vector<double> force_excl_y(num_particles, 0.0);
+ std::vector<double> force_excl_z(num_particles, 0.0);
+
+ energy_pme.masked = p.excluded(exclusions,
+ &coor_x[0],
+ &coor_y[0],
+ &coor_z[0],
+ &force_excl_x[0],
+ &force_excl_y[0],
+ &force_excl_z[0]);
+
+ for (size_t k = 0; k < num_particles; ++k) {
+ force_excluded_pme(0, k) = force_excl_x[k];
+ force_excluded_pme(1, k) = force_excl_y[k];
+ force_excluded_pme(2, k) = force_excl_z[k];
+ }
+
+ energy_ewald.extra = energy_pme.extra = p.energy_extra();
+ energy_ewald.self = energy_pme.self = p.energy_self();
+
+ double Eexhaustive = 0.0;
+ double Ecutoff = 0.0;
+
+ mat force_ewald, force_pme;
+
+ double elapsed = 0.0;
+
+ for (int n = 0; n <= max_shell; n++) {
+ wall_clock timer;
+ timer.tic();
+ for (int nx = -n; nx <= n; nx++) {
+ for (int ny = -n; ny <= n; ny++) {
+ for (int nz = -n; nz <= n; nz++) {
+ if (abs(nx) != n && abs(ny) != n && abs(nz) != n)
+ continue;
+
+ const bool origin = (n == 0);
+
+ const vec::fixed<3> m = { double(nx), double(ny), double(nz) };
+ const vec::fixed<3> Lm = L * m;
+ const vec::fixed<3> LinvTm = LinvT * m;
+
+ // Compute direct space contribution.
+ for (size_t i = 0; i < num_particles; i++) {
+ for (size_t j = 0; j < num_particles; j++) {
+ if (origin && i == j)
+ continue;
+
+ const vec::fixed<3> v = r.col(i) - r.col(j) + Lm;
+ const double r2 = dot(v, v);
+ const double r6 = r2 * r2 * r2;
+ const double r8 = r6 * r2;
+
+ const double qij = q[i] * q[j];
+
+ const bool is_excluded = excluded(i, j);
+
+ const double ener = 0.5 * qij / r6;
+ const vec::fixed<3> f = 6.0 * qij / r8 * v;
+
+ if (origin) {
+ if (!is_excluded) { // Unmasked pair.
+ Eexhaustive += ener;
+ forces.col(i) += f;
+
+ if (r2 < cut_off2) {
+ Ecutoff += ener;
+ force_cutoff.col(i) += f;
+
+ vec::fixed<3> dv;
+ const double pot = 0.5 * qij
+ * p.direct_convergence_term(v.memptr(), dv.memptr());
+ energy_pme.direct += pot;
+ energy_ewald.direct += pot;
+ force_direct.col(i) += -qij * dv;
+ }
+ } else { // Masked pair.
+ // Note that we don't do cut offs here because we
+ // want to cancel the Fourier space contribution,
+ // which does not involve cut offs.
+ vec::fixed<3> dv;
+ energy_ewald.masked += (ener - 0.5 * qij
+ * p.direct_convergence_term(v.memptr(),
+ dv.memptr()));
+ force_excluded.col(i) += f + qij * dv;
+ }
+ } else { // Outer shells.
+ Eexhaustive += ener;
+ forces.col(i) += f;
+
+ if (r2 < cut_off2) {
+ Ecutoff += ener;
+ force_cutoff.col(i) += f;
+
+ vec::fixed<3> dv;
+ const double pot = 0.5 * qij
+ * p.direct_convergence_term(v.memptr(), dv.memptr());
+ energy_pme.direct += pot;
+ energy_ewald.direct += pot;
+ force_direct.col(i) += -qij * dv;
+ }
+ }
+ }
+ }
+
+ // Compute contribution from Fourier space.
+ if (!origin) {
+ complex<double> rho(0.0, 0.0);
+
+ for (size_t i = 0; i < num_particles; i++)
+ rho += q[i] * exp(2.0 * M_PI * I * dot(m, x.col(i)));
+
+ const double h2 = dot(LinvTm, LinvTm);
+ energy_ewald.recip += p.recip_convergence_term(h2, rho);
+
+ // force_recip.col(0) = -4.0 * M_PI * q[0] * q[1] * p.psi(h2) * sin(2.0 * M_PI * dot(LinvTm, r.col(0) - r.col(1))) * (-m);
+ // force_recip.col(1) = -4.0 * M_PI * q[0] * q[1] * p.psi(h2) * sin(2.0 * M_PI * dot(LinvTm, r.col(0) - r.col(1))) * m;
+ }
+ }
+ }
+ }
+
+ elapsed += timer.toc();
+
+ force_ewald = force_direct + force_recip;
+ force_pme = force_direct + force_recip_pme - force_excluded_pme;
+
+ {
+ const double pme_total = energy_pme.total();
+ const double ewald_total = energy_ewald.total();
+
+ const double rel_error_ewald_vs_pme = fabsl(ewald_total - pme_total) / fabsl(ewald_total);
+ const double rel_error_ewald_vs_cutoff = fabsl(ewald_total - Ecutoff) / fabsl(ewald_total);
+
+ const double rel_error_exhaustive_vs_cutoff = fabsl(Eexhaustive - Ecutoff) / fabsl(Eexhaustive);
+ const double rel_error_exhaustive_vs_ewald = fabsl(Eexhaustive - ewald_total) / fabsl(Eexhaustive);
+ const double rel_error_exhaustive_vs_pme = fabsl(Eexhaustive - pme_total) / fabsl(Eexhaustive);
+
+ cout << "==============================================\n"
+ << fixed << setprecision(4)
+ << "Shell " << n << " (" << elapsed << " seconds)" << "\n"
+ << "==============================================\n"
+ << scientific << setprecision(9)
+ << "\n"
+ << "Eexhaustive = " << Eexhaustive << "\n"
+ << "Ecutoff = " << Ecutoff << "\n"
+ << "Ewald = " << energy_ewald << "\n"
+ << "PME = " << energy_pme << "\n"
+ << "\n"
+ << "Abs. error in excluded forces = " << norm(force_excluded - force_excluded_pme, "inf") << "\n"
+ << "\n"
+ << "Abs. error in forces (Exhaustive vs. cutoff) = " << norm(forces - force_cutoff, "inf") << "\n"
+ << "Abs. error in forces (Exhaustive vs. PME) = " << norm(forces - force_pme, "inf") << "\n"
+ << "\n"
+ << "Rel. error in total energies (Ewald vs. cutoff) = " << rel_error_ewald_vs_cutoff << "\n"
+ << "Rel. error in total energies (Ewald vs. PME) = " << rel_error_ewald_vs_pme << "\n"
+ << "\n"
+ << "Rel. error in total energies (Exhaustive vs. cutoff) = " << rel_error_exhaustive_vs_cutoff << "\n"
+ << "Rel. error in total energies (Exhaustive vs. Ewald) = " << rel_error_exhaustive_vs_ewald << "\n"
+ << "Rel. error in total energies (Exhaustive vs. PME) = " << rel_error_exhaustive_vs_pme << "\n"
+ << endl;
+ }
+ }
+
+ return 0;
+}
+
+bool excluded(size_t i, size_t j) {
+ // assert(i < num_particles);
+ // assert(j < num_particles);
+
+ ewald::index_vector const& is = exclusions[i];
+ for (size_t k = 0; k < is.size(); ++k)
+ if (j == is[k])
+ return true;
+
+ return false;
+}
A => energies.h +32 -0
@@ 1,32 @@
+#ifndef ENERGIES_H
+#define ENERGIES_H
+
+#include <ostream>
+
+struct energies {
+ double direct;
+ double recip;
+ double extra;
+ double self;
+ double masked;
+
+ energies()
+ : direct(0.0), recip(0.0), extra(0.0),
+ self(0.0), masked(0.0) {}
+
+ double total() const {
+ return direct + recip + extra - self - masked;
+ }
+
+ friend std::ostream& operator<<(std::ostream& stream, energies& ener) {
+ return stream << ener.total()
+ << " = direct + recip + extra - self - masked = "
+ << ener.direct << " + "
+ << ener.recip << " + "
+ << ener.extra << " - "
+ << ener.self << " - "
+ << ener.masked;
+ }
+};
+
+#endif
A => linear-algebra.cpp +68 -0
@@ 1,68 @@
+#include <cstdlib>
+#include <cstring>
+
+#include <sys/types.h>
+
+#include "linear-algebra.h"
+
+namespace ewald {
+
+void matrix_vector_product(double const* A, double* X) {
+ double Y[] = { 0.0, 0.0, 0.0 };
+
+ for (size_t i = 0; i < 3; ++i)
+ for (size_t j = 0; j < 3; ++j)
+ Y[i] += A[3*i+j] * X[j];
+
+ memcpy(X, Y, 3 * sizeof(double));
+}
+
+void matrix_vector_product_transpose(double const* A, double* X) {
+ double Y[] = { 0.0, 0.0, 0.0 };
+
+ for (size_t i = 0; i < 3; ++i)
+ for (size_t j = 0; j < 3; ++j)
+ Y[i] += A[3*j+i] * X[j];
+
+ memcpy(X, Y, 3 * sizeof(double));
+}
+
+void matrix_matrix_product(double const* A, double const* B, double* C) {
+ memset(C, 0, 3 * 3 * sizeof(double));
+
+ for (size_t i = 0; i < 3; ++i)
+ for (size_t j = 0; j < 3; ++j)
+ for (size_t k = 0; k < 3; ++k)
+ C[3*i+j] += A[3*i+k] * B[3*k+j];
+}
+
+double determinant(double const* A) {
+ return A[3*0+0] * (A[3*1+1] * A[3*2+2] - A[3*1+2] * A[3*2+1])
+ - A[3*0+1] * (A[3*1+0] * A[3*2+2] - A[3*1+2] * A[3*2+0])
+ + A[3*0+2] * (A[3*1+0] * A[3*2+1] - A[3*1+1] * A[3*2+0]);
+}
+
+void matrix_inverse(double const* A, double* Ainv) {
+ const double invdet = 1.0 / determinant(A);
+ memset(Ainv, 0, 3 * 3 * sizeof(double));
+ Ainv[3*0+0] = ( A[3*1+1]*A[3*2+2] - A[3*1+2]*A[3*2+1]) * invdet;
+ Ainv[3*0+1] = (-A[3*0+1]*A[3*2+2] + A[3*0+2]*A[3*2+1]) * invdet;
+ Ainv[3*0+2] = ( A[3*0+1]*A[3*1+2] - A[3*0+2]*A[3*1+1]) * invdet;
+ Ainv[3*1+0] = (-A[3*1+0]*A[3*2+2] + A[3*1+2]*A[3*2+0]) * invdet;
+ Ainv[3*1+1] = ( A[3*0+0]*A[3*2+2] - A[3*0+2]*A[3*2+0]) * invdet;
+ Ainv[3*1+2] = (-A[3*0+0]*A[3*1+2] + A[3*0+2]*A[3*1+0]) * invdet;
+ Ainv[3*2+0] = ( A[3*1+0]*A[3*2+1] - A[3*1+1]*A[3*2+0]) * invdet;
+ Ainv[3*2+1] = (-A[3*0+0]*A[3*2+1] + A[3*0+1]*A[3*2+0]) * invdet;
+ Ainv[3*2+2] = ( A[3*0+0]*A[3*1+1] - A[3*0+1]*A[3*1+0]) * invdet;
+}
+
+double dot_product(double const* u, double const* v) {
+ double result = 0.0;
+
+ for (size_t k = 0; k < 3; ++k)
+ result += u[k] * v[k];
+
+ return result;
+}
+
+}
A => linear-algebra.h +20 -0
@@ 1,20 @@
+#ifndef LINEAR_ALGEBRA_H
+#define LINEAR_ALGEBRA_H
+
+namespace ewald {
+
+void matrix_vector_product(double const* A, double* X);
+
+void matrix_vector_product_transpose(double const* A, double* X);
+
+void matrix_matrix_product(double const* A, double const* B, double* C);
+
+double determinant(double const* A);
+
+void matrix_inverse(double const* A, double* Ainv);
+
+double dot_product(double const* u, double const* v);
+
+}
+
+#endif
A => make-box.cpp +37 -0
@@ 1,37 @@
+#include <cstdio>
+#include <cstdlib>
+
+#include <iostream>
+
+#include <armadillo>
+
+using namespace std;
+using namespace arma;
+
+int main(int argc, char* argv[]) {
+ if (argc < 3) {
+ cerr << "Usage: " << argv[0] << " num-particles rand-seed\n";
+ return EXIT_FAILURE;
+ }
+
+ const size_t num_particles = atoi(argv[1]);
+
+ const ulong rand_seed = strtoul(argv[2], nullptr, 10);
+ clog << "Using random seed: " << rand_seed << "\n";
+ srand(rand_seed);
+
+ const double sigma = pow(2.0, 1.0 / 6.0);
+ mat::fixed<3, 3> L = 40.0 * sigma * eye(3, 3);
+ L.save("cell.mat", arma::csv_ascii);
+
+ mat x = randu<mat>(3, num_particles);
+ mat r = L * x;
+ r.save("positions.mat", arma::csv_ascii);
+
+ vec q = 2e1 * randu<vec>(num_particles);
+ // vec q = ones<vec>(num_particles);
+ for (size_t n = 0; n < num_particles; ++n) if (n % 3 != 0) q[n] = 0.0;
+ q.save("charges.mat", arma::csv_ascii);
+
+ return EXIT_SUCCESS;
+}
A => pme.cpp +436 -0
@@ 1,436 @@
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+
+#include <limits>
+#include <numeric>
+#include <functional>
+#include <iostream>
+
+#include <fftw3.h>
+
+#include "cube.cpp"
+#include "pme.h"
+#include "spline.h"
+
+#include "linear-algebra.h"
+
+namespace ewald {
+
+const spline4 spline;
+const int order = spline.order;
+
+const double epsilon = std::numeric_limits<double>::epsilon();
+
+pme::pme(long d0, long d1, long d2,
+ double const unit_cell[],
+ size_t N_,
+ double const* q_,
+ double cut_off_,
+ double tolerance,
+ int num_threads)
+ : N(N_),
+ q(q_),
+ cut_off(cut_off_),
+ threshold(compute_threshold(cut_off, tolerance)),
+ threshold2(threshold * threshold),
+ dim0(d0), dim1(d1), dim2(d2),
+ Q(d0, d1, d2),
+ Qhat(d0, d1, d2),
+ BC(d0, d1, d2),
+ s(3, N, order),
+ ds(3, N, order)
+{
+ assert(dim0 > 0 && dim1 > 0 && dim2 > 0);
+
+ std::memcpy(L, unit_cell, 3 * 3 * sizeof(double));
+ volume = determinant(L);
+ assert(volume > 1e2 * epsilon);
+ matrix_inverse(L, Linv);
+
+ std::memset(K, 0, 3 * 3 * sizeof(double));
+ K[3*0+0] = dim0;
+ K[3*1+1] = dim1;
+ K[3*2+2] = dim2;
+
+ matrix_matrix_product(K, Linv, H);
+
+ initialize_table();
+
+ fftw_init_threads();
+ fftw_plan_with_nthreads(num_threads);
+
+ {
+ // TODO: Benchmark r2c.
+ fftw_complex* in = reinterpret_cast<fftw_complex*>(Q.memptr());
+ fftw_complex* out = reinterpret_cast<fftw_complex*>(Qhat.memptr());
+ forward_plan = fftw_plan_dft_3d(dim0, dim1, dim2,
+ in, out,
+ FFTW_FORWARD, FFTW_MEASURE);
+ }
+
+ {
+ fftw_complex* in = reinterpret_cast<fftw_complex*>(Qhat.memptr());
+ fftw_complex* out = reinterpret_cast<fftw_complex*>(Qhat.memptr());
+ backward_plan = fftw_plan_dft_3d(dim0, dim1, dim2,
+ in, out,
+ FFTW_BACKWARD, FFTW_MEASURE);
+ }
+}
+
+pme::~pme() {
+ fftw_destroy_plan(forward_plan);
+ fftw_destroy_plan(backward_plan);
+
+ fftw_cleanup_threads();
+}
+
+void pme::initialize_table() {
+ long k0, k1, k2;
+ for (k0 = 0; k0 < dim0; ++k0) {
+ const double h0 = k0 < dim0 / 2 ? k0 : k0 - dim0;
+
+ for (k1 = 0; k1 < dim1; ++k1) {
+ const double h1 = k1 < dim1 / 2 ? k1 : k1 - dim1;
+
+ for (k2 = 0; k2 < dim2; ++k2) {
+
+ if (k0 == 0 && k1 == 0 && k2 == 0)
+ continue;
+
+ const double h2 = k2 < dim2 / 2 ? k2 : k2 - dim2;
+
+ double LinvTm[] = { h0, h1, h2 };
+ matrix_vector_product_transpose(Linv, LinvTm);
+
+ const double hh = dot_product(LinvTm, LinvTm);
+
+ const double nrm = norm(spline.factor(k0, dim0)
+ * spline.factor(k1, dim1)
+ * spline.factor(k2, dim2));
+
+ BC(k0, k1, k2) = psi(hh) * nrm;
+ }
+ }
+ }
+}
+
+static void compute_spline(double u,
+ double* __restrict__ s,
+ double* __restrict__ ds,
+ double& floor_u) {
+ floor_u = floor(u);
+ double x = u - floor_u + double(order) - 1.0;
+
+ // Compute splines and their derivatives using Horner's method.
+ const double a[] = {
+ 32.0 / 3.0, -8.0, 2.0, -1.0 / 6.0,
+ -22.0 / 3.0, 10.0, -4.0, 1.0 / 2.0,
+ 2.0 / 3.0, -2.0, 2.0, -1.0 / 2.0,
+ 0.0, 0.0, 0.0, 1.0 / 6.0
+ };
+
+ for (size_t k = 0; k < 4; ++k, --x) {
+ s[k] = a[4*k] + (a[4*k+1] + (a[4*k+2] + a[4*k+3] * x) * x) * x;
+ ds[k] = a[4*k+1] + (2.0 * a[4*k+2] + 3.0 * a[4*k+3] * x) * x;
+ }
+}
+
+void pme::interpolate(double const* __restrict__ x,
+ double const* __restrict__ y,
+ double const* __restrict__ z) {
+ for (size_t n = 0; n < N; ++n) {
+ double u[] = { x[n], y[n], z[n] };
+ matrix_vector_product(H, u);
+
+ const double q_n = q[n];
+
+ double floor_u[3];
+ for (size_t d = 0; d < 3; ++d)
+ compute_spline(u[d], &s(d, n, 0), &ds(d, n, 0), floor_u[d]);
+
+ for (long c0 = 0; c0 < order; ++c0) {
+ const long l0 = floor_u[0] + c0 - order + 1;
+ const long k0 = l0 < 0 ? l0 + dim0 : l0;
+
+ const double q_n_s0 = q_n * s(0, n, c0);
+
+ for (long c1 = 0; c1 < order; ++c1) {
+ const long l1 = floor_u[1] + c1 - order + 1;
+ const long k1 = l1 < 0 ? l1 + dim1 : l1;
+
+ const double q_n_s0_s1 = q_n_s0 * s(1, n, c1);
+
+ for (long c2 = 0; c2 < order; ++c2) {
+ const long l2 = floor_u[2] + c2 - order + 1;
+ const long k2 = l2 < 0 ? l2 + dim2 : l2;
+
+ Q(k0, k1, k2) += q_n_s0_s1 * s(2, n, c2);
+ // printf("cube (%s): %d %d %d %d %g\n", __func__, n, k0, k1, k2, std::real(Qhat(k0, k1, k2)));
+ }
+ }
+ }
+ }
+}
+
+double pme::energy(double const* __restrict__ x,
+ double const* __restrict__ y,
+ double const* __restrict__ z,
+ double* __restrict__ force_x,
+ double* __restrict__ force_y,
+ double* __restrict__ force_z) {
+ Q.zeros();
+ Qhat.zeros();
+ s.zeros();
+ ds.zeros();
+
+ interpolate(x, y, z);
+ /*
+ {
+ std::complex<double> const* Qptr = Q.memptr();
+ std::complex<double> total { 0.0, 0.0 };
+ for (long r = 0; r < dim0 * dim1 * dim2; ++r)
+ total += Qptr[r];
+ std::printf("%s: Interpolated a total \"charge\" of %g + %g i\n", __func__, real(total), imag(total));
+ }
+ */
+
+
+ fftw_execute(forward_plan);
+
+ Qhat *= BC;
+
+ fftw_execute(backward_plan);
+
+ double Erecip = 0.0;
+ for (long k0 = 0; k0 < dim0; ++k0)
+ for (long k1 = 0; k1 < dim1; ++k1)
+ for (long k2 = 0; k2 < dim2; ++k2)
+ Erecip += real(Q(k0, k1, k2)) * real(Qhat(k0, k1, k2));
+
+ double Erecip1 = 0.0;
+ for (size_t n = 0; n < N; ++n) {
+ double u[] = { x[n], y[n], z[n] };
+ matrix_vector_product(H, u);
+
+ for (long c0 = 0; c0 < order; ++c0) {
+ const long l0 = floor(u[0]) + c0 - order + 1;
+ const long k0 = l0 < 0 ? l0 + dim0 : l0;
+
+ for (long c1 = 0; c1 < order; ++c1) {
+ const long l1 = floor(u[1]) + c1 - order + 1;
+ const long k1 = l1 < 0 ? l1 + dim1 : l1;
+
+ const double ds0_s1 = ds(0, n, c0) * s(1, n, c1);
+ const double s0_ds1 = s(0, n, c0) * ds(1, n, c1);
+ const double s0_s1 = s(0, n, c0) * s(1, n, c1);
+
+ for (long c2 = 0; c2 < order; ++c2) {
+ const long l2 = floor(u[2]) + c2 - order + 1;
+ const long k2 = l2 < 0 ? l2 + dim2 : l2;
+
+ const double Q_k = std::real(Qhat(k0, k1, k2));
+
+ Erecip1 += Q_k * std::real(Q(k0, k1, k2));
+
+ double w[] = {
+ ds0_s1 * s(2, n, c2),
+ s0_ds1 * s(2, n, c2),
+ s0_s1 * ds(2, n, c2)
+ };
+ matrix_vector_product_transpose(H, w);
+
+ // if (n == 1)
+ // printf("cube (%s): %3d\t%3d\t%3d\t%3d\t%3.6f\t%3.6f\t%3.6f\t%3.6f\t%3.9f\n",
+ // __func__, n, k0, k1, k2,
+ // std::real(Q(k0, k1, k2)), std::imag(Q(k0, k1, k2)),
+ // std::real(Qhat(k0, k1, k2)), std::imag(Qhat(k0, k1, k2)),
+ // Q_k * std::real(Q(k0, k1, k2)));
+
+ const double factor = -2.0 * Q_k * q[n];
+ force_x[n] += factor * w[0];
+ force_y[n] += factor * w[1];
+ force_z[n] += factor * w[2];
+ }
+ }
+ }
+ }
+
+ printf("%s: Erecip = %3.16f vs. %3.16f\n", __func__, Erecip, Erecip1);
+ return double(Erecip1);
+}
+
+double pme::energy_extra() const {
+ const double sum_q = std::accumulate(q, q + N, 0.0, std::plus<double>());
+
+ return 1.0 / 6.0 * pow(M_PI * threshold, 3.0) / volume * sum_q * sum_q;
+}
+
+double pme::energy_self() const {
+ const double q2 = std::inner_product(q, q + N, q, 0.0);
+
+ return 1.0 / 12.0 * pow(M_PI, 3.0) * pow(threshold, 6.0) * q2;
+}
+
+double pme::psi(double h2) const {
+ const double b2 = M_PI * h2 / threshold2;
+ const double b = sqrt(b2);
+ const double b3 = b2 * b;
+
+ const double h = sqrt(h2);
+ const double h3 = h2 * h;
+
+ return pow(M_PI, 9.0 / 2.0) / (3.0 * volume) * h3
+ * (sqrt(M_PI) * erfc(b) + (1.0 / (2.0 * b3) - 1.0 / b) * exp(-b2));
+}
+
+double pme::recip_convergence_term(double h2,
+ std::complex<double> const& rho) const {
+ const double norm_rho2 = norm(rho);
+ return norm_rho2 * psi(h2);
+}
+
+static inline double direct_term(double r2, double threshold2, double& fac) {
+ const double r6 = r2 * r2 * r2;
+ const double r8 = r6 * r2;
+ const double a2 = M_PI * threshold2 * r2;
+ const double a4 = a2 * a2;
+ const double a6 = a4 * a2;
+ const double exp_m_a2 = exp(-a2);
+
+ fac = -(a6 + 3.0 * a4 + 6.0 * a2 + 6.0) * exp_m_a2 / r8;
+
+ return (1.0 + a2 + 0.5 * a4) * exp_m_a2 / r6;
+}
+
+double pme::direct_convergence_term(double const* v, double* dv) const {
+ const double r2 = dot_product(v, v);
+ double fac;
+
+ const double energy = direct_term(r2, threshold2, fac);
+
+ for (size_t k = 0; k < 3; ++k)
+ dv[k] = fac * v[k];
+
+ return energy;
+}
+
+static double diff_direct_term(double threshold, double cut_off) {
+ // Computes the derivative of direct_term() with respect to the
+ // variable threshold. Note that the input is threshold, not
+ // threshold squared (threshold2).
+
+ const double threshold2 = threshold * threshold;
+ const double threshold4 = threshold2 * threshold2;
+ const double threshold5 = threshold4 * threshold;
+ const double cut_off2 = cut_off * cut_off;
+
+ return -threshold5 * exp(-threshold2 * cut_off2);
+}
+
+/** Obtains a suitable value of the threshold parameter. The threshold
+ * parameter (beta) is obtained by solving the non-linear equation
+ * direct_term(beta) = tol for a given tolerance tol.
+ *
+ * \param cut_off Cut off distance.
+ *
+ * \param tol Maximum value of the real space contribution for the
+ * distance given by cut_off.
+ *
+ * \return The solution beta of the non-linear equation.
+ */
+double pme::compute_threshold(double cut_off, double tol) const {
+ // XXX Maybe throw exception rather than return an integer status flag.
+ const double r2 = cut_off * cut_off;
+ double aux;
+
+ // Our initial guess comes from imposing that the value of exp(-a2)
+ // in direct_term should be greater than or equal to machine
+ // epsilon.
+ double beta_init = sqrt(-log(epsilon) / M_PI) / cut_off;
+ double beta = beta_init;
+ double beta2 = beta * beta;
+ double val = direct_term(r2, beta2, aux);
+
+ std::clog << "Starting non-linear solver with beta = " << beta << std::endl;
+
+ size_t iter;
+ const size_t max_iters = size_t(1e6);
+ for (iter = 0; iter < max_iters; ++iter) {
+ beta -= (direct_term(r2, beta2, aux) - tol) / diff_direct_term(beta, cut_off);
+ beta2 = beta * beta;
+ if (isinf(beta) || isnan(beta)) {
+ beta_init /= 2.0;
+ beta = beta_init;
+
+ if (beta_init == 0.0) {
+ std::clog << "Non-linear solver failed to find a suitable value of beta."
+ << std::endl;
+ // Red alert. XXX This should fail more gracefully.
+ beta = NAN;
+ break;
+ }
+ }
+
+ val = direct_term(r2, beta2, aux);
+ if (fabs(val - tol) < epsilon)
+ break;
+ }
+
+ std::clog << "Solver stopped after " << iter << " iterations.\n"
+ << "Using beta = " << beta << " with value = " << val << "\n";
+
+ if (iter == max_iters)
+ std::clog << "Warning: Non-linear solver did not converge."
+ << std::endl;
+
+ return beta;
+}
+
+double pme::excluded(exclusion_vector const& exclusions,
+ double const* __restrict__ x,
+ double const* __restrict__ y,
+ double const* __restrict__ z,
+ double* __restrict__ force_x,
+ double* __restrict__ force_y,
+ double* __restrict__ force_z) const {
+ double excluded_energy = 0.0;
+
+ for (size_t i = 0; i < N; ++i) {
+ const double q_i = q[i];
+
+ index_vector const& excl = exclusions[i];
+ for (size_t r = 0; r < excl.size(); ++r) {
+ const size_t j = excl[r];
+ if (j <= i)
+ continue;
+
+ const double v[] = { x[i] - x[j], y[i] - y[j], z[i] - z[j] };
+ const double r2 = dot_product(v, v);
+ // Note that we must NOT enforce a cut off here. We are
+ // compensating a masked contribution to the reciprocal part,
+ // and that contribution does not involve cut offs.
+ const double r6 = r2 * r2 * r2;
+ const double invr6 = 1.0 / (r2 * r2 * r2);
+ const double invr8 = 1.0 / (r6 * r2);
+
+ double term;
+ double f[3];
+ const double qi_qj = q_i * q[j];
+ excluded_energy += qi_qj * (invr6 - direct_term(r2, threshold2, term));
+ const double factor = qi_qj * (6.0 * invr8 + term);
+ for (size_t k = 0; k < 3; ++k)
+ f[k] = factor * v[k];
+ force_x[i] += f[0];
+ force_y[i] += f[1];
+ force_z[i] += f[2];
+ force_x[j] -= f[0];
+ force_y[j] -= f[1];
+ force_z[j] -= f[2];
+ }
+ }
+
+ return excluded_energy;
+}
+
+}
A => pme.h +87 -0
@@ 1,87 @@
+#ifndef PME_H
+#define PME_H
+
+#include <vector>
+#include <complex>
+
+#include <fftw3.h>
+
+#include "cube.h"
+
+namespace ewald {
+
+typedef std::vector<size_t> index_vector;
+typedef std::vector<index_vector> exclusion_vector;
+
+class pme {
+ public:
+ pme(long dimension0, long dimension1, long dimension2,
+ double const* unit_cell,
+ size_t num_particles,
+ double const* q,
+ double cut_off, double tolerance,
+ int num_threads);
+
+ ~pme();
+
+ double get_threshold() const { return threshold; }
+
+ double energy(double const* x, double const* y, double const* z,
+ double* force_x, double* force_y, double* force_z);
+
+ double energy_extra() const;
+
+ double energy_self() const;
+
+ double direct_convergence_term(double const* v,
+ double* dv) const;
+
+ // XXX Make the following two methods private at some point.
+ double recip_convergence_term(double h2,
+ std::complex<double> const& rho) const;
+
+ double psi(double h2) const;
+
+ double excluded(exclusion_vector const& exclusions,
+ double const* x, double const* y, double const* z,
+ double* force_x, double* force_y, double* force_z) const;
+
+ private:
+ double compute_threshold(double cut_off, double tol) const;
+
+ void initialize_table();
+
+ void interpolate(double const* x, double const* y, double const* z);
+
+ private:
+ const size_t N; // Number of particles.
+
+ double L[3 * 3];
+ double Linv[3 * 3];
+ double volume;
+
+ double const* q; // B params. in OPLS LJ model.
+
+ const double cut_off;
+
+ const double threshold; // Real-space threshold.
+ const double threshold2;
+
+ double K[3 * 3];
+ double H[3 * 3]; // Coordinate transformation.
+
+ const long dim0, dim1, dim2;
+
+ complex_cube Q;
+ complex_cube Qhat;
+ complex_cube BC;
+
+ real_cube s;
+ real_cube ds;
+
+ fftw_plan forward_plan, backward_plan;
+};
+
+}
+
+#endif
A => run-pme.cpp +182 -0
@@ 1,182 @@
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cassert>
+#include <cfenv>
+
+#include <complex>
+#include <iomanip>
+#include <iostream>
+
+#include <armadillo>
+
+#include <omp.h>
+
+#include "pme.h"
+#include "abort_unless.h"
+
+using namespace std;
+using namespace arma;
+
+const complex<double> I(0.0, 1.0);
+
+const double sigma = pow(2.0, 1.0 / 6.0);
+
+int main(int argc, char* argv[]) {
+ if (argc < 7) {
+ cout << "Usage: " << argv[0]
+ << " dimension cell-file positions-file charges-file cut-off tolerance\n";
+ return EXIT_FAILURE;
+ }
+
+ const long dim0 = atoi(argv[1]);
+ const long dim1 = dim0;
+ const long dim2 = dim0;
+
+ mat::fixed<3, 3> L;
+ L.load(argv[2]);
+ const mat::fixed<3, 3> LinvT = trans(inv(L));
+ // const double V = det(L);
+ L.print("L = ");
+
+ mat r;
+ r.load(argv[3]);
+ abort_unless(r.n_rows == 3);
+ // r.print("r = ");
+ const mat x = inv(L) * r;
+
+ vec qq;
+ qq.load(argv[4]);
+ std::vector<double> q(r.n_cols);
+ {
+ for (size_t k = 0; k < q.size(); ++k)
+ q[k] = qq(k);
+ }
+
+ const size_t num_particles = q.size();
+ clog << "Read " << num_particles << " particles into memory.\n";
+
+ std::vector<double> coor_x(num_particles);
+ std::vector<double> coor_y(num_particles);
+ std::vector<double> coor_z(num_particles);
+ {
+ for (size_t k = 0; k < num_particles; ++k) {
+ coor_x[k] = r(0, k);
+ coor_y[k] = r(1, k);
+ coor_z[k] = r(2, k);
+ }
+ }
+
+ const double cut_off = atof(argv[5]);
+ const double tolerance = atof(argv[6]);
+ clog << "Real space cut-off: " << cut_off << "\n"
+ << "Real space tolerance: " << tolerance << "\n\n";
+
+ mat forces = zeros<mat>(3, num_particles);
+ mat forces_cutoff = zeros<mat>(3, num_particles);
+ mat forces_direct = zeros<mat>(3, num_particles);
+ mat forces_recip = zeros<mat>(3, num_particles);
+
+ long double ErecipPME = 0.0L;
+ std::vector<double> forces_x(num_particles);
+ std::vector<double> forces_y(num_particles);
+ std::vector<double> forces_z(num_particles);
+ mat forcesPME = zeros<mat>(3, num_particles);
+ wall_clock timer;
+ timer.tic();
+
+ double LL[3 * 3];
+ for (size_t i = 0; i < 3; ++i)
+ for (size_t j = 0; j < 3; ++j)
+ LL[3*i+j] = L(i, j);
+
+ ewald::pme p(dim0, dim1, dim2, LL,
+ num_particles, &q[0],
+ cut_off, tolerance,
+ omp_get_max_threads());
+
+ double seconds = timer.toc();
+ clog << "PME initialization took " << seconds << " seconds.\n\n";
+
+ timer.tic();
+
+ ErecipPME = p.energy(&coor_x[0], &coor_y[0], &coor_z[0],
+ &forces_x[0], &forces_y[0], &forces_z[0]);
+
+ for (size_t k = 0; k < num_particles; ++k) {
+ forcesPME(0, k) = forces_x[k];
+ forcesPME(1, k) = forces_y[k];
+ forcesPME(2, k) = forces_z[k];
+ }
+
+ seconds = timer.toc();
+ clog << "PME took " << seconds << " seconds.\n\n";
+
+ long double Ebrute = 0.0L, Edirect = 0.0L;
+ long double Ecutoff = 0.0L;
+ const long double Eextra = p.energy_extra();
+ const long double Eself = p.energy_self();
+
+ mat forces_ewald, forces_pme;
+
+ double elapsed = 0.0;
+ for (int n = 0; n < 1; n++) {
+ wall_clock timer;
+ timer.tic();
+ for (int nx = -n; nx <= n; nx++) {
+ for (int ny = -n; ny <= n; ny++) {
+ for (int nz = -n; nz <= n; nz++) {
+ if (abs(nx) != n && abs(ny) != n && abs(nz) != n)
+ continue;
+
+ const bool origin = (nx == 0 && ny == 0 && nz == 0);
+
+ const vec::fixed<3> m = { double(nx), double(ny), double(nz) };
+ const vec::fixed<3> Lm = L * m;
+ const vec::fixed<3> LinvTm = LinvT * m;
+
+ for (size_t i = 0; i < num_particles; i++) {
+ for (size_t j = 0; j < num_particles; j++) {
+ if (origin && i == j)
+ continue;
+
+ const vec::fixed<3> v = r.col(i) - r.col(j) + Lm;
+ const double r2 = dot(v, v);
+ const double r6 = r2 * r2 * r2;
+ const double r8 = r6 * r2;
+
+ assert(r2 >= 1e-16);
+
+ Ebrute += 0.5 * q[i] * q[j] / r6;
+ forces.col(i) -= -6.0 * q[i] * q[j] / r8 * v;
+
+ if (abs(nx) < 2 && abs(ny) < 2 && abs(nz) < 2) {
+ forces_cutoff.col(i) -= -6.0 * q[i] * q[j] / r8 * v;
+ Ecutoff = Ebrute;
+ }
+
+ vec::fixed<3> dv;
+ Edirect += 0.5 * q[i] * q[j] * p.direct_convergence_term(v.memptr(), dv.memptr());
+ forces_direct.col(i) -= q[i] * q[j] * dv;
+ }
+ }
+ }
+ }
+ }
+
+ elapsed += timer.toc();
+
+ const long double EtotalPME = Edirect + ErecipPME + Eextra - Eself;
+
+ forces_pme = forces_direct + forcesPME;
+
+ cout << "Real space computation over one shell took " << elapsed << " seconds." << "\n"
+ << fixed << setprecision(16)
+ << "Ecutoff = " << Ecutoff << "\n"
+ << "Etotal (PME) = " << EtotalPME << "\n"
+ << "Edirect + Erecip + Eextra - Eself = "
+ << Edirect << " + " << ErecipPME << " + " << Eextra << " - " << Eself << "\n";
+ }
+
+ return 0;
+}
A => spline.cpp +48 -0
@@ 1,48 @@
+#include <cmath>
+
+#include <complex>
+
+#include "spline.h"
+
+using std::floor;
+using std::complex;
+
+const complex<double> I(0.0, 1.0);
+
+spline4::spline4() {}
+
+double spline4::operator()(double x) const {
+ if (0.0 < x && x < 1.0)
+ return 1.0 / 6.0 * x * x * x;
+ else if (1.0 <= x && x < 2.0)
+ return 2.0 / 3.0 + (-2.0 + (2.0 - 1.0 / 2.0 * x) * x) * x;
+ else if (2.0 <= x && x < 3.0)
+ return -22.0 / 3.0 + (10.0 + (-4.0 + 1.0 / 2.0 * x) * x) * x;
+ else if (3.0 <= x && x < 4.0)
+ return 32.0 / 3.0 + (-8.0 + (2.0 - 1.0 / 6.0 * x) * x) * x;
+ else
+ return 0.0;
+}
+
+double spline4::diff(double x) const {
+ if (0.0 < x && x < 1.0)
+ return 1.0 / 2.0 * x * x;
+ else if (1.0 <= x && x < 2.0)
+ return -2.0 + (4.0 - 3.0 / 2.0 * x) * x;
+ else if (2.0 <= x && x < 3.0)
+ return 10.0 + (-8.0 + 3.0 / 2.0 * x) * x;
+ else if (3.0 <= x && x < 4.0)
+ return -8.0 + (4.0 - 1.0 / 2.0 * x) * x;
+ else
+ return 0.0;
+}
+
+complex<double> spline4::factor(double m, double K) const {
+ const double m_over_K = m / K;
+
+ complex<double> denom = 0.0;
+ for (double k = 0.0; k <= order - 2.0; k++)
+ denom += operator()(k + 1.0) * exp(2.0 * M_PI * I * m_over_K * k);
+
+ return exp(2.0 * M_PI * I * (order - 1.0) * m_over_K) / denom;
+}
A => spline.h +18 -0
@@ 1,18 @@
+#ifndef SPLINE_H
+#define SPLINE_H
+
+#include <complex>
+
+struct spline4 {
+ spline4();
+
+ double operator()(double x) const;
+
+ double diff(double x) const;
+
+ std::complex<double> factor(double m, double K) const;
+
+ static const unsigned int order = 4;
+};
+
+#endif