M README.md => README.md +3 -3
@@ 2,9 2,9 @@
# HalfSpectral.jl
A convenient interface for working with half-spectral covariance functions in
-the space-time domain. This is part of the software companion to *Flexible
-nonstationary spatio-temporal modeling of high-frequency monitoring data* (arxiv
-link coming soon).
+the space-time domain. This is part of the software companion to [Flexible
+nonstationary spatio-temporal modeling of high-frequency monitoring
+data](http://arxiv.org/abs/2007.11418).
See the paper for a discussion of the covariance function itself, and see the
example file `./example/basicmodel.jl` for an example specification of the
A example/paperscripts/README.txt => example/paperscripts/README.txt +12 -0
@@ 0,0 1,12 @@
+
+These files are most of the code used to fit the model described in the paper.
+They will not "just run" on your computer as I have removed several hard paths
+for storing output and reading data files. Nonetheless, I think they are helpful
+in illustrating a more advanced workflow for several data sources and a
+reasonably complex model.
+
+I will NOT be updating these files to reflect potential breaking changes in
+HalfSpectral.jl, so please refer to the commit tagged "paper" to see a version
+of HalfSpectral.jl that was the source code that these scripts actually ran when
+generating results for the paper.
+
A => +24 -0
@@ 0,0 1,24 @@
using HalfSpectral
# UNITS:
# time (ix)
# altitudes (ix)
# data (m/s)
function extract_block(trange, times, gates, dopp, ints, fn)
qj = findall(x->trange[1]<=x<=trange[2], times)
minr = minimum.(eachrow(ints[:,qj]))
snrj = filter(x->in(x, gates), findall(x->x>1.008, minr))
tgrd = timegrid(times[qj])
tgrd isa TimeFFTKernels.TimeGridFailure && return (tgrd, nothing)
pts = reverse.(vec(collect(Iterators.product(snrj.*30.0, tgrd.Xt))))
dat = vec(dopp[snrj,qj])
if fn isa Nothing
mldat = (pts=pts, dat=dat)
else
println("Transforming data...")
mldat = (pts=pts, dat=fn(dat))
end
(tgrd, snrj.*30.0, mldat) # (timegrid object, altitude (m), named tuple)
end
A example/paperscripts/fit_trustregion.jl => example/paperscripts/fit_trustregion.jl +107 -0
@@ 0,0 1,107 @@
+
+using HDF5, HalfSpectral, LinearAlgebra, DelimitedFiles, GPMaxlik, JLD, Random
+
+# Set the seed:
+Random.seed!(123456)
+
+# At this point, do five iterations, then start using an exact gradient:
+GPMaxlik.SETTINGS.GRAD_INFNORM_TOL = 0.5
+
+include("model.jl")
+include("extractor.jl")
+include("writer.jl")
+include("nsscale.jl")
+
+# no build indices: re-building the integrand kernel object isn't necessary for
+# these parameter indices.
+const nob = (1,2,3,4,24,25)
+
+# Create a NonstationaryScale object:
+NS = NonstationaryScale{Float64,false,0,4}((156, 311, 466, 621), (1,2,3,4))
+dNS = [NonstationaryScale{Float64,true,j,4}((156, 311, 466, 621), (1,2,3,4)) for j in 1:4]
+
+# Some simple wrapper functions:
+function nll_grad_fish(pts, data, kfn, dfns, p, saa)
+ nll_call = nll(pts, data, kfn, dfns, p, saa=saa, nll=true, grad=true, fish=true)
+ (nll_call.nll, nll_call.grad, nll_call.fish)
+end
+
+# Convert decimal hours to minutes:
+decimalhours_to_minutes(dhours) = (dhours .- 14.0).*60.0
+
+# An extra function to print the Newton step every five iterations:
+function print_ns(c, fx, xv, gx, hx, dl; kwargs...)
+ if iszero(rem(c, 5))
+ println("NS: $(round.(hx\gx, digits=4))")
+ end
+ return (false, nothing)
+end
+
+function fit_gates_trustregion(day, timewin, gates, its, ini, rt, gt,
+ trfun=nothing, sim=false, dc=1.0e-4)
+ # Load in the data:
+ println("Working with day $day")
+ rdata = read(h5open(__YOUR_PATH_TO_RAW_DATA__)) # Put your path here
+ (times, dopp, ints) = (rdata[x] for x in ("Time", "Doppler", "Intensity"))
+ (tgrid, alts, data) = extract_block(timewin, times, gates, dopp, ints, trfun)
+ if unique(diff(alts)) != [30.0]
+ @warn "You are missing gates in the range you've requested."
+ end
+ # Create the necessary objects:
+ saav = rand([-1.0, 1.0], length(data.pts), 250) # was 125 for first stage of fitting
+ kfun = tftkernel(integrand, tgrid, alts, ini, addfn=nugfn, mulfn=NS, nob=nob)
+ dfns = vcat(
+ [tftkernel(integrand, tgrid, alts, ini, mulfn=dNSj, nob=nob) for dNSj in dNS],
+ [tftkernel(dk, tgrid, alts, ini, mulfn=NS, nob=nob) for dk in dfuns],
+ dnugfn1, dnugfn2
+ )
+ # Do the optimization:
+ obj = p-> nll(data.pts, data.dat, kfun, dfns, p, saa=saav,
+ nll=true, grad=false, fish=false).nll
+ objgradfish = p -> nll_grad_fish(data.pts, data.dat, kfun, dfns, p, saav)
+ maxlik_result = trustregion(obj, objgradfish, ini, vrb=true, maxit=its, gtol=gt,
+ rtol=rt, dcut=dc, iter_funs=(GPMaxlik.convg_info, print_ns))
+ println("Estimation result: $(maxlik_result.status)")
+ println()
+ if sim
+ L = cholesky!(Symmetric([kfun(x, y) for x in data.pts, y in data.pts])).L
+ d = reshape(data.dat, length(alts), length(tgrid.Tv))
+ s = reshape(L*randn(length(data.pts)), length(alts), length(tgrid.Tv))
+ gnuplot_save_matrix!(__YOUR_PATH__*"sim_"*day*"_14.csv", s, alts, # Put your path here
+ decimalhours_to_minutes(tgrid.Tv))
+ gnuplot_save_matrix!(__YOUR_PATH__*"tru_"*day*"_14.csv", d, alts, # Put your path here
+ decimalhours_to_minutes(tgrid.Tv))
+ end
+ return maxlik_result
+end
+
+
+
+
+
+
+const FITTED_DAYS = tuple() # Satisfactorily fitted days
+const tw = (14.02, 14.24)
+
+for (day, gates, mit) in zip(("02", "03", "06", "20", "24", "28"),
+ (7:23, 7:28, 7:23, 7:28, 7:28, 7:28),
+ (30, 30, 30, 30, 30, 30))
+ if in(day, FITTED_DAYS)
+ println("Estimate for day $day was marked acceptable, moving on.")
+ println()
+ continue
+ end
+ init = load(__YOUR_PATH__*"fit_"*day*"_results.jld")["result"].p
+ try
+ result = fit_gates_trustregion(day, tw, gates, mit, init,
+ 1.0e-6, 1.0e-3, nothing, true, 1.0e-7)
+ # Deal with results:
+ GPMaxlik.reset_settings!()
+ JLD.save(__YOUR_PATH__*"fit_"*day*"_results.jld", "result", result)
+ catch
+ println()
+ println("Day $day broke somehow. Check out the logs.")
+ println()
+ end
+end
+
A example/paperscripts/fitted_estimates_untransformed.jl => example/paperscripts/fitted_estimates_untransformed.jl +65 -0
@@ 0,0 1,65 @@
+
+using HDF5, TimeFFTKernels, LinearAlgebra, DelimitedFiles, GPMaxlik, JLD, Random
+
+# Set the seed:
+Random.seed!(123456)
+
+# At this point, do five iterations, then start using an exact gradient:
+GPMaxlik.SETTINGS.GRAD_INFNORM_TOL = 0.5
+
+include("model_untransformed.jl")
+include("extractor.jl")
+include("writer.jl")
+include("nsscale.jl")
+include("untransformer.jl")
+
+# no build indices: re-building the integrand kernel object isn't necessary for
+# these parameter indices.
+const nob = (1,2,3,4,23,24,25)
+
+# Create a NonstationaryScale object:
+NS = NonstationaryScale{Float64,false,0,4}((156, 311, 466, 621), (1,2,3,4))
+dNS = [NonstationaryScale{Float64,true,j,4}((156, 311, 466, 621), (1,2,3,4)) for j in 1:4]
+
+# Some simple wrapper functions:
+function nll_grad_fish(pts, data, kfn, dfns, p, saa)
+ nll_call = nll(pts, data, kfn, dfns, p, saa=saa, nll=true, grad=true, fish=true)
+ (nll_call.nll, nll_call.grad, nll_call.fish)
+end
+
+function untransformed_uncertainty(day, timewin, gates, trfun=nothing)
+ # Load in the data:
+ println("Working with day $day")
+ rdata = read(h5open(__YOUR_PATH_TO_RAW_DATA)) # your path to the raw files
+ (times, dopp, ints) = (rdata[x] for x in ("Time", "Doppler", "Intensity"))
+ (tgrid, alts, data) = extract_block(timewin, times, gates, dopp, ints, trfun)
+ # Load in the MLE and un-transform it:
+ mle_result = load(__YOUR_PATH__*"fit_"*day*"_results.jld")["result"]
+ mle = transf(mle_result.p)
+ # Create the necessary objects:
+ saav = rand([-1.0, 1.0], length(data.pts), 125)
+ kfun = tftkernel(integrand, tgrid, alts, mle, addfn=nugfn, mulfn=NS, nob=nob)
+ dfns = vcat(
+ [tftkernel(integrand, tgrid, alts, mle, mulfn=dNSj, nob=nob) for dNSj in dNS],
+ [tftkernel(dk, tgrid, alts, mle, mulfn=NS, nob=nob) for dk in dfuns],
+ dnugfn1, dnugfn2
+ )
+ # Obtain the fisher information matrix for the untransformed model at the MLE:
+ out = nll_grad_fish(data.pts, data.dat, kfun, dfns, mle, saav)
+ println("Likelihood at MLE for un-transformed data: $(round(out[1], digits=5))")
+ (mle, out)
+end
+
+const tw = (14.02, 14.24)
+const gater = (7:23, 7:28, 7:23, 7:28, 7:28, 7:28)
+for (day, gates) in zip(("02", "03", "06", "20", "24", "28"), gater)
+ # Obtain the un-transformed MLE and uncertainty:
+ untransf_result = untransformed_uncertainty(day, tw, gates, nothing)
+ # Deal with results:
+ GPMaxlik.reset_settings!()
+ JLD.save(__YOUR_PATH__*"fit_"*day*"_results_untransformed.jld",
+ "mle", untransf_result[1],
+ "nllresult", untransf_result[2])
+end
+
+
A example/paperscripts/model.jl => example/paperscripts/model.jl +88 -0
@@ 0,0 1,88 @@
+
+using SpecialFunctions, ForwardDiff, DelimitedFiles
+
+# For the derivatives of the kernel:
+function coord_deriv(f, p, j)
+ ForwardDiff.derivative(z->f(vcat(p[1:(j-1)], z, p[(j+1):end])), p[j])
+end
+
+# The matern correlation for smoothness 1/2 and 3/2:
+mtn_cor_12(x) = exp(-abs(x))
+mtn_cor_32(x) = exp(-abs(x))*(1.0 + abs(x))
+function mtn_cor(nu, x)
+ abs(x)<1.0e-8 && return 1.0
+ out = (sqrt(2.0*nu)*x)^nu
+ out *= besselk(nu, sqrt(2.0*nu)*x)
+ out /= gamma(nu)*2.0^(nu-1.0)
+ out
+end
+
+# The butterworth-like function. Several tweaks for numerical reasons to avoid
+# breaking AD.
+function butter(w,p)
+ iszero(w) && return p[1] # tweak one: a branch for w=0.
+ arg = abs2(w/p[2])^p[3]
+ arg > 1.0e18 && return 0.0 # tweak two: a branch for the arg being too large.
+ p[1]/(1.0 + arg)
+end
+
+function _logistic(x, p_0, p_1, shape, center)
+ alpha = 1.0/(1.0 + exp(-shape*(x-center)))
+ (1-alpha)*p_0 + alpha*p_1
+end
+
+function _logistic_multiple(x, shape, center, a::NTuple, b::NTuple)
+ map(z->_logistic(x, z[1], z[2], shape, center), zip(a,b))
+end
+
+# Marginal spectral density:
+function Sf(w,x,p)
+ (_i, _v) = _logistic_multiple(x, p[16], p[15], (exp(p[5]), p[6]), (exp(p[7]), p[8]))
+ extra_parm = _logistic(x, exp(p[17]), exp(p[18]), p[16], p[15])
+ extra = 1.0+butter(sinpi(w), (extra_parm, exp(p[20]), exp(p[19])))
+ scale_x = x <= p[15] ? 1.0 : butter(x-p[15], (1.0, exp(p[21]), exp(p[22])))
+ scale_x*extra*(_i*sinpi(w)^2 + 1)^(-_v-1/2)
+end
+
+function Cw(w,x,y,p)
+ x == y && return 1.0
+ (gp1x, gp1y) = (_logistic(z, exp(p[9]), exp(p[12]), p[16], p[15]) for z in (x,y))
+ (gp2x, gp2y) = (_logistic(z, exp(p[10]), exp(p[13]), p[16], p[15]) for z in (x,y))
+ (gp3x, gp3y) = (_logistic(z, exp(p[11]), exp(p[14]), p[16], p[15]) for z in (x,y))
+ nux = x <= p[15] ? 0.5 : 0.75
+ nuy = y <= p[15] ? 0.5 : 0.75
+ gamx = butter(sinpi(w), (gp1x, gp2x, gp3x))
+ gamy = butter(sinpi(w), (gp1y, gp2y, gp3y))
+ gamxy = sqrt(0.5*(gamx+gamy))
+ gamx_y = (gamx^0.25)*(gamy^0.25)
+ return (gamx_y/gamxy)*mtn_cor(0.5*(nux+nuy), abs(x-y)/gamxy)
+end
+
+# Phase term:
+phase(w,x,y,p) = exp(2.0*pi*im*p[23]*sinpi(w)*(x-y))
+function phased1(w,x,y,p)
+ 2.0*pi*im*sinpi(w)*(x-y)*phase(w,x,y,p)
+end
+
+# Whole integrand, and manual derivative with respect to phase parameter:
+integrand(w,x,y,p) = sqrt(Sf(w,x,p)*Sf(w,y,p))*Cw(w,x,y,p)*phase(w,x,y,p)
+integrand_dp1(w,x,y,p) = sqrt(Sf(w,x,p)*Sf(w,y,p))*Cw(w,x,y,p)*phased1(w,x,y,p)
+
+# Integrand with no phase for autodiff:
+integrand_np(w,x,y,p) = sqrt(Sf(w,x,p)*Sf(w,y,p))*Cw(w,x,y,p)
+
+# Derivatives of the kernel:
+dfuns =convert(Vector{Function},
+ [(w,x,y,p)->coord_deriv(z->integrand_np(w,x,y,z), p, j)*phase(w,x,y,p)
+ for j in 5:22])
+
+# do the phase functions derivative by hand because ForwardDiff doesn't
+# support complex numbers....
+push!(dfuns, integrand_dp1)
+
+# The nugget function, added as a post-call. This derivative function gets
+# added to the list of kernel derivatives in the space-time domain.
+nugfn(x,y,p) = Float64(x==y)*p[24]^2 + Float64(x[1]==y[1])*p[25]^2
+dnugfn1(x,y,p) = Float64(x==y)*2.0*p[24]
+dnugfn2(x,y,p) = Float64(x[1]==y[1])*2.0*p[25]
+
A example/paperscripts/model_untransformed.jl => example/paperscripts/model_untransformed.jl +88 -0
@@ 0,0 1,88 @@
+
+using SpecialFunctions, ForwardDiff, DelimitedFiles
+
+# For the derivatives of the kernel:
+function coord_deriv(f, p, j)
+ ForwardDiff.derivative(z->f(vcat(p[1:(j-1)], z, p[(j+1):end])), p[j])
+end
+
+# The matern correlation for smoothness 1/2 and 3/2:
+mtn_cor_12(x) = exp(-abs(x))
+mtn_cor_32(x) = exp(-abs(x))*(1.0 + abs(x))
+function mtn_cor(nu, x)
+ abs(x)<1.0e-8 && return 1.0
+ out = (sqrt(2.0*nu)*x)^nu
+ out *= besselk(nu, sqrt(2.0*nu)*x)
+ out /= gamma(nu)*2.0^(nu-1.0)
+ out
+end
+
+# The butterworth-like function. Several tweaks for numerical reasons to avoid
+# breaking AD.
+function butter(w,p)
+ iszero(w) && return p[1] # tweak one: a branch for w=0.
+ arg = abs2(w/p[2])^p[3]
+ arg > 1.0e18 && return 0.0 # tweak two: a branch for the arg being too large.
+ p[1]/(1.0 + arg)
+end
+
+function _logistic(x, p_0, p_1, shape, center)
+ alpha = 1.0/(1.0 + exp(-shape*(x-center)))
+ (1-alpha)*p_0 + alpha*p_1
+end
+
+function _logistic_multiple(x, shape, center, a::NTuple, b::NTuple)
+ map(z->_logistic(x, z[1], z[2], shape, center), zip(a,b))
+end
+
+# Marginal spectral density:
+function Sf(w,x,p)
+ (_i, _v) = _logistic_multiple(x, p[16], p[15], (exp(p[5]), p[6]), (exp(p[7]), p[8]))
+ extra_parm = _logistic(x, p[17], p[18], p[16], p[15])
+ extra = 1.0+butter(sinpi(w), (extra_parm, p[20], p[19]))
+ scale_x = x <= p[15] ? 1.0 : butter(x-p[15], (1.0, p[21], p[22]))
+ scale_x*extra*(_i*sinpi(w)^2 + 1)^(-_v-1/2)
+end
+
+function Cw(w,x,y,p)
+ x == y && return 1.0
+ (gp1x, gp1y) = (_logistic(z, exp(p[9]), exp(p[12]), p[16], p[15]) for z in (x,y))
+ (gp2x, gp2y) = (_logistic(z, p[10], p[13], p[16], p[15]) for z in (x,y))
+ (gp3x, gp3y) = (_logistic(z, p[11], p[14], p[16], p[15]) for z in (x,y))
+ nux = x <= p[15] ? 0.5 : 0.75
+ nuy = y <= p[15] ? 0.5 : 0.75
+ gamx = butter(sinpi(w), (gp1x, gp2x, gp3x))
+ gamy = butter(sinpi(w), (gp1y, gp2y, gp3y))
+ gamxy = sqrt(0.5*(gamx+gamy))
+ gamx_y = (gamx^0.25)*(gamy^0.25)
+ return (gamx_y/gamxy)*mtn_cor(0.5*(nux+nuy), abs(x-y)/gamxy)
+end
+
+# Phase term:
+phase(w,x,y,p) = exp(2.0*pi*im*p[23]*sinpi(w)*(x-y))
+function phased1(w,x,y,p)
+ 2.0*pi*im*sinpi(w)*(x-y)*phase(w,x,y,p)
+end
+
+# Whole integrand, and manual derivative with respect to phase parameter:
+integrand(w,x,y,p) = sqrt(Sf(w,x,p)*Sf(w,y,p))*Cw(w,x,y,p)*phase(w,x,y,p)
+integrand_dp1(w,x,y,p) = sqrt(Sf(w,x,p)*Sf(w,y,p))*Cw(w,x,y,p)*phased1(w,x,y,p)
+
+# Integrand with no phase for autodiff:
+integrand_np(w,x,y,p) = sqrt(Sf(w,x,p)*Sf(w,y,p))*Cw(w,x,y,p)
+
+# Derivatives of the kernel:
+dfuns =convert(Vector{Function},
+ [(w,x,y,p)->coord_deriv(z->integrand_np(w,x,y,z), p, j)*phase(w,x,y,p)
+ for j in 5:22])
+
+# do the phase functions derivative by hand because ForwardDiff doesn't
+# support complex numbers....
+push!(dfuns, integrand_dp1)
+
+# The nugget function, added as a post-call. This derivative function gets
+# added to the list of kernel derivatives in the space-time domain.
+nugfn(x,y,p) = Float64(x==y)*p[24]^2 + Float64(x[1]==y[1])*p[25]^2
+dnugfn1(x,y,p) = Float64(x==y)*2.0*p[24]
+dnugfn2(x,y,p) = Float64(x[1]==y[1])*2.0*p[25]
+
A example/paperscripts/nsscale.jl => example/paperscripts/nsscale.jl +35 -0
@@ 0,0 1,35 @@
+
+using LinearAlgebra
+
+mutable struct NonstationaryScale{T,B,F,N} <: Function
+ locs::NTuple{N,T}
+ ix::NTuple{N,Int64}
+end
+
+function nwts(pt, locs::NTuple{N,T})::NTuple{N, Float64} where{N,T}
+ tmp = map(z->exp(-abs(pt-z)/50), locs)
+ tmp ./ sum(tmp)
+end
+
+# Nonstationary scale function evaluate at space-time location
+function (NS::NonstationaryScale{T,false,0,N})(x, y, p) where{T,N}
+ out = 1.0
+ for _x in (x,y)
+ (time, space) = _x
+ wts = nwts(time, NS.locs)
+ out *= sum(z->p[z[1]]*z[2], zip(NS.ix, wts))
+ end
+ out
+end
+
+# Efficient derivatives---but need the chain rule for two applications!
+function (NS::NonstationaryScale{T,true,J,N})(x, y, p) where{T,J,N}
+ (wx, wy) = map(pt->nwts(pt[1], NS.locs), (x,y))
+ # Evaluations for x and y:
+ (nx, ny) = map(w->sum(z->p[z[1]]*z[2], zip(NS.ix, w)), (wx,wy))
+ # Derivatives for x and y:
+ (dx, dy) = map(w->w[J]*2.0*p[NS.ix[J]], (wx,wy))
+ # Return chain rule output:
+ nx*dy + dx*ny
+end
+
A example/paperscripts/untransformer.jl => example/paperscripts/untransformer.jl +29 -0
@@ 0,0 1,29 @@
+
+function transf(v)
+ [v[1],
+ v[2],
+ v[3],
+ v[4],
+ v[5],
+ v[6],
+ v[7],
+ v[8],
+ v[9],
+ exp(v[10]),
+ exp(v[11]),
+ v[12],
+ exp(v[13]),
+ exp(v[14]),
+ v[15],
+ v[16],
+ exp(v[17]),
+ exp(v[18]),
+ exp(v[19]),
+ exp(v[20]),
+ exp(v[21]),
+ exp(v[22]),
+ v[23],
+ v[24],
+ v[25]]
+end
+
A example/paperscripts/writer.jl => example/paperscripts/writer.jl +31 -0
@@ 0,0 1,31 @@
+using DelimitedFiles
+
+function tablesave!(name, M::Matrix; rlabels=nothing, clabels=nothing)
+ out = string.(M)
+ if !isnothing(rlabels)
+ out = hcat(string.(rlabels), out)
+ end
+ if !isnothing(clabels)
+ is_row_nothing = isnothing(rlabels)
+ rw1 = string.(clabels)
+ if !is_row_nothing
+ pushfirst!(rw1, "")
+ end
+ rw1[end] = rw1[end] * "\\\\"
+ writedlm("header_"*name, reshape(rw1, 1, length(rw1)), '&')
+ end
+ out[:,end] .= out[:,end] .* repeat(["\\\\"], size(out,1))
+ writedlm(name, out, '&')
+end
+
+function gnuplot_save_matrix!(name, M::Matrix{Float64}, row_pts, col_pts)
+ out = zeros(size(M,1)+1, size(M,2)+1)
+ out[1,2:end] .= col_pts
+ out[2:end,1] .= row_pts
+ out[2:end, 2:end] .= M
+ writedlm(name, out, ',')
+end
+
+function gnuplot_save_vector!(name, M, pts)
+ writedlm(name, hcat(pts, M), ',')
+end