using DelimitedFiles, Statistics, HDF5, SMultitaper, Optim, ForwardDiff, HalfSpectral

# Load in the model (Cw, Sf) and the extractor function (extract_block).
include("model.jl")
include("extractor.jl")

"""
    extract_day_slice(day, timewin, gates)

Read the raw vertical-stare HDF5 file for `day`, extract the block of Doppler
data for the time window `timewin` and the requested `gates`, and return it as
a `Dict{Int64, Vector{Float64}}` indexed by gate _number_, not altitude, since
indexing a hashmap by a floating point number is bad style (too easy for a tiny
rounding difference to silently miss a key).

Emits a warning if the returned altitudes are not uniformly 30 m apart, which
indicates that some requested gates are missing from the file.
"""
function extract_day_slice(day, timewin, gates)
  # Load in the data:
  rdata = read(h5open("../../data/raw/Vert_2015_06_"*day*"_14.h5"))
  (times, dopp, ints) = (rdata[x] for x in ("Time", "Doppler", "Intensity"))
  (tgrid, alts, data) = extract_block(timewin, times, gates, dopp, ints,
                                      nothing, asmatrix=true)
  # Gates are nominally 30 m apart; any other spacing means a gate is missing.
  if unique(diff(alts)) != [30.0]
    @warn "You are missing gates in the range you've requested."
  end
  # Reshape to a dict for easier reading in the fitting:
  datadict = Dict{Int64, Vector{Float64}}()
  for (j, gatej) in enumerate(gates)
    datadict[gatej] = data[2][j,:]
  end
  datadict
end

"""
    cross_specs(datadict; coherence=true)

Compute multitaper cross-spectra (or, by default, coherences) for every pair of
gates present in `datadict`, returned as a `Dict` keyed by `(gj, gk)` with
`gj < gk`.
"""
function cross_specs(datadict; coherence=true)
  # NOTE(review): when coherence=true the estimates are real(...)'d below, so
  # the complex eltype is wider than strictly necessary — kept as-is to
  # preserve behavior.
  out_type = coherence ? Complex{Float64} : Float64
  crosses  = Dict{Tuple{Int64, Int64}, Vector{out_type}}()
  gates    = sort(collect(keys(datadict)))
  for (j, gj) in enumerate(gates)
    # Iterate over the gates actually present instead of the raw integer range
    # (gj+1):gates[end], which would throw a KeyError on datadict[gk] if any
    # gate were missing — a situation extract_day_slice explicitly warns about.
    for gk in gates[(j+1):end]
      if coherence
        cross = real(multitaper(datadict[gj], datadict[gk], nw=15.0, k=25,
                                coherence=true, shift=true, center=true))
      else
        cross = multitaper(datadict[gj], datadict[gk], nw=5.0, k=8,
                           aweight=true, shift=true, center=true)
      end
      crosses[(gj, gk)] = cross
    end
  end
  crosses
end

"""
    marginal_specs(datadict)

Compute an adaptively-weighted multitaper spectral density estimate for each
gate's time series, returned as a `Dict` keyed by gate number.
"""
function marginal_specs(datadict)
  specs = Dict{Int64, Vector{Float64}}()
  gates = sort(collect(keys(datadict)))
  for gj in gates
    specs[gj] = multitaper(datadict[gj], nw=5.0, k=8,
                           aweight=true, shift=true, center=true)
  end
  specs
end

"""
    cross_objective(parms, crosses, gates)

A very simple sum-of-squares misfit between the model coherence curves `Cw` and
the estimated coherences in `crosses`, summed over gate pairs. For coherences.
But honestly, these parameters are hard enough to estimate even for maximum
likelihood that you might be better off just plugging in random values that
don't make obviously wrong curves.
"""
function cross_objective(parms, crosses, gates)
  out = 0.0
  len = length(crosses[(gates[1], gates[2])])
  # Fourier frequency grid on [-1/2, 1/2).
  fgd = collect(range(-0.5,0.5,length=len+1)[1:len])
  for (j, gj) in enumerate(gates)
    # Only the pairs that were actually estimated (see cross_specs) — avoids a
    # KeyError on crosses[(gj,gk)] if the gate set has gaps.
    for gk in gates[(j+1):end]
      # Gate numbers map to altitudes in meters via the 30 m gate spacing.
      model_cross = [Cw(f, gj*30, gk*30, parms) for f in fgd]
      out += sum(abs2, model_cross .- crosses[(gj,gk)])
    end
  end
  out
end

"""
    marginal_objective(parms, specs, gates)

Whittle-type negative log-likelihood for the marginal multitaper spectra: for
each gate, sum `log(S_model) + S_hat/S_model` over the frequency grid, where
the model density `Sf` gets `parms[24]^2` added as a nugget.
"""
function marginal_objective(parms, specs, gates)
  out = 0.0
  len = length(specs[gates[1]])
  # Fourier frequency grid on [-1/2, 1/2).
  fgd = collect(range(-0.5,0.5,length=len+1)[1:len])
  for gj in gates
    model_spec = [Sf(f,gj*30.0,parms) + parms[24]^2 for f in fgd]
    out += sum(log.(model_spec) .+ specs[gj]./model_spec)
  end
  out
end


# So, the INIT here can't be truly naive, like just a bunch of ones or
# something, because then things like \beta will never come through. But it CAN
# be pretty naive, as you can see here. Here is some justification for the
# things that aren't just one:
#
# (i) 13.0: this is a range parameter for the coherence, which is exponentiated.
# The spatial locations are coded as 30, 60, 90, etc, so if I set that to one
# the coherence functions will be zero for x \neq y. No optimization software
# could overcome that. This value is chosen so that the coherence at the zero
# frequency for adjacent gates is about 0.95.
#
# (ii, iii) -5.0, 0.0: again an exponentiated parameter for the coherence.
# These are chosen to be what they are just to make the initial coherence
# function have an even vaguely reasonable shape based on how the model is
# parameterized. They're set to the same above and below the boundary layer.
#
# (iv) 500: boundary layer height. If this were one, every altitude would be
# completely below the boundary layer and so the gradient of this objective with
# respect to beta would be zero.
#
# (v) 0.0: a phase term. I just don't try to capture this at all here. Zero is a
# more natural naive init than one for a parameter like this.
#
# (vi) 1.0e-3: a pretty small but nontrivial nugget. 1.0 is obviously too large
# for this data and 0 is not a good init.
#
const INIT = vcat(ones(4), # ns scale parameters. Not using those here because of the extra low-frequency multiplier, and because maxlik is very good at picking these parms up.
                  ones(4), # sdf parameters, will come through.
                  13.0,    # first coherence parameter (scale-dependent). (i)
                  -5.0,    # second coherence parameter (ii)
                  0.0,     # third coherence parameter (iii)
                  13.0,    # first coherence parameter (scale-dependent). (i)
                  -5.0,    # second coherence parameter (ii)
                  0.0,     # third coherence parameter (iii)
                  500.0,   # beta height (iv)
                  1.0,     # tau parameter
                  ones(4), # extra low frequency power, will come through
                  ones(2), # scale decay above boundary layer
                  0.0,     # phase term, won't come through. (v)
                  ones(2).*1.0e-3 # nuggets, won't come through. (vi)
                  )


"""
    fit_day(day, timewin, gates, init=INIT)

An incremental approach to the incremental generation of initializations for
the maximum likelihood estimation: fit marginal spectra first, then fit
cross-spectra, warm-starting the latter from the former. You need to use real
(ForwardDiff) derivatives here or else the generic first order methods will
drive the irrelevant parameters up to ridiculous values. This is not
particularly speedy on my box. But it does serve as nice reproducible proof
that you could start from an impossibly stupid initialization and bootstrap
yourself to a good MLE for the half-spectral model.

Returns a `Dict` with keys `"init"`, `"marginal"`, and `"coherence"`.
"""
function fit_day(day, timewin, gates, init=INIT)
  datadict = extract_day_slice(day, timewin, gates)
  out = Dict{String, Vector{Float64}}()
  out["init"] = copy(init)  # plain copy suffices for a Vector{Float64}
  for (objfn, spfn, nam) in ((marginal_objective, marginal_specs, "marginal"),
                             (cross_objective, cross_specs, "coherence"))
    println("Fitting $nam spectra...")
    sp  = spfn(datadict)
    obj = p -> objfn(p, sp, gates)
    # Exact in-place AD gradient and Hessian for the trust-region Newton solve.
    gr  = (g,p) -> ForwardDiff.gradient!(g, obj, p)
    hs  = (h,p) -> ForwardDiff.hessian!(h, obj, p)
    # The coherence stage warm-starts from the marginal-stage estimate.
    ini  = (nam == "marginal") ? out["init"] : out["marginal"]
    opts = Optim.Options(iterations=3_000, g_tol=1.0e-5,
                         show_trace=true, show_every=10)
    est  = optimize(obj, gr, hs, ini, NewtonTrustRegion(), opts)
    display(est)
    # Use the documented accessor rather than reaching into the result struct.
    out[nam] = copy(Optim.minimizer(est))
  end
  out
end

# Fit each day and write the resulting initialization vector to disk as CSV.
for (day, gates) in zip(("02", "03", "06", "20", "24", "28"),
                        (7:23, 7:28, 7:23, 7:28, 7:28, 7:28))
  # Announce the day BEFORE the (slow) fitting runs, not after it finishes.
  println("Working with day $day...")
  mle_init = fit_day(day, (14.02, 14.24), gates)
  outfilename = string("../../data/initialization/init_", day, "_14.csv")
  # Prefer the full coherence-stage fit, falling back to the marginal fit.
  if haskey(mle_init, "coherence")
    writedlm(outfilename, mle_init["coherence"], ',')
  else
    writedlm(outfilename, mle_init["marginal"], ',')
  end
  println("Finished with day $day.\n\n\n")
end