~cgeoga/HalfSpectral.jl

0565ddcae62e2ec597368f4316f21d1ed22555bc — Chris Geoga 2 years ago
Add a paperscript file for generating Whittle-type MLEs that can then in
turn be used as initializations in a true maximum likelihood computation.
1 file changed, 169 insertions(+), 0 deletions(-)

A example/paperscripts/generate_nls_init.jl
A example/paperscripts/generate_nls_init.jl => example/paperscripts/generate_nls_init.jl +169 -0
@@ -0,0 +1,169 @@

using DelimitedFiles, Statistics, HDF5, SMultitaper, Optim, ForwardDiff, HalfSpectral

# Load in the model and the extractor function.
include("model.jl")
include("extractor.jl")

# A function that reads in the data and parses it into the format I want. In
# this case, a Dict, indexed by gate _number_, not altitude, since indexing a
# hashmap by a floating point number is bad style (to me, at least). See the
# sketch after the function for the shape of the output.
function extract_day_slice(day, timewin, gates)
  # Load in the data:
  rdata = h5open("../../data/raw/Vert_2015_06_"*day*"_14.h5") do h5f
    read(h5f)  # read everything, then let the do-block close the handle
  end
  (times, dopp, ints) = (rdata[x] for x in ("Time", "Doppler", "Intensity"))
  (tgrid, alts, data) = extract_block(timewin, times, gates, dopp, ints, 
                                      nothing, asmatrix=true)
  if unique(diff(alts)) != [30.0]
    @warn "You are missing gates in the range you've requested."
  end
  # Reshape to a dict for easier reading in the fitting:
  datadict = Dict{Int64, Vector{Float64}}()
  for (j, gatej) in enumerate(gates)
    datadict[gatej] = data[2][j,:]
  end
  datadict
end
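
# A minimal sketch of the returned shape (hypothetical gate numbers and
# values, not real data):
#   datadict = Dict{Int64, Vector{Float64}}(7 => randn(128), 8 => randn(128))
#   datadict[7]  # the series at gate 7, treated as altitude 7*30 m below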

function cross_specs(datadict; coherence=true)
  # Coherences come back real (see the `real(...)` below); raw cross-spectra
  # are complex.
  out_type = coherence ? Float64 : Complex{Float64}
  crosses = Dict{Tuple{Int64, Int64}, Vector{out_type}}()
  gates   = sort(collect(keys(datadict)))
  for gj in gates
    for gk in (gj+1):gates[end]
      if coherence
        cross  = real(multitaper(datadict[gj], datadict[gk], nw=15.0, k=25,
                                 coherence=true, shift=true, center=true))
      else
        cross = multitaper(datadict[gj], datadict[gk], nw=5.0, k=8,
                           aweight=true, shift=true, center=true)
      end
      crosses[(gj, gk)] = cross
    end
  end
  crosses
end
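
# Note that `crosses` is keyed by ordered pairs (gj, gk) with gj < gk; the
# symmetric half is never stored, and cross_objective below iterates over
# the same ordered pairs.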

function marginal_specs(datadict)
  specs   = Dict{Int64, Vector{Float64}}()
  gates   = sort(collect(keys(datadict)))
  for gj in gates
    specs[gj] = multitaper(datadict[gj], nw=5.0, k=8,
                           aweight=true, shift=true, center=true)
  end
  specs
end

# For coherences. A very simple sum of squares. But honestly, these parameters
# are hard enough to estimate even for maximum likelihood that you might be
# better off just plugging in random values that don't make obviously wrong
# curves.
function cross_objective(parms, crosses, gates)
  out = 0.0
  len = length(crosses[(gates[1], gates[2])])
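  # The grid of `len` equally spaced Fourier frequencies in [-1/2, 1/2),
  # presumably matching the grid of the shifted multitaper estimates.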
  fgd = collect(range(-0.5,0.5,length=len+1)[1:len])
  for gj in gates
    for gk in (gj+1):gates[end]
      model_cross = [Cw(f, gj*30, gk*30, parms) for f in fgd]
      out += sum(abs2, model_cross .- crosses[(gj,gk)])
    end
  end
  out
end
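
# A note on marginal_objective below: up to additive constants it is the
# Whittle negative log-likelihood, summing log S(f) + I_hat(f)/S(f) over the
# Fourier frequencies at each gate, where I_hat is the multitaper estimate
# and the model spectrum S includes the squared nugget parms[24]^2.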

function marginal_objective(parms, specs, gates)
  out = 0.0
  len = length(specs[gates[1]])
  fgd = collect(range(-0.5,0.5,length=len+1)[1:len])
  for gj in gates
    model_spec = [Sf(f,gj*30.0,parms) + parms[24]^2 for f in fgd]
    out += sum(log.(model_spec) .+ specs[gj]./model_spec)
  end
  out
end


# So, the INIT here can't be truly naive, like just a bunch of ones or
# something, because then things like \beta will never come through. But it CAN
# be pretty naive, as you can see here. Here is some justification for the
# things that aren't just one:
#
# (i) 13.0: this is a range parameter for the coherence, which is exponentiated.
# The spatial locations are coded as 30, 60, 90, etc., so if I set that to one the
# coherence functions will be zero for x \neq y. No optimization software could
# overcome that. This value is chosen so that the coherence at the zero
# frequency for adjacent gates is about 0.95.
#
# (ii, iii) -5.0, 0.0: again an exponentiated parameter for the coherence.
# These are chosen to be what they are just to make the initial coherence
# function have an even vaguely reasonable shape based on how the model is
# parameterized. They're set to the same values above and below the boundary layer.
#
# (iv) 500: boundary layer height. If this were one, every altitude would be
# entirely on one side of the boundary layer, and so the gradient of this
# objective with respect to beta would be zero.
#
# (v) 0.0: a phase term. I just don't try to capture this at all here. Zero is a
# more natural naive init than one for a parameter like this.
#
# (vi) 1.0e-3: a pretty small but nontrivial nugget. 1.0 is obviously too large for
# this data and 0 is not a good init. 
#
const INIT = vcat(ones(4),    # ns scale parameters. Not using those here because of the extra low-frequency multiplier, and because maxlik is very good at picking these parms up.
                  ones(4),    # sdf parameters, will come through.
                  13.0,       # first coherence parameter (scale-dependent). (i)
                 -5.0,        # second coherence parameter (ii)
                  0.0,        # third coherence parameter (iii)
                  13.0,       # first coherence parameter (scale-dependent). (i)
                 -5.0,        # second coherence parameter (ii)
                  0.0,        # third coherence parameter (iii)
                  500.0,      # beta height (iv)
                  1.0,        # tau parameter 
                  ones(4),    # extra low frequency power, will come through
                  ones(2),    # scale decay above boundary layer
                  0.0,        # phase term, won't come through. (v)
                  ones(2).*1.0e-3 # nuggets, won't come through. (vi)
                 )
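
# For reference: INIT has 25 entries; the objectives above index into it
# directly (e.g. parms[24], the first nugget), so the length and ordering
# here have to match the model in model.jl.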


# An incremental approach to generating initializations for the maximum
# likelihood estimation: fit the marginal spectra first, then fit the
# cross-spectra. You need to use real derivatives here or else the generic
# first-order methods will drive the irrelevant parameters up to ridiculous
# values. This is not particularly speedy on my box. But it does serve as
# nice reproducible proof that you could start from an impossibly stupid
# initialization and bootstrap yourself to a good MLE for the half-spectral
# model.
function fit_day(day, timewin, gates, init=INIT)
  datadict = extract_day_slice(day, timewin, gates)
  out      = Dict{String, Vector{Float64}}()
  out["init"] = deepcopy(init)
  for (objfn, spfn, nam) in ((marginal_objective, marginal_specs, "marginal"),
                             (cross_objective,    cross_specs,    "coherence"))
    println("Fitting $nam spectra...")
    sp   = spfn(datadict)
    obj  = p -> objfn(p, sp, gates)
    gr   = (g,p) -> ForwardDiff.gradient!(g, obj, p)
    hs   = (h,p) -> ForwardDiff.hessian!(h, obj, p)
    ini  = (nam == "marginal") ? out["init"] : out["marginal"]
    opts = Optim.Options(iterations=3_000, g_tol=1.0e-5, 
                         show_trace=true, show_every=10)
    est  = optimize(obj, gr, hs, ini, NewtonTrustRegion(), opts)
    display(est)
    out[nam] = deepcopy(est.minimizer)
  end
  out
end
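
# fit_day hands back a Dict with keys "init" (the starting point), "marginal"
# (the fit to the marginal spectra), and "coherence" (the subsequent fit to
# the coherences, started from the marginal estimate).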

for (day, gates) in zip(("02", "03", "06", "20", "24", "28"),
                        (7:23, 7:28, 7:23, 7:28, 7:28, 7:28))
  println("Working with day $day...")
  mle_init = fit_day(day, (14.02, 14.24), gates)
  outfilename = string("../../data/initialization/init_", day, "_14.csv")
  if haskey(mle_init, "coherence")
    writedlm(outfilename, mle_init["coherence"], ',')
  else
    writedlm(outfilename, mle_init["marginal"], ',')
  end
  println("Finished with day $day.\n\n\n")
end