~enricoschumann/neighbours

c8c2846bd7628cef77b18632572b6bb345fbd0d2 — Enrico Schumann 1 year, 3 months ago 6fb8efd
Update vignette and docs
4 files changed, 182 insertions(+), 96 deletions(-)

M DESCRIPTION
M inst/tinytest/test_neighbours.R
M man/neighbourfun.Rd
M vignettes/neighbours.Rnw
M DESCRIPTION => DESCRIPTION +3 -2
@@ 2,7 2,7 @@ Package: neighbours
Type: Package
Title: Neighbourhood Functions for Local-Search Algorithms
Version: 0.1-0
Date: 2022-07-29
Date: 2022-08-11
Maintainer: Enrico Schumann <es@enricoschumann.net>
Authors@R: person(given = "Enrico", family = "Schumann",
                  role  = c("aut", "cre"),


@@ 23,6 23,7 @@ Description: Neighbourhood functions are key components of
  Optimization in Finance" by M. Gilli, D. Maringer and
  E. Schumann (2019, ISBN:978-0128150658).
License: GPL-3
URL: http://enricoschumann.net/R/packages/neighbours/,
URL: http://enricoschumann.net/R/packages/neighbours/ ,
     https://sr.ht/~enricoschumann/neighbours/ ,
     https://github.com/enricoschumann/neighbours
Suggests: NMOF, tinytest

M inst/tinytest/test_neighbours.R => inst/tinytest/test_neighbours.R +11 -0
@@ 269,3 269,14 @@ for (i in 1:steps) {
    expect_true(all(xn <= x.max))
    x <- xn
}




## ------ permute: with list
N <- neighbourfun(type = "permute", stepsize = 2)
x <- as.list(letters[1:5])
xx <- N(x)
expect_equal(mode(xx), "list")
expect_equal(sum(unlist(x) == unlist(xx)), 3)
expect_equal(sum(unlist(x) == sort(unlist(xx))), 5)

M man/neighbourfun.Rd => man/neighbourfun.Rd +56 -32
@@ 22,49 22,43 @@ neighborfun (min = 0, max = 1, kmin = NULL, kmax = NULL,
             A = NULL, ...)
}
\arguments{
  \item{min}{a numeric vector. A scalar is recycled to \code{length}.
  \item{min}{a numeric vector. A scalar is recycled to \code{length},
    if specified.
  }
  \item{max}{a numeric vector. A scalar is recycled to \code{length}.
  \item{max}{a numeric vector. A scalar is recycled to \code{length},
    if specified.
  }
  \item{kmin}{integer
  \item{kmin}{\code{\link{NULL}} or \code{integer}:
    the minimum number of \code{\link{TRUE}} values in logical vectors
  }
  \item{kmax}{a numeric vector
  \item{kmax}{\code{\link{NULL}} or \code{integer}:
    the maximum number of \code{\link{TRUE}} values in logical vectors
  }
  \item{stepsize}{numeric. For numeric neighbourhoods, the (average)
    stepsize. For logical neighbourhoods, the number of elements that
             are changed.
  \item{stepsize}{numeric. For numeric neighbourhoods,
    the (average) stepsize. For logical neighbourhoods,
    the number of elements that are changed.
  }
  \item{sum}{
    logical or numeric. If specified and of length 1, only zero-sum
    changes will be applied to a solution (i.e. the sum over all
  \item{sum}{logical or numeric.
    If specified and of length 1, only zero-sum
    changes will be applied to a numeric vector (i.e. the sum over all
    elements in a solution remains unchanged).

  }
  \item{random}{
    logical. Should the stepsize be random or fixed?
  \item{random}{logical. Should the stepsize be random or fixed?
  }
  \item{active}{

    a vector: either the positions of elements that may
    be changed, or a logical vector. The default is a
  \item{active}{a vector: either the positions of elements
    that may be changed, or a logical vector. The default is a
    length-one logical vector, which means that all
    elements may be changed.

  }
  \item{update}{

    either \code{logical} (the default \code{FALSE}) or
  \item{update}{either \code{logical} (the default is \code{FALSE}) or
    character, specifying the type of
    updating. Currently supported is \code{"Ax"}, in
    which case the matrix \code{A} be specified. See
    which case the matrix \code{A} must be specified. See
    examples.

  }
  \item{A}{
    a numeric matrix
  }
  \item{type}{
    string: either \code{"numeric"}, \code{"logical"} or \code{"permute"}
  \item{A}{a numeric matrix}
  \item{type}{string: either \code{"numeric"},
    \code{"logical"} or \code{"permute"}
  }
  \item{length}{
    integer: the length of a vector


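As a small aside, here is a sketch of how several of the arguments documented above combine in a single call; the concrete values (a solution of length 10, a stepsize of 0.01, the first two elements held fixed) are illustrative choices only, not taken from the package sources.

library("neighbours")

## numeric neighbourhood: solutions of length 10, elements kept
## between 0 and 1, zero-sum changes, and the first two elements
## never touched (they are not 'active')
nb <- neighbourfun(type = "numeric",
                   length = 10,
                   min = 0, max = 1,
                   stepsize = 0.01,
                   sum = TRUE,
                   active = c(FALSE, FALSE, rep(TRUE, 8)))

x <- rep(0.1, 10)
xn <- nb(x)
sum(xn)            ## still 1: changes are zero-sum
xn[1:2] == x[1:2]  ## TRUE TRUE: inactive elements are unchanged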
@@ 79,6 73,18 @@ neighborfun (min = 0, max = 1, kmin = NULL, kmax = NULL,

  \code{neighborfun} is an alias for \code{neighbourfun}.


  Three types of solution vectors are supported:
  \describe{

    \item{\code{numeric}}{a neighbour is created by adding
      typically small numbers to, or subtracting them from, random
      elements of a solution}
    \item{\code{logical}}{\code{\link{TRUE}},
      \code{\link{FALSE}} values are switched}
    \item{\code{permute}}{elements of \code{x} are exchanged. Works with
      atomic and generic vectors (aka lists).}
  }

}
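The 'numeric' and 'permute' types are exercised in the examples further down in this file; as a brief illustration of the 'logical' type, a sketch (the length of 10 and the setting kmin = kmax = 3 are arbitrary choices):

## 'logical' type: TRUE/FALSE values are switched, here with the
## number of TRUE values kept fixed at 3 (an arbitrary choice)
N <- neighbourfun(type = "logical", kmin = 3, kmax = 3)
x <- logical(10)
x[1:3] <- TRUE
xn <- N(x)
sum(xn)        ## still 3 TRUE values
sum(xn != x)   ## typically 2: one TRUE and one FALSE were switched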
\value{



@@ 126,7 132,7 @@ for (i in 1:5) {



## UPDATING
## UPDATING a numeric neighbourhood
##   the vector 'x' is used in the product 'Ax'
A <- array(rnorm(100*25), dim = c(100, 25))
N <- neighbourfun(type = "numeric",


@@ 142,9 148,27 @@ all.equal(A \%*\% x, attr(x, "Ax"))



## ## a useful way to store/specify parameters
## settings <- list(...)
## do.call(neighbourfun, settings)
## a PERMUTATION neighbourhood
x <- 1:5

N <- neighbourfun(type = "permute")
N(x)
## [1] 1 2 5 4 3
##         ^   ^

N <- neighbourfun(type = "permute", stepsize = 5)
N(x)

## 'x' is not restricted to integers
x <- letters[1:5]
N(x)


## a useful way to STORE/SPECIFY PARAMETERS, e.g. in config files
settings <- list(type = "numeric",
                 min = 0.0,
                 max = 0.2)
do.call(neighbourfun, settings)
}

\keyword{ optimize }

M vignettes/neighbours.Rnw => vignettes/neighbours.Rnw +112 -62
@@ 50,6 50,7 @@ a seed.
<<package-seed>>=
library("neighbours")
set.seed(347343)
set.seed(34734)
@

\noindent In the examples that follow, we will use a


@@ 66,7 67,7 @@ LSopt <- if (requireNamespace("NMOF")) {
             function(OF, algo = list(), ...) {
                 xc  <- algo$x0
                 xcF <- OF(xc, ...)
                 for (s in seq_len(algo$nS)) {
                 for (s in seq_len(algo$nI)) {
                     xn <- algo$neighbour(xc, ...)
                     xnF <- OF(xn, ...)
                     if (xnF <= xcF) {


@@ 151,7 152,7 @@ sol.ls <- LSopt(column_cor,
@

Let us evaluate the final solution.
<<>>=
<<check-sol>>=
column_cor(sol.ls$xbest, X, y)
@



@@ 207,8 208,8 @@ compare_vectors(x)

We restrict the changes that can be made to the
solution: the first three elements must not be touched;
only the remaining elements may change. (They are
\texttt{active}.)
only the remaining elements may change, i.e. are
\texttt{active}.

<<restrict>>=
active <- c(rep(FALSE, 3),


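Purely as an illustration of the \texttt{active} mechanism, a sketch with made-up data (a numeric neighbourhood of length 10; the vignette itself works with a logical solution):

## illustration only: hold the first three elements fixed
x <- runif(10)
active <- c(rep(FALSE, 3), rep(TRUE, 7))
nb.act <- neighbourfun(type = "numeric",
                       stepsize = 0.05,
                       active = active)
xn <- nb.act(x)
all(xn[1:3] == x[1:3])   ## TRUE: the first three elements never change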
@@ 232,7 233,6 @@ do.call(compare_vectors, xs)

\section*{Another example: minimising portfolio risk}


Suppose we are given a matrix \texttt{R} and aim to
find a vector $x$ such that the variance of the
elements in the product~$Rx$ is minimised.  This is


@@ 254,26 254,29 @@ could write an objective function as follows:
<<variance1>>=
variance <- function(x, S, ...)
    x %*% S %*% x
@ 
@
(In the code, \texttt{S} stands for the
variance--covariance matrix $\Sigma$.)


An alternative way to
write the objective function is the following.
<<>>=
<<variance2>>=
variance2 <- function(x, R, ...)
    var(R %*% x)
@ 
The disadvantage of this second version is efficiency
@
The disadvantage of the second version is its lower efficiency:
$R$ might have many rows, and then computing the product~$Rx$
in every iteration would be more expensive than using $S$,
which essentially is the crossproduct of~$R$.  But as we shall see
below, this inefficiency can be remedied.
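That both formulations compute the same quantity can be checked with throwaway data; the dimensions and the names used here are arbitrary, chosen so as not to clash with the objects defined below.

## quick check with arbitrary data: x'Sx equals var(Rx)
R1 <- matrix(rnorm(120 * 20), nrow = 120, ncol = 20)
S1 <- cov(R1)
w1 <- rep(1/20, 20)
all.equal(drop(w1 %*% S1 %*% w1),
          drop(var(R1 %*% w1)))
## TRUE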


Suppose we start with an equal-weight portfolio.

<<>>=
R <- NMOF::randomReturns(20, 120, 0.03, rho = 0.6)
R <- NMOF::randomReturns(na = 80, 120, 0.03, rho = 0.6)
S <- cov(R)  ## Sigma
x0 <- rep(1/20, 20)
x0 <- rep(1/ncol(R), ncol(R))
@

When the argument \texttt{type} is set to


@@ 282,21 285,20 @@ will randomly select elements, and then add and
subtract numbers to and from those elements.  The size
of those numbers is controlled by argument
\texttt{stepsize}, and with argument \texttt{random}
set to TRUE (the default), the step sizes will vary
randomly.  When argument \texttt{sum} is \texttt{TRUE},
the function will add and subtract from chosen elements
in such a way that the sum over all elements remains
unchanged.  (That is a typical restriction in
portfolio-selection models.)
set to \texttt{TRUE} (the default), the step sizes will
vary randomly.  When argument \texttt{sum} is
\texttt{TRUE}, the function will add and subtract from
chosen elements in such a way that the sum over all
elements remains unchanged.  (That is a typical
restriction in portfolio-selection models.)

<<>>=
nb <- neighbourfun(type = "numeric", min = 0, max = 0.10,
<<nb>>=
nb <- neighbourfun(type = "numeric", min = 0, max = 0.20,
                   stepsize = 0.005)
@

@ 
The objective function.

We can solve this problem with Local Search.
We can solve this problem with Local Search, using the
first version of the objective function.
<<mv-ls1>>=
sol.ls <- LSopt(variance,
                list(x0 = x0,


@@ 305,29 307,31 @@ sol.ls <- LSopt(variance,
                     printBar = FALSE),
                S = S)

NMOF::minvar(S, wmin = 0, wmax = 0.1)
sol.qp <- if (requireNamespace("NMOF"))
              round(NMOF::minvar(S, wmin = 0, wmax = 0.2), 2) else NA
data.frame(LS = round(sol.ls$xbest, 2)[1:10],
           QP = sol.qp[1:10])
@

When we feed the second objective function, \texttt{variance2}, to \texttt{LSopt}, we arrive at the same solution.
<<mv-ls2>>=
nb <- neighbourfun(type = "numeric", min = 0, max = 0.10,
                   stepsize = 0.005)
sol.ls <- LSopt(variance2,
sol.ls2 <- LSopt(variance2,
                list(x0 = x0,
                     neighbour = nb,
                     nI = 1000,
                     printBar = FALSE),
                R = R)

NMOF::minvar(S, wmin = 0, wmax = 0.1)
data.frame(LS = round(sol.ls2$xbest, 2)[1:10],
           QP = sol.qp[1:10])
@

This second objective function is less
efficient than the first.  But it is much more flexible:
only for minimising variance,  could we take the shortcut via
the variance--covariance matrix.  But for other
measures of risk, we cannot do that.  One example is the so-called
semi-variance, defined as
This second objective function is, as described above,
less efficient than the first.  But it is much more
flexible: only for minimising variance could we take
the shortcut via the variance--covariance matrix.  But
for other measures of risk, we cannot do that.  One
example is the so-called semi-variance, defined as

\begin{equation}
  \label{eq:sv}


@@ 340,65 344,111 @@ semivariance <- function(x, R, ...) {
    Rx <- R %*% x
    Rx.lower <- pmin(Rx - mean(Rx), 0)
    sum(Rx.lower^2)  ## sum of squared below-mean returns
} 
@ 
}
@


\section*{Updating}

\section*{Updating}

In many applications, such as the example in the
previous section, but also more generally in data
analysis, the solution \texttt{x} is used to
compute a matrix/vector product $Ax$, in which $A$ is an
$m$ times $n$~matrix, and $x$ is of length$n$.

$m$ times $n$~matrix, and $x$ is of length $n$.

If we change only a few elements in the solution, we can
update this product instead of recomputing it, and so save
computing time (the longer $x$ is, the more time we save).


Let $x^{\mathrm{c}}$ denote the current solution and
$x^{\mathrm{n}}$ the neighbour solution. This latter
solution is produced by adding element-wise the
sparse vector $x^{\Delta}$ to $x^{\mathrm{c}}$.
sparse vector $x^{\Delta}$ to $x^{\mathrm{c}}$:

\begin{equation*}
   x^{\mathrm{n}}  = \phantom{A(}x^{\mathrm{c}} + x^{\Delta}
   x^{\mathrm{n}} = \phantom{A(}x^{\mathrm{c}} + x^{\Delta}\,.
\end{equation*}

Then we have:
\begin{equation*}
  Ax^{\mathrm{n}}  = A(x^{\mathrm{c}}+ x^{\Delta}) =
  \underbrace{\ \ Ax^{\mathrm{c}}\ \ }_{\text{known}} + Ax^{\Delta}
  Ax^{\mathrm{n}} = A(x^{\mathrm{c}} + x^{\Delta}) =
  \underbrace{\ \ Ax^{\mathrm{c}}\ \ }_{\text{known}} + Ax^{\Delta}\,.
\end{equation*}
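In plain R, with throwaway data, the identity (and the saving it allows) looks as follows; the dimensions and the choice of changed positions are arbitrary.

## only elements 2 and 7 of the solution change
A1 <- matrix(rnorm(100 * 25), nrow = 100, ncol = 25)
xc <- runif(25)                    ## current solution
xd <- numeric(25)                  ## the sparse change x^Delta
xd[c(2, 7)] <- c(0.01, -0.01)
xn <- xc + xd                      ## neighbour solution
Axc <- A1 %*% xc                   ## known from the previous iteration
all.equal(A1 %*% xn,                              ## full recomputation ...
          Axc + A1[, c(2, 7)] %*% xd[c(2, 7)])    ## ... vs cheap update
## TRUE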

Let us solve the risk-minimisation model one more time.
<<>>=
attr(x0, "Ax") <- R %*% x0
@

<<variance3>>=
variance3 <- function(x, ...)
    var(attr(x, "Ax"))
@

<<nb-update>>=
nb_upd <- neighbourfun(type = "numeric", min = 0, max = 0.20,
                       stepsize = 0.005,
                       update = "Ax", A = R)
@

<<>>=
sol.ls3 <- LSopt(variance3,
                 list(x0 = x0,
                      neighbour = nb_upd,
                      nI = 1000,
                      printBar = FALSE),
                 R = R)

data.frame(LS = round(sol.ls3$xbest, 2)[1:10],
           QP = sol.qp[1:10])
@

But \texttt{variance3} is faster:
<<>>=

<<num-target>>=
target <- runif(100)
system.time(for (i in 1:100)
                sol.ls2 <- LSopt(variance2,
                                 list(x0 = x0,
                                      neighbour = nb_upd,
                                      nI = 1000,
                                      printDetail = FALSE,
                                      printBar = FALSE),
                                 R = R))

system.time(for (i in 1:100)
                sol.ls3 <- LSopt(variance3,
                                 list(x0 = x0,
                                      neighbour = nb_upd,
                                      nI = 1000,
                                      printDetail = FALSE,
                                      printBar = FALSE),
                                 R = R))

@

% <<num-target>>=
% target <- runif(100)

deviation <- function(x, target) {
    xx <- x - target
    crossprod(xx)
}

sol <- LSopt(deviation,
             list(neighbour = neighbourfun(type = "numeric",
                                           length = 100,
                                           stepsize = 0.05),
                  x0 = runif(length(target)),
                  nI = 50000,
                  printBar = FALSE),
             target = target)

data.frame(target[1:10], sol$xbest[1:10])
% deviation <- function(x, target) {
%     xx <- x - target
%     crossprod(xx)
% }

% sol <- LSopt(deviation,
%              list(neighbour = neighbourfun(type = "numeric",
%                                            length = 100,
%                                            stepsize = 0.05),
%                   x0 = runif(length(target)),
%                   nI = 50000,
%                   printBar = FALSE),
%              target = target)

@
% data.frame(target[1:10], sol$xbest[1:10])


% @