~hrbrmstr/tdigest

a1fd3cc0b74075dc1b1a9b145b8f3444720ac7c9 — hrbrmstr 4 years ago d9ed033
serialization
M .Rbuildignore => .Rbuildignore +1 -0
@@ 9,6 9,7 @@
^NOTES\.*html$
^\.codecov\.yml$
^README_files$
^README_cache$
^doc$
^docs$
^tmp$

M DESCRIPTION => DESCRIPTION +2 -2
@@ 1,8 1,8 @@
Package: tdigest
Type: Package
Title: Wicked Fast, Accurate Quantiles Using t-Digests
Version: 0.3.0
Date: 2019-07-25
Version: 0.4.0
Date: 2019-08-20
Authors@R: c(
    person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"), 
           comment = c(ORCID = "0000-0001-5670-2640")),

M NAMESPACE => NAMESPACE +2 -0
@@ 1,10 1,12 @@
# Generated by roxygen2: do not edit by hand

S3method("[",tdigest)
S3method(as.list,tdigest)
S3method(length,tdigest)
S3method(print,tdigest)
S3method(quantile,tdigest)
export("%>%")
export(as_tdigest)
export(is_tdigest)
export(td_add)
export(td_create)

M NEWS.md => NEWS.md +3 -0
@@ 1,3 1,6 @@
0.4.0
* Serialization & deserialization of tdigest objects

0.3.0
* ALTREP-aware
* `length()` and `[` implemented for `tdigest` objects

M R/create.R => R/create.R +29 -0
@@ 208,3 208,32 @@ length.tdigest <- function(x) {
  if ((i<=0) || (i>1)) return(NULL)
  td_value_at(x, i)
}

#' Serialize a tdigest object to an R list or unserialize a serialized tdigest
#' list back into a tdigest object
#'
#' These functions make it possible to create & populate a tdigest, serialize
#' it out, read it in at a later time, and continue populating it, enabling
#' compact distribution accumulation & storage for large, "continuous" datasets.
#'
#' @param x a tdigest object or a tdigest_list object
#' @param ... unused
#' @export
#' @examples
#' set.seed(1492)
#' x <- sample(0:100, 1000000, replace = TRUE)
#' td <- tdigest(x, 1000)
#' as_tdigest(as.list(td))
as.list.tdigest <- function(x, ...) {
  stopifnot(inherits(x, "tdigest"))
  # Copy the native digest state into plain R vectors, tagging the result
  # so as_tdigest() can recognize (and rebuild from) it later.
  structure(
    .Call("Rg_toR", x),
    class = c("tdigest_list", "list")
  )
}

#' @rdname as.list.tdigest
#' @export
as_tdigest <- function(x) {
  # Only accept the tagged list produced by as.list.tdigest()
  stopifnot(inherits(x, "tdigest_list"))
  # Rebuild the native t-digest (external pointer) from the serialized fields
  .Call("Rg_fromR", x)
}

M README.Rmd => README.Rmd +25 -4
@@ 56,7 56,7 @@ packageVersion("tdigest")

### Basic (Low-level interface)

```{r}
```{r basic}
td <- td_create(10)

td


@@ 76,7 76,7 @@ quantile(td)

#### Bigger (and Vectorised)

```{r}
```{r bigger}
td <- tdigest(c(0, 10), 10)

is_tdigest(td)


@@ 95,9 95,30 @@ tquantile(td, c(0, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1))
quantile(td)
```

#### Serialization

These [de]serialization functions make it possible to create & populate a tdigest, 
serialize it out, read it in at a later time, and continue populating it, enabling 
compact distribution accumulation & storage for large, "continuous" datasets.

```{r serialize}
set.seed(1492)
x <- sample(0:100, 1000000, replace = TRUE)
td <- tdigest(x, 1000)

tquantile(td, c(0, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1))

str(in_r <- as.list(td), 1)

td2 <- as_tdigest(in_r)
tquantile(td2, c(0, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1))

identical(in_r, as.list(td2))
```

#### ALTREP-aware

```{r}
```{r altrep}
N <- 1000000
x.altrep <- seq_len(N) # this is an ALTREP in R version >= 3.5.0



@@ 109,7 130,7 @@ length(td)

#### Proof it's faster

```{r}
```{r benchmark, cache=TRUE}
microbenchmark::microbenchmark(
  tdigest = tquantile(td, c(0, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1)),
  r_quantile = quantile(x, c(0, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1))

M README.md => README.md +46 -8
@@ 46,6 46,8 @@ Ertl](https://arxiv.org/abs/1902.04023) for more details on t-Digests.

The following functions are implemented:

  - `as.list.tdigest`: Serialize a tdigest object to an R list or
    unserialize a serialized tdigest list back into a tdigest object
  - `td_add`: Add a value to the t-Digest with the specified count
  - `td_create`: Allocate a new histogram
  - `td_merge`: Merge one t-Digest into another


@@ 80,7 82,7 @@ library(tdigest)

# current version
packageVersion("tdigest")
## [1] '0.3.0'
## [1] '0.4.0'
```

### Basic (Low-level interface)


@@ 138,6 140,42 @@ quantile(td)
## [1]   0.00000  24.74751  49.99666  75.24783 100.00000
```

#### Serialization

These \[de\]serialization functions make it possible to create &
populate a tdigest, serialize it out, read it in at a later time, and
continue populating it, enabling compact distribution accumulation &
storage for large, “continuous” datasets.

``` r
set.seed(1492)
x <- sample(0:100, 1000000, replace = TRUE)
td <- tdigest(x, 1000)

tquantile(td, c(0, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1))
##  [1]   0.0000000   0.8099857   9.6725790  19.7533723  29.7448283  39.7544675  49.9966628  60.0235148  70.2067574
## [10]  80.3090454  90.2594642  99.4269454 100.0000000

str(in_r <- as.list(td), 1)
## List of 7
##  $ compression   : num 1000
##  $ cap           : int 6010
##  $ merged_nodes  : int 226
##  $ unmerged_nodes: int 0
##  $ merged_count  : num 1e+06
##  $ unmerged_count: num 0
##  $ nodes         :List of 2
##  - attr(*, "class")= chr [1:2] "tdigest_list" "list"

td2 <- as_tdigest(in_r)
tquantile(td2, c(0, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1))
##  [1]   0.0000000   0.8099857   9.6725790  19.7533723  29.7448283  39.7544675  49.9966628  60.0235148  70.2067574
## [10]  80.3090454  90.2594642  99.4269454 100.0000000

identical(in_r, as.list(td2))
## [1] TRUE
```

#### ALTREP-aware

``` r


@@ 161,19 199,19 @@ microbenchmark::microbenchmark(
  r_quantile = quantile(x, c(0, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99, 1))
)
## Unit: microseconds
##        expr       min       lq        mean    median       uq        max neval cld
##     tdigest     7.868     9.23    19.94667    10.203    32.59     34.672   100  a 
##  r_quantile 52673.523 53300.41 55414.58517 53616.418 56251.37 108089.810   100   b
##        expr      min        lq       mean    median        uq        max neval cld
##     tdigest     8.02     9.175    20.2545    10.185    32.682     43.003   100  a 
##  r_quantile 52657.60 53307.742 55924.6932 54093.988 56487.027 108778.946   100   b
```

## tdigest Metrics

| Lang         | \# Files |  (%) | LoC |  (%) | Blank lines |  (%) | \# Lines |  (%) |
| :----------- | -------: | ---: | --: | ---: | ----------: | ---: | -------: | ---: |
| C            |        3 | 0.27 | 350 | 0.65 |          46 | 0.36 |       26 | 0.11 |
| R            |        6 | 0.55 | 140 | 0.26 |          31 | 0.24 |      139 | 0.57 |
| Rmd          |        1 | 0.09 |  36 | 0.07 |          40 | 0.31 |       52 | 0.21 |
| C/C++ Header |        1 | 0.09 |  10 | 0.02 |          10 | 0.08 |       26 | 0.11 |
| C            |        3 | 0.27 | 484 | 0.68 |          77 | 0.44 |       46 | 0.16 |
| R            |        6 | 0.55 | 157 | 0.22 |          35 | 0.20 |      156 | 0.54 |
| Rmd          |        1 | 0.09 |  44 | 0.06 |          47 | 0.27 |       58 | 0.20 |
| C/C++ Header |        1 | 0.09 |  24 | 0.03 |          16 | 0.09 |       30 | 0.10 |

## Code of Conduct


A man/as.list.tdigest.Rd => man/as.list.tdigest.Rd +28 -0
@@ 0,0 1,28 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/create.R
\name{as.list.tdigest}
\alias{as.list.tdigest}
\alias{as_tdigest}
\title{Serialize a tdigest object to an R list or unserialize a serialized tdigest
list back into a tdigest object}
\usage{
\method{as.list}{tdigest}(x, ...)

as_tdigest(x)
}
\arguments{
\item{x}{a tdigest object or a tdigest_list object}

\item{...}{unused}
}
\description{
These functions make it possible to create & populate a tdigest, serialize it out,
read it in at a later time, and continue populating it, enabling compact
distribution accumulation & storage for large, "continuous" datasets.
}
\examples{
set.seed(1492)
x <- sample(0:100, 1000000, replace = TRUE)
td <- tdigest(x, 1000)
as_tdigest(as.list(td))
}

M src/init.c => src/init.c +33 -18
@@ 3,10 3,6 @@
#include <stdlib.h> // for NULL
#include <R_ext/Rdynload.h>

/* FIXME: 
   Check these declarations against the C/Fortran source code.
*/

/* .Call calls */
extern SEXP is_null_xptr_(SEXP);
extern SEXP Rtd_add(SEXP, SEXP, SEXP);


@@ 17,22 13,41 @@ extern SEXP Rtd_total_count(SEXP);
extern SEXP Rtd_value_at(SEXP, SEXP);
extern SEXP Rtdig(SEXP, SEXP);
extern SEXP Rtquant(SEXP, SEXP);
extern SEXP Rg_compression(SEXP);
extern SEXP Rg_cap(SEXP);
extern SEXP Rg_merged_nodes(SEXP);
extern SEXP Rg_unmerged_nodes(SEXP);
extern SEXP Rg_merged_count(SEXP);
extern SEXP Rg_unmerged_count(SEXP);
extern SEXP Rg_nodes_mean(SEXP);
extern SEXP Rg_nodes_count(SEXP);
extern SEXP Rg_toR(SEXP);
extern SEXP Rg_fromR(SEXP);

static const R_CallMethodDef CallEntries[] = {
    {"is_null_xptr_",   (DL_FUNC) &is_null_xptr_,   1},
    {"Rtd_add",         (DL_FUNC) &Rtd_add,         3},
    {"Rtd_create",      (DL_FUNC) &Rtd_create,      1},
    {"Rtd_merge",       (DL_FUNC) &Rtd_merge,       2},
    {"Rtd_quantile_of", (DL_FUNC) &Rtd_quantile_of, 2},
    {"Rtd_total_count", (DL_FUNC) &Rtd_total_count, 1},
    {"Rtd_value_at",    (DL_FUNC) &Rtd_value_at,    2},
    {"Rtdig",           (DL_FUNC) &Rtdig,           2},
    {"Rtquant",         (DL_FUNC) &Rtquant,         2},
    {NULL, NULL, 0}
  {"is_null_xptr_",     (DL_FUNC) &is_null_xptr_,     1},
  {"Rtd_add",           (DL_FUNC) &Rtd_add,           3},
  {"Rtd_create",        (DL_FUNC) &Rtd_create,        1},
  {"Rtd_merge",         (DL_FUNC) &Rtd_merge,         2},
  {"Rtd_quantile_of",   (DL_FUNC) &Rtd_quantile_of,   2},
  {"Rtd_total_count",   (DL_FUNC) &Rtd_total_count,   1},
  {"Rtd_value_at",      (DL_FUNC) &Rtd_value_at,      2},
  {"Rtdig",             (DL_FUNC) &Rtdig,             2},
  {"Rtquant",           (DL_FUNC) &Rtquant,           2},
  {"Rg_compression",    (DL_FUNC) &Rg_compression,    1},
  {"Rg_cap",            (DL_FUNC) &Rg_cap,            1},
  {"Rg_merged_nodes",   (DL_FUNC) &Rg_merged_nodes,   1},
  {"Rg_unmerged_nodes", (DL_FUNC) &Rg_unmerged_nodes, 1},
  {"Rg_merged_count",   (DL_FUNC) &Rg_merged_count,   1},
  {"Rg_unmerged_count", (DL_FUNC) &Rg_unmerged_count, 1},
  {"Rg_nodes_mean",     (DL_FUNC) &Rg_nodes_mean,     1},
  {"Rg_nodes_count",    (DL_FUNC) &Rg_nodes_count,    1},
  {"Rg_toR",            (DL_FUNC) &Rg_toR,            1},
  {"Rg_fromR",          (DL_FUNC) &Rg_fromR,          1},
  {NULL, NULL, 0}
};

void R_init_tdigest(DllInfo *dll)
{
    R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
    R_useDynamicSymbols(dll, FALSE);
// Package load hook: register the .Call entry points and restrict symbol
// lookup to registered routines only (R_useDynamicSymbols FALSE).
void R_init_tdigest(DllInfo *dll) {
  R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
  R_useDynamicSymbols(dll, FALSE);
}

M src/tdigest-main.c => src/tdigest-main.c +168 -0
@@ 11,6 11,17 @@

#include "tdigest.h"

#define O_COMPRESSION 0
#define O_CAP 1
#define O_MNODES 2
#define O_UMNODES 3
#define O_MCOUNT 4
#define O_UMCOUNT 5
#define O_OUT_NODES 6

#define O_MEANS 0
#define O_COUNTS 1

// next 2 ƒ() via <https://github.com/randy3k/xptr/blob/master/src/xptr.c>

void check_is_xptr(SEXP s) {


@@ 125,3 136,160 @@ SEXP Rtd_merge(SEXP from, SEXP into) {
  }
  return(R_NilValue);
}

// --- scalar field accessors ------------------------------------------------
// Each unwraps the tdigest external pointer and returns the corresponding
// td_histogram_t field, or R_NilValue when the pointer is NULL (stale xptr).

// Compression setting (controls centroid sizing) as a double.
SEXP Rg_compression(SEXP from) {
  td_histogram_t *f = (td_histogram_t *)R_ExternalPtrAddr(from);
  return(f ? ScalarReal(f->compression) : R_NilValue);
}

// Total capacity of the nodes[] buffer.
SEXP Rg_cap(SEXP from) {
  td_histogram_t *f = (td_histogram_t *)R_ExternalPtrAddr(from);
  return(f ? ScalarInteger(f->cap) : R_NilValue);
}

// Number of merged nodes at the front of nodes[].
SEXP Rg_merged_nodes(SEXP from) {
  td_histogram_t *f = (td_histogram_t *)R_ExternalPtrAddr(from);
  return(f ? ScalarInteger(f->merged_nodes) : R_NilValue);
}

// Number of still-buffered (unmerged) nodes.
SEXP Rg_unmerged_nodes(SEXP from) {
  td_histogram_t *f = (td_histogram_t *)R_ExternalPtrAddr(from);
  return(f ? ScalarInteger(f->unmerged_nodes) : R_NilValue);
}

// Total weight already folded into the merged region.
SEXP Rg_merged_count(SEXP from) {
  td_histogram_t *f = (td_histogram_t *)R_ExternalPtrAddr(from);
  return(f ? ScalarReal(f->merged_count) : R_NilValue);
}

// Total weight still sitting in the unmerged buffer.
SEXP Rg_unmerged_count(SEXP from) {
  td_histogram_t *f = (td_histogram_t *)R_ExternalPtrAddr(from);
  return(f ? ScalarReal(f->unmerged_count) : R_NilValue);
}

// Return the mean of every node (merged region plus unmerged buffer) as a
// REALSXP vector, or R_NilValue when the external pointer is NULL/stale.
SEXP Rg_nodes_mean(SEXP from) {
  td_histogram_t *h = (td_histogram_t *)R_ExternalPtrAddr(from);
  if (!h) return(R_NilValue);
  int n_nodes = h->merged_nodes + h->unmerged_nodes;
  SEXP means = PROTECT(allocVector(REALSXP, n_nodes));
  double *mp = REAL(means); // hoist the accessor out of the loop
  for (int j = 0; j < n_nodes; j++) {
    mp[j] = h->nodes[j].mean;
  }
  UNPROTECT(1);
  return(means);
}

// Return the (weighted) count of every node, merged-first then buffered,
// as a REALSXP vector; R_NilValue when the external pointer is NULL/stale.
SEXP Rg_nodes_count(SEXP from) {
  td_histogram_t *h = (td_histogram_t *)R_ExternalPtrAddr(from);
  if (!h) return(R_NilValue);
  int n_nodes = h->merged_nodes + h->unmerged_nodes;
  SEXP counts = PROTECT(allocVector(REALSXP, n_nodes));
  double *cp = REAL(counts); // hoist the accessor out of the loop
  for (int j = 0; j < n_nodes; j++) {
    cp[j] = h->nodes[j].count;
  }
  UNPROTECT(1);
  return(counts);
}

// Serialize the native t-digest behind `from` into a named R list
// (compression, cap, merged/unmerged node counts & weights, and a
// two-element "nodes" sublist of means and counts). The "tdigest_list"
// class tag is applied on the R side in as.list.tdigest().
// Returns R_NilValue when the external pointer is NULL/stale.
SEXP Rg_toR(SEXP from) {

  td_histogram_t *f = (td_histogram_t *)R_ExternalPtrAddr(from);

  if (!f) return(R_NilValue);

  int N = f->merged_nodes + f->unmerged_nodes;

  SEXP o_means = PROTECT(allocVector(REALSXP, N));
  SEXP o_counts = PROTECT(allocVector(REALSXP, N));

  double *mp = REAL(o_means);
  double *cp = REAL(o_counts);
  for (int i = 0; i < N; i++) {
    mp[i] = f->nodes[i].mean;
    cp[i] = f->nodes[i].count;
  }

  const char *names[] = {
    "compression",
    "cap",
    "merged_nodes",
    "unmerged_nodes",
    "merged_count",
    "unmerged_count",
    "nodes",
    ""
  };

  SEXP out = PROTECT(mkNamed(VECSXP, names));

  // Scalars stored straight into the protected list (SET_VECTOR_ELT does
  // not allocate before storing, so the fresh scalars need no PROTECT).
  SET_VECTOR_ELT(out, O_COMPRESSION, ScalarReal(f->compression));
  SET_VECTOR_ELT(out, O_CAP, ScalarInteger(f->cap));
  SET_VECTOR_ELT(out, O_MNODES, ScalarInteger(f->merged_nodes));
  SET_VECTOR_ELT(out, O_UMNODES, ScalarInteger(f->unmerged_nodes));
  SET_VECTOR_ELT(out, O_MCOUNT, ScalarReal(f->merged_count));
  SET_VECTOR_ELT(out, O_UMCOUNT, ScalarReal(f->unmerged_count));

  // BUG FIX: the names previously read {"counts", "means"} while the values
  // were stored means-first (o_means at slot 0, o_counts at slot 1), so
  // nodes$counts held the means and nodes$means held the counts. The name
  // order now matches the O_MEANS/O_COUNTS layout that Rg_fromR() reads back;
  // positional layout is unchanged, so round-tripping is unaffected.
  const char *node_names[] = {
    "means",
    "counts",
    ""
  };

  SEXP out_nodes = PROTECT(mkNamed(VECSXP, node_names));

  SET_VECTOR_ELT(out_nodes, O_MEANS, o_means);
  SET_VECTOR_ELT(out_nodes, O_COUNTS, o_counts);

  SET_VECTOR_ELT(out, O_OUT_NODES, out_nodes);

  UNPROTECT(4); // o_means, o_counts, out, out_nodes

  return(out);

}

// Rebuild a native t-digest from the list produced by Rg_toR().
// The histogram buffer is allocated by Rtd_create() and therefore sized
// from the serialized compression value, so the node/cap counts carried in
// the list are validated against that allocation before anything is copied —
// otherwise a corrupt or hand-built tdigest_list could overflow t->nodes[].
SEXP Rg_fromR(SEXP td_list) {

  SEXP out = PROTECT(Rtd_create(VECTOR_ELT(td_list, O_COMPRESSION)));

  td_histogram_t *t = (td_histogram_t *)R_ExternalPtrAddr(out);
  if (!t) { // allocation failed upstream
    UNPROTECT(1);
    return(R_NilValue);
  }

  int cap = asInteger(VECTOR_ELT(td_list, O_CAP));
  int merged_nodes = asInteger(VECTOR_ELT(td_list, O_MNODES));
  int unmerged_nodes = asInteger(VECTOR_ELT(td_list, O_UMNODES));
  int N = merged_nodes + unmerged_nodes;

  SEXP node_list = VECTOR_ELT(td_list, O_OUT_NODES);
  SEXP o_means = VECTOR_ELT(node_list, O_MEANS);
  SEXP o_counts = VECTOR_ELT(node_list, O_COUNTS);

  // Refuse to write past the freshly allocated node buffer or read past
  // the serialized vectors. A legitimate round-trip always passes: cap in
  // the list equals the capacity derived from the same compression value.
  if ((N < 0) || (N > t->cap) || (cap > t->cap) ||
      (Rf_length(o_means) < N) || (Rf_length(o_counts) < N)) {
    UNPROTECT(1);
    error("corrupt tdigest_list: node count/capacity exceeds allocated buffer");
  }

  t->compression = asReal(VECTOR_ELT(td_list, O_COMPRESSION));
  t->cap = cap;
  t->merged_nodes = merged_nodes;
  t->unmerged_nodes = unmerged_nodes;
  t->merged_count = asReal(VECTOR_ELT(td_list, O_MCOUNT));
  t->unmerged_count = asReal(VECTOR_ELT(td_list, O_UMCOUNT));

  for (int i = 0; i < N; i++) {
    t->nodes[i].count = REAL(o_counts)[i];
    t->nodes[i].mean = REAL(o_means)[i];
  }

  UNPROTECT(1);

  return(out);

}

M src/tdigest.c => src/tdigest.c +49 -47
@@ 5,48 5,52 @@

#include "tdigest.h"

#define MM_PI 3.14159265358979323846

typedef struct node {
     double mean;
     double count;
} node_t;

void bbzero(void *to, size_t count) {
  memset(to, 0, count);
}

struct td_histogram {
     // compression is a setting used to configure the size of centroids when merged.
     double compression;

     // cap is the total size of nodes
     int cap;
     // merged_nodes is the number of merged nodes at the front of nodes.
     int merged_nodes;
     // unmerged_nodes is the number of buffered nodes.
     int unmerged_nodes;

     double merged_count;
     double unmerged_count;

     node_t nodes[];
};
// #define MM_PI 3.14159265358979323846
//
// typedef struct node {
//   double mean;
//   double count;
// } node_t;
//
// void bbzero(void *to, size_t count) {
//   memset(to, 0, count);
// }
//
// struct td_histogram {
//   // compression is a setting used to configure the size of centroids when merged.
//   double compression;
//
//   // cap is the total size of nodes
//   int cap;
//   // merged_nodes is the number of merged nodes at the front of nodes.
//   int merged_nodes;
//   // unmerged_nodes is the number of buffered nodes.
//   int unmerged_nodes;
//
//   double merged_count;
//   double unmerged_count;
//
//   node_t nodes[];
// };

static bool is_very_small(double val) {
     return !(val > .000000001 || val < -.000000001);
  return !(val > .000000001 || val < -.000000001);
}

static int cap_from_compression(double compression) {
     return (6 * (int)(compression)) + 10;
  return (6 * (int)(compression)) + 10;
}

static bool should_merge(td_histogram_t *h) {
     return ((h->merged_nodes + h->unmerged_nodes) == h->cap);
  return ((h->merged_nodes + h->unmerged_nodes) == h->cap);
}

static int next_node(td_histogram_t *h) {
     return h->merged_nodes + h->unmerged_nodes;
  return h->merged_nodes + h->unmerged_nodes;
}

static void merge(td_histogram_t *h);


@@ 55,11 59,9 @@ static void merge(td_histogram_t *h);
// Constructors
////////////////////////////////////////////////////////////////////////////////



static size_t td_required_buf_size(double compression) {
     return sizeof(td_histogram_t) +
          (cap_from_compression(compression) * sizeof(node_t));
  return sizeof(td_histogram_t) +
    (cap_from_compression(compression) * sizeof(node_t));
}

// td_init will initialize a td_histogram_t inside buf which is buf_size bytes.


@@ 69,25 71,25 @@ static size_t td_required_buf_size(double compression) {
// In general use td_required_buf_size to figure out what size buffer to
// pass.
static td_histogram_t *td_init(double compression, size_t buf_size, char *buf) {
     td_histogram_t *h = (td_histogram_t *)(buf);
     if (!h) {
          return NULL;
     }
     bbzero((void *)(h), buf_size);
     *h = (td_histogram_t) {
          .compression = compression,
          .cap = (buf_size - sizeof(td_histogram_t)) / sizeof(node_t),
          .merged_nodes = 0,
          .merged_count = 0,
          .unmerged_nodes = 0,
          .unmerged_count = 0,
     };
     return h;
  td_histogram_t *h = (td_histogram_t *)(buf);
  if (!h) {
    return NULL;
  }
  bbzero((void *)(h), buf_size);
  *h = (td_histogram_t) {
    .compression = compression,
    .cap = (buf_size - sizeof(td_histogram_t)) / sizeof(node_t),
    .merged_nodes = 0,
    .merged_count = 0,
    .unmerged_nodes = 0,
    .unmerged_count = 0,
  };
  return h;
}

td_histogram_t *td_new(double compression) {
     size_t memsize = td_required_buf_size(compression);
     return td_init(compression, memsize, (char *)(malloc(memsize)));
  size_t memsize = td_required_buf_size(compression);
  return td_init(compression, memsize, (char *)(malloc(memsize)));
}

void td_free(td_histogram_t *h) {

M src/tdigest.h => src/tdigest.h +24 -0
@@ 19,6 19,30 @@

#include <stdlib.h>

#define MM_PI 3.14159265358979323846

// A single centroid: the running mean of the points it has absorbed and
// the (weighted) number of points it represents.
typedef struct node {
  double mean;
  double count;
} node_t;

struct td_histogram {
  // compression is a setting used to configure the size of centroids when merged.
  double compression;

  // cap is the total size of nodes
  int cap;
  // merged_nodes is the number of merged nodes at the front of nodes.
  int merged_nodes;
  // unmerged_nodes is the number of buffered nodes.
  int unmerged_nodes;

  // total weight folded into the merged region / still in the unmerged buffer
  double merged_count;
  double unmerged_count;

  // flexible array member: cap node_t entries allocated immediately after
  // the struct (see td_required_buf_size in tdigest.c)
  node_t nodes[];
};

typedef struct td_histogram td_histogram_t;

// td_new allocates a new histogram.

M tests/testthat/test-tdigest.R => tests/testthat/test-tdigest.R +9 -0
@@ 63,3 63,12 @@ td <- tdigest(x.altrep)
expect_equal(as.integer(td[0.1]), 93051)
expect_equal(as.integer(td[0.5]), 491472)
expect_equal(length(td), 1000000)

context("Serialization test")

# Round-trip check: serializing a tdigest to a list and rebuilding it
# must yield an identical serialized representation.
set.seed(1492)
x <- sample(0:100, 1000000, replace = TRUE)
td <- tdigest(x, 1000)
a <- as.list(td)
b <- as.list(as_tdigest(a))
expect_true(identical(a, b))