~hrbrmstr/sergeant

add03225be521bee6ce3f95d24853a3b51b176dc — Bob Rudis 4 years ago 8b1166f
CRAN checks
M .Rbuildignore => .Rbuildignore +1 -0
@@ 5,3 5,4 @@
^\.travis\.yml$
^CONDUCT\.md$
^README\.md$
^sergeant.png$

M DESCRIPTION => DESCRIPTION +2 -1
@@ 14,7 14,8 @@ Imports:
    htmltools,
    dplyr,
    readr,
    purrr
    purrr,
    utils
Suggests:
    testthat
RoxygenNote: 5.0.1

M NAMESPACE => NAMESPACE +2 -0
@@ 1,5 1,6 @@
# Generated by roxygen2: do not edit by hand

export(drill_active)
export(drill_cancel)
export(drill_metrics)
export(drill_options)


@@ 21,6 22,7 @@ export(drill_version)
import(htmltools)
import(httr)
import(jsonlite)
import(utils)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
importFrom(dplyr,data_frame)

A R/aaa.r => R/aaa.r +1 -0
@@ 0,0 1,1 @@
# Register the names below as known global variables for this package.
# NOTE(review): presumably these are column/field names used unquoted in
# dplyr/purrr pipelines, declared here to quiet R CMD check notes — confirm.
utils::globalVariables(
  c(
    "error", "everything", "isDirectory", "name",
    "params", "permissions", "query"
  )
)

M R/query.r => R/query.r +16 -5
@@ 2,17 2,27 @@
#'
#' @param query query to run
#' @param uplift automatically run \code{drill_uplift()} on the result?
#' @param .progress if \code{TRUE} then ask \code{httr::POST} to display a progress bar
#' @param drill_server base URL of the \code{drill} server
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @export
#' @examples \dontrun{
#' drill_query("SELECT * FROM cp.`employee.json` limit 5")
#' }
drill_query <- function(query, uplift=TRUE, drill_server=Sys.getenv("DRILL_URL", unset="http://localhost:8047")) {
drill_query <- function(query, uplift=TRUE, .progress=FALSE, drill_server=Sys.getenv("DRILL_URL", unset="http://localhost:8047")) {

  res <- httr::POST(sprintf("%s/query.json", drill_server),
                    encode="json",
                    body=list(queryType="SQL",
                              query=query))
  if (.progress) {
    res <- httr::POST(sprintf("%s/query.json", drill_server),
                      encode="json",
                      progress(),
                      body=list(queryType="SQL",
                                query=query))
  } else {
    res <- httr::POST(sprintf("%s/query.json", drill_server),
                      encode="json",
                      body=list(queryType="SQL",
                                query=query))
  }

  out <- jsonlite::fromJSON(httr::content(res, as="text", encoding="UTF-8"), flatten=TRUE)



@@ 36,6 46,7 @@ drill_query <- function(query, uplift=TRUE, drill_server=Sys.getenv("DRILL_URL",
#' without `uplift=TRUE` but want to then convert the structure.
#'
#' @param query_result the result of a call to `drill_query()`
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @export
drill_uplift <- function(query_result) {
  dplyr::tbl_df(readr::type_convert(query_result$rows))

M R/schemas.R => R/schemas.R +3 -0
@@ 1,6 1,7 @@
#' Returns a list of available schemas.
#'
#' @param drill_server base URL of the \code{drill} server
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @export
drill_show_schemas <- function(drill_server=Sys.getenv("DRILL_URL", unset="http://localhost:8047")) {
  drill_query("SHOW SCHEMAS", drill_server=drill_server)$rows$SCHEMA_NAME


@@ 11,6 12,7 @@ drill_show_schemas <- function(drill_server=Sys.getenv("DRILL_URL", unset="http:
#' @param schema_name A unique name for a Drill schema. A schema in Drill is a configured
#'                   storage plugin, such as hive, or a storage plugin and workspace.
#' @param drill_server base URL of the \code{drill} server
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @export
drill_use <- function(schema_name, drill_server=Sys.getenv("DRILL_URL", unset="http://localhost:8047")) {
  query <- sprintf("USE `%s`", schema_name)


@@ 24,6 26,7 @@ drill_use <- function(schema_name, drill_server=Sys.getenv("DRILL_URL", unset="h
#' @param schema_spec properly quoted "filesystem.directory_name" reference path
#' @param drill_server base URL of the \code{drill} server
#' @export
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @examples \dontrun{
#' drill_show_files("dfs.tmp")
#' drill_show_files("dfs.tmp")

M R/sergeant-package.r => R/sergeant-package.r +15 -1
@@ 1,9 1,23 @@
#' Tools to Transform and Query Data with the 'Apache' 'Drill' 'REST' 'API'
#' Tools to Transform and Query Data with the 'Apache Drill' 'REST API'
#'
#' Drill is an innovative distributed SQL engine designed to enable data exploration
#' and analytics on non-relational datastores. Users can query the data using standard
#' SQL and BI tools without having to create and manage schemas. Some of the key features
#' are:
#'
#' \itemize{
#'   \item{Schema-free JSON document model similar to MongoDB and Elasticsearch}
#'   \item{Industry-standard APIs: ANSI SQL, ODBC/JDBC, RESTful APIs}
#'   \item{Extremely user and developer friendly}
#'   \item{Pluggable architecture enables connectivity to multiple datastores}
#' }
#'
#' @name sergeant
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import httr jsonlite htmltools
#' @importFrom purrr map map2 map2_df %>%
#' @importFrom dplyr mutate select left_join bind_cols bind_rows data_frame tbl
#' @import utils
NULL

M R/sergeant.r => R/sergeant.r +27 -2
@@ 1,5 1,21 @@
# Wrapped version of httr::HEAD built with purrr::safely(); callers inspect
# the `$result` element of the value it returns.
s_head <- purrr::safely(httr::HEAD)

#' Test whether Drill HTTP REST API server is up
#'
#' This is a very simple test (performs \code{HEAD /} on \code{drill_server}).
#' The request is issued with \code{httr::timeout(2)}.
#'
#' @param drill_server base URL of the \code{drill} server
#' @return \code{TRUE} if the \code{HEAD} request produced a result,
#'         \code{FALSE} otherwise
#' @export
#' @examples \dontrun{
#' drill_active()
#' }
drill_active <- function(drill_server=Sys.getenv("DRILL_URL", unset="http://localhost:8047")) {
  # A NULL `result` means the wrapped HEAD call did not yield a response.
  probe <- s_head(drill_server, httr::timeout(2))
  !is.null(probe$result)
}

#' Get the status of Drill
#'
#' @note The output of this is in a "viewer" window
#' @param drill_server base URL of the \code{drill} server
#' @export
#' @examples \dontrun{


@@ 27,6 43,7 @@ drill_metrics <- function(drill_server=Sys.getenv("DRILL_URL", unset="http://loc

#' Get information about threads
#'
#' @note The output of this is in a "viewer" window
#' @param drill_server base URL of the \code{drill} server
#' @export
#' @examples \dontrun{


@@ 43,6 60,7 @@ drill_threads <- function(drill_server=Sys.getenv("DRILL_URL", unset="http://loc
#'
#' @param drill_server base URL of the \code{drill} server
#' @export
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @examples \dontrun{
#' drill_profiles()
#' }


@@ 52,9 70,11 @@ drill_profiles <- function(drill_server=Sys.getenv("DRILL_URL", unset="http://lo
  jsonlite::fromJSON(cnt)
}

#' Get the profile of the query that has the given queryid.
#' Get the profile of the query that has the given queryid
#'
#' @param query_id UUID of the query in standard UUID format that Drill assigns to each query
#' @param drill_server base URL of the \code{drill} server
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @export
drill_profile <- function(query_id, drill_server=Sys.getenv("DRILL_URL", unset="http://localhost:8047")) {
  res <- httr::GET(sprintf("%s/profiles/%s.json", drill_server, query_id))


@@ 62,10 82,11 @@ drill_profile <- function(query_id, drill_server=Sys.getenv("DRILL_URL", unset="
  jsonlite::fromJSON(cnt)
}

#' Cancel the query that has the given queryid.
#' Cancel the query that has the given queryid
#'
#' @param query_id the UUID of the query in standard UUID format that Drill assigns to each query.
#' @param drill_server base URL of the \code{drill} server
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @export
drill_cancel <- function(query_id, drill_server=Sys.getenv("DRILL_URL", unset="http://localhost:8047")) {
  res <- httr::GET(sprintf("%s/profiles/cancel%s", drill_server, query_id))


@@ 77,6 98,7 @@ drill_cancel <- function(query_id, drill_server=Sys.getenv("DRILL_URL", unset="h
#'
#' @param plugin the assigned name in the storage plugin definition.
#' @param drill_server base URL of the \code{drill} server
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @export
#' @examples \dontrun{
#' drill_storage()


@@ 99,6 121,7 @@ drill_storage <- function(plugin=NULL, drill_server=Sys.getenv("DRILL_URL", unse
#'
#' @param drill_server base URL of the \code{drill} server
#' @export
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @examples \dontrun{
#' drill_options()
#' }


@@ 113,6 136,7 @@ drill_options <- function(drill_server=Sys.getenv("DRILL_URL", unset="http://loc
#'
#' @param drill_server base URL of the \code{drill} server
#' @export
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @examples \dontrun{
#' drill_stats()
#' }


@@ 126,6 150,7 @@ drill_stats <- function(drill_server=Sys.getenv("DRILL_URL", unset="http://local
#'
#' @param drill_server base URL of the \code{drill} server
#' @export
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @examples \dontrun{
#' drill_version()
#' }

M R/set.R => R/set.R +3 -0
@@ 10,6 10,7 @@
#' @param type set the \code{session} or \code{system} parameter
#' @param drill_server base URL of the \code{drill} server
#' @return a \code{tbl} (invisibly) with the \code{ALTER} queries sent and results, including errors.
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @export
#' @examples \dontrun{
#' drill_set(exec.errors.verbose=TRUE, store.format="parquet", web.logs.max_lines=20000)


@@ 53,6 54,7 @@ drill_set <- function(..., type=c("session", "system"),
#' @param ... bare name of system options to reset
#' @param all if \code{TRUE}, all parameters are reset (\code{...} is ignored)
#' @param drill_server base URL of the \code{drill} server
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @export
#' @examples \dontrun{
#' drill_system_reset(all=TRUE)


@@ 93,6 95,7 @@ drill_system_reset <- function(..., all=FALSE,
#'
#' @param ... bare name of system options to reset
#' @param drill_server base URL of the \code{drill} server
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @export
#' @examples \dontrun{
#' drill_settings_reset(exec.errors.verbose)

M README.Rmd => README.Rmd +59 -2
@@ 18,14 18,33 @@ knitr::opts_chunk$set(
![downloads](http://cranlogs.r-pkg.org/badges/grand-total/sergeant)
-->

<img src="sergeant.png" width="33" align="left" style="padding-right:20px"/>

`sergeant` : Tools to Transform and Query Data with the 'Apache' 'Drill' 'API'

Drill + `sergeant` is (IMO) a nice alternative to Spark + `sparklyr` if you don't need the ML components of Spark (i.e. just need to query "big data" sources, need to interface with parquet, need to combine disparate data source types — json, csv, parquet, rdbms — for aggregation, etc). Drill also has support for spatial queries.

The package doesn't have a `dplyr`-esque interface yet, but creating one is possible since Drill uses pretty standard SQL for queries. Right now, you need to build Drill SQL queries by hand and issue them with `drill_query()`. It's good to get one's hands dirty with some SQL on occasion (it builds character).

I find writing SQL queries to parquet files with Drill on a local 64GB Linux workstation to be more performant than doing the data ingestion work with R (for large or disparate data sets). I also work with many tiny JSON files on a daily basis and Drill makes it much easier to do so. YMMV.

You can download Drill from <https://drill.apache.org/download/> (use "Direct File Download"). I use `/usr/local/drill` as the install directory. `drill-embedded` is a super-easy way to get started playing with Drill on a single workstation and most of my workflows can get by using Drill this way. If there is sufficient desire for an automated downloader and a way to start the `drill-embedded` server from within R, please file an issue.

There are a few convenience wrappers for various informational SQL queries (like `drill_version()`). Please file a PR if you add more.

The package has been written with retrieval of rectangular data sources in mind. If you need/want a version of `drill_query()` that will enable returning of non-rectangular data (which is possible with Drill) then please file an issue.

Some of the more "controlling vs data ops" REST API functions aren't implemented. Please file a PR if you need those.

Finally, I run most of this locally and at home, so it's all been coded with no authentication or encryption in mind. If you want/need support for that, please file an issue. If there is demand for this, it will change the R API a bit (I've already thought out what to do but have no need for it right now).

The following functions are implemented:

- `drill_cancel`:	Cancel the query that has the given queryid.
- `drill_active`: Test whether Drill HTTP REST API server is up
- `drill_cancel`:	Cancel the query that has the given queryid
- `drill_metrics`:	Get the current memory metrics
- `drill_options`:	List the name, default, and data type of the system and session options
- `drill_profile`:	Get the profile of the query that has the given queryid.
- `drill_profile`:	Get the profile of the query that has the given queryid
- `drill_profiles`:	Get the profiles of running and completed queries
- `drill_query`:	Submit a query and return results
- `drill_set`:	Set Drill SYSTEM or SESSION options


@@ 59,10 78,16 @@ library(sergeant)
# current version
packageVersion("sergeant")

drill_active()

drill_version()

drill_storage()$name
```

Working with the built-in JSON data sets:

```{r}
drill_query("SELECT * FROM cp.`employee.json` limit 100")

drill_query("SELECT COUNT(gender) AS gender FROM cp.`employee.json` GROUP BY gender")


@@ 70,6 95,38 @@ drill_query("SELECT COUNT(gender) AS gender FROM cp.`employee.json` GROUP BY gen
drill_options()
```

## Working with parquet files

```{r}
drill_query("SELECT * FROM dfs.`/usr/local/drill/sample-data/nation.parquet` LIMIT 5")
```

Including multiple parquet files in different directories (note the wildcard support):

```{r}
drill_query("SELECT * FROM dfs.`/usr/local/drill/sample-data/nations*/nations*.parquet` LIMIT 5")
```

### A preview of the built-in support for spatial ops

Via: <https://github.com/k255/drill-gis>

A common use case is to select data within boundary of given polygon:

```{r}
drill_query("
select columns[2] as city, columns[4] as lon, columns[3] as lat
    from cp.`sample-data/CA-cities.csv`
    where
        ST_Within(
            ST_Point(columns[4], columns[3]),
            ST_GeomFromText(
                'POLYGON((-121.95 37.28, -121.94 37.35, -121.84 37.35, -121.84 37.28, -121.95 37.28))'
                )
            )
")
```

### Test Results

```{r}

M README.md => README.md +109 -3
@@ 6,14 6,33 @@
[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/sergeant)](http://cran.r-project.org/web/packages/sergeant) 
![downloads](http://cranlogs.r-pkg.org/badges/grand-total/sergeant)
-->
<img src="sergeant.png" width="33" align="left" style="padding-right:20px"/>

`sergeant` : Tools to Transform and Query Data with the 'Apache' 'Drill' 'API'

Drill + `sergeant` is (IMO) a nice alternative to Spark + `sparklyr` if you don't need the ML components of Spark (i.e. just need to query "big data" sources, need to interface with parquet, need to combine disparate data source types — json, csv, parquet, rdbms — for aggregation, etc). Drill also has support for spatial queries.

The package doesn't have a `dplyr`-esque interface yet, but creating one is possible since Drill uses pretty standard SQL for queries. Right now, you need to build Drill SQL queries by hand and issue them with `drill_query()`. It's good to get one's hands dirty with some SQL on occasion (it builds character).

I find writing SQL queries to parquet files with Drill on a local 64GB Linux workstation to be more performant than doing the data ingestion work with R (for large or disparate data sets). I also work with many tiny JSON files on a daily basis and Drill makes it much easier to do so. YMMV.

You can download Drill from <https://drill.apache.org/download/> (use "Direct File Download"). I use `/usr/local/drill` as the install directory. `drill-embedded` is a super-easy way to get started playing with Drill on a single workstation and most of my workflows can get by using Drill this way. If there is sufficient desire for an automated downloader and a way to start the `drill-embedded` server from within R, please file an issue.

There are a few convenience wrappers for various informational SQL queries (like `drill_version()`). Please file a PR if you add more.

The package has been written with retrieval of rectangular data sources in mind. If you need/want a version of `drill_query()` that will enable returning of non-rectangular data (which is possible with Drill) then please file an issue.

Some of the more "controlling vs data ops" REST API functions aren't implemented. Please file a PR if you need those.

Finally, I run most of this locally and at home, so it's all been coded with no authentication or encryption in mind. If you want/need support for that, please file an issue. If there is demand for this, it will change the R API a bit (I've already thought out what to do but have no need for it right now).

The following functions are implemented:

-   `drill_cancel`: Cancel the query that has the given queryid.
-   `drill_active`: Test whether Drill HTTP REST API server is up
-   `drill_cancel`: Cancel the query that has the given queryid
-   `drill_metrics`: Get the current memory metrics
-   `drill_options`: List the name, default, and data type of the system and session options
-   `drill_profile`: Get the profile of the query that has the given queryid.
-   `drill_profile`: Get the profile of the query that has the given queryid
-   `drill_profiles`: Get the profiles of running and completed queries
-   `drill_query`: Submit a query and return results
-   `drill_set`: Set Drill SYSTEM or SESSION options


@@ 44,12 63,19 @@ library(sergeant)
packageVersion("sergeant")
#> [1] '0.1.0.9000'

drill_active()
#> [1] TRUE

drill_version()
#> [1] "1.9.0"

drill_storage()$name
#> [1] "cp"    "dfs"   "hbase" "hive"  "kudu"  "mongo" "s3"
```

Working with the built-in JSON data sets:

``` r
drill_query("SELECT * FROM cp.`employee.json` limit 100")
#> Parsed with column specification:
#> cols(


@@ 114,6 140,86 @@ drill_options()
#> # ... with 95 more rows
```

Working with parquet files
--------------------------

``` r
drill_query("SELECT * FROM dfs.`/usr/local/drill/sample-data/nation.parquet` LIMIT 5")
#> Parsed with column specification:
#> cols(
#>   N_COMMENT = col_character(),
#>   N_NAME = col_character(),
#>   N_NATIONKEY = col_integer(),
#>   N_REGIONKEY = col_integer()
#> )
#> # A tibble: 5 × 4
#>              N_COMMENT    N_NAME N_NATIONKEY N_REGIONKEY
#> *                <chr>     <chr>       <int>       <int>
#> 1  haggle. carefully f   ALGERIA           0           0
#> 2 al foxes promise sly ARGENTINA           1           1
#> 3 y alongside of the p    BRAZIL           2           1
#> 4 eas hang ironic, sil    CANADA           3           1
#> 5 y above the carefull     EGYPT           4           4
```

Including multiple parquet files in different directories (note the wildcard support):

``` r
drill_query("SELECT * FROM dfs.`/usr/local/drill/sample-data/nations*/nations*.parquet` LIMIT 5")
#> Parsed with column specification:
#> cols(
#>   N_COMMENT = col_character(),
#>   N_NAME = col_character(),
#>   N_NATIONKEY = col_integer(),
#>   N_REGIONKEY = col_integer(),
#>   dir0 = col_character()
#> )
#> # A tibble: 5 × 5
#>              N_COMMENT    N_NAME N_NATIONKEY N_REGIONKEY      dir0
#> *                <chr>     <chr>       <int>       <int>     <chr>
#> 1  haggle. carefully f   ALGERIA           0           0 nationsMF
#> 2 al foxes promise sly ARGENTINA           1           1 nationsMF
#> 3 y alongside of the p    BRAZIL           2           1 nationsMF
#> 4 eas hang ironic, sil    CANADA           3           1 nationsMF
#> 5 y above the carefull     EGYPT           4           4 nationsMF
```

### A preview of the built-in support for spatial ops

Via: <https://github.com/k255/drill-gis>

A common use case is to select data within boundary of given polygon:

``` r
drill_query("
select columns[2] as city, columns[4] as lon, columns[3] as lat
    from cp.`sample-data/CA-cities.csv`
    where
        ST_Within(
            ST_Point(columns[4], columns[3]),
            ST_GeomFromText(
                'POLYGON((-121.95 37.28, -121.94 37.35, -121.84 37.35, -121.84 37.28, -121.95 37.28))'
                )
            )
")
#> Parsed with column specification:
#> cols(
#>   city = col_character(),
#>   lon = col_double(),
#>   lat = col_double()
#> )
#> # A tibble: 7 × 3
#>          city       lon      lat
#> *       <chr>     <dbl>    <dbl>
#> 1     Burbank -121.9316 37.32328
#> 2    San Jose -121.8950 37.33939
#> 3        Lick -121.8458 37.28716
#> 4 Willow Glen -121.8897 37.30855
#> 5 Buena Vista -121.9166 37.32133
#> 6    Parkmoor -121.9308 37.32105
#> 7   Fruitdale -121.9327 37.31086
```

### Test Results

``` r


@@ 121,7 227,7 @@ library(sergeant)
library(testthat)

date()
#> [1] "Sat Dec  3 12:35:10 2016"
#> [1] "Sat Dec  3 14:18:30 2016"

test_dir("tests/")
#> testthat results ========================================================================================================

A man/drill_active.Rd => man/drill_active.Rd +21 -0
@@ 0,0 1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sergeant.r
\name{drill_active}
\alias{drill_active}
\title{Test whether Drill HTTP REST API server is up}
\usage{
drill_active(drill_server = Sys.getenv("DRILL_URL", unset =
  "http://localhost:8047"))
}
\arguments{
\item{drill_server}{base URL of the \code{drill} server}
}
\description{
This is a very simple test (performs \code{HEAD /} on \code{drill_server})
}
\examples{
\dontrun{
drill_active()
}
}


M man/drill_cancel.Rd => man/drill_cancel.Rd +5 -2
@@ 2,7 2,7 @@
% Please edit documentation in R/sergeant.r
\name{drill_cancel}
\alias{drill_cancel}
\title{Cancel the query that has the given queryid.}
\title{Cancel the query that has the given queryid}
\usage{
drill_cancel(query_id, drill_server = Sys.getenv("DRILL_URL", unset =
  "http://localhost:8047"))


@@ 13,6 13,9 @@ drill_cancel(query_id, drill_server = Sys.getenv("DRILL_URL", unset =
\item{drill_server}{base URL of the \code{drill} server}
}
\description{
Cancel the query that has the given queryid.
Cancel the query that has the given queryid
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
}


M man/drill_options.Rd => man/drill_options.Rd +3 -0
@@ 18,4 18,7 @@ List the name, default, and data type of the system and session options
drill_options()
}
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
}


M man/drill_profile.Rd => man/drill_profile.Rd +7 -2
@@ 2,15 2,20 @@
% Please edit documentation in R/sergeant.r
\name{drill_profile}
\alias{drill_profile}
\title{Get the profile of the query that has the given queryid.}
\title{Get the profile of the query that has the given queryid}
\usage{
drill_profile(query_id, drill_server = Sys.getenv("DRILL_URL", unset =
  "http://localhost:8047"))
}
\arguments{
\item{query_id}{UUID of the query in standard UUID format that Drill assigns to each query}

\item{drill_server}{base URL of the \code{drill} server}
}
\description{
Get the profile of the query that has the given queryid.
Get the profile of the query that has the given queryid
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
}


M man/drill_profiles.Rd => man/drill_profiles.Rd +3 -0
@@ 18,4 18,7 @@ Get the profiles of running and completed queries
drill_profiles()
}
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
}


M man/drill_query.Rd => man/drill_query.Rd +7 -2
@@ 4,14 4,16 @@
\alias{drill_query}
\title{Submit a query and return results}
\usage{
drill_query(query, uplift = TRUE, drill_server = Sys.getenv("DRILL_URL",
  unset = "http://localhost:8047"))
drill_query(query, uplift = TRUE, .progress = FALSE,
  drill_server = Sys.getenv("DRILL_URL", unset = "http://localhost:8047"))
}
\arguments{
\item{query}{query to run}

\item{uplift}{automatically run \code{drill_uplift()} on the result?}

\item{.progress}{if \code{TRUE} then ask \code{httr::POST} to display a progress bar}

\item{drill_server}{base URL of the \code{drill} server}
}
\description{


@@ 22,4 24,7 @@ Submit a query and return results
drill_query("SELECT * FROM cp.`employee.json` limit 5")
}
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
}


M man/drill_set.Rd => man/drill_set.Rd +3 -0
@@ 30,4 30,7 @@ If any query errors result, error messages will be presented to the console.
drill_set(exec.errors.verbose=TRUE, store.format="parquet", web.logs.max_lines=20000)
}
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
}


M man/drill_settings_reset.Rd => man/drill_settings_reset.Rd +3 -0
@@ 20,4 20,7 @@ Changes (optionally, all) session settings back to system defaults
drill_settings_reset(exec.errors.verbose)
}
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
}


M man/drill_show_files.Rd => man/drill_show_files.Rd +3 -0
@@ 21,4 21,7 @@ drill_show_files("dfs.tmp")
drill_show_files("dfs.tmp")
}
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
}


M man/drill_show_schemas.Rd => man/drill_show_schemas.Rd +3 -0
@@ 13,4 13,7 @@ drill_show_schemas(drill_server = Sys.getenv("DRILL_URL", unset =
\description{
Returns a list of available schemas.
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
}


M man/drill_stats.Rd => man/drill_stats.Rd +3 -0
@@ 18,4 18,7 @@ Get Drillbit information, such as ports numbers
drill_stats()
}
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
}


M man/drill_status.Rd => man/drill_status.Rd +3 -0
@@ 13,6 13,9 @@ drill_status(drill_server = Sys.getenv("DRILL_URL", unset =
\description{
Get the status of Drill
}
\note{
The output of this is in a "viewer" window
}
\examples{
\dontrun{
drill_status()

M man/drill_storage.Rd => man/drill_storage.Rd +3 -0
@@ 20,4 20,7 @@ Get the list of storage plugin names and configurations
drill_storage()
}
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
}


M man/drill_system_reset.Rd => man/drill_system_reset.Rd +3 -0
@@ 22,4 22,7 @@ Changes (optionally, all) system settings back to system defaults
drill_system_reset(all=TRUE)
}
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
}


M man/drill_threads.Rd => man/drill_threads.Rd +3 -0
@@ 13,6 13,9 @@ drill_threads(drill_server = Sys.getenv("DRILL_URL", unset =
\description{
Get information about threads
}
\note{
The output of this is in a "viewer" window
}
\examples{
\dontrun{
drill_threads()

M man/drill_uplift.Rd => man/drill_uplift.Rd +3 -0
@@ 18,4 18,7 @@ type-convert it.
Not really intended to be called directly, but useful if you ran \code{drill_query()}
without `uplift=TRUE` but want to then convert the structure.
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
}


M man/drill_use.Rd => man/drill_use.Rd +3 -0
@@ 16,4 16,7 @@ storage plugin, such as hive, or a storage plugin and workspace.}
\description{
Change to a particular schema.
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
}


M man/drill_version.Rd => man/drill_version.Rd +3 -0
@@ 18,4 18,7 @@ Identify the version of Drill running
drill_version()
}
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
}


M man/sergeant.Rd => man/sergeant.Rd +16 -2
@@ 4,11 4,25 @@
\name{sergeant}
\alias{sergeant}
\alias{sergeant-package}
\title{Tools to Transform and Query Data with the 'Apache' 'Drill' 'REST' 'API'}
\title{Tools to Transform and Query Data with the 'Apache Drill' 'REST API'}
\description{
Tools to Transform and Query Data with the 'Apache' 'Drill' 'REST' 'API'
Drill is an innovative distributed SQL engine designed to enable data exploration
and analytics on non-relational datastores. Users can query the data using standard
SQL and BI tools without having to create and manage schemas. Some of the key features
are:
}
\details{
\itemize{
  \item{Schema-free JSON document model similar to MongoDB and Elasticsearch}
  \item{Industry-standard APIs: ANSI SQL, ODBC/JDBC, RESTful APIs}
  \item{Extremely user and developer friendly}
  \item{Pluggable architecture enables connectivity to multiple datastores}
}
}
\author{
Bob Rudis (bob@rud.is)
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
}


A sergeant.png => sergeant.png +0 -0