~hrbrmstr/sergeant

a811b8734e794d0337764c1d2afdba1a8397287d — Bob Rudis 4 years ago e77e311
a few new functions
M DESCRIPTION => DESCRIPTION +8 -4
@@ 1,16 1,20 @@
Package: sergeant
Title: Tools to Transform and Query Data with the 'Apache' 'Drill' 'API'
Version: 0.1.0.9000
Authors@R: c(person("Bob", "Rudis", email = "bob@rudis.net", role = c("aut", "cre")))
Authors@R: c(person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre")))
Description: Tools to Transform and Query Data with the 'Apache' 'Drill' 'API'.
Depends:
    R (>= 3.0.0)
License: AGPL + file LICENSE
Encoding: UTF-8
LazyData: true
Suggests:
    testthat,
Imports:
    httr,
    jsonlite,
    htmltools
    htmltools,
    dplyr,
    readr,
    purrr
Suggests:
    testthat
RoxygenNote: 5.0.1

M NAMESPACE => NAMESPACE +19 -0
@@ 6,10 6,29 @@ export(drill_options)
export(drill_profile)
export(drill_profiles)
export(drill_query)
export(drill_set)
export(drill_setting_reset)
export(drill_show_files)
export(drill_show_schemas)
export(drill_stats)
export(drill_status)
export(drill_storage)
export(drill_system_reset)
export(drill_threads)
export(drill_uplift)
export(drill_use)
export(drill_version)
import(htmltools)
import(httr)
import(jsonlite)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
importFrom(dplyr,data_frame)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,select)
importFrom(dplyr,tbl)
importFrom(purrr,"%>%")
importFrom(purrr,map)
importFrom(purrr,map2)
importFrom(purrr,map2_df)

M R/query.r => R/query.r +23 -2
@@ 1,15 1,36 @@
#' Submit a query and return results
#'
#' @param query query to run
#' @param uplift automatically run \code{drill_uplift()} on the result?
#' @param drill_server base URL of the \code{drill} server
#' @return the parsed JSON response; when the response contains an
#'         \code{errorMessage} it is reported with \code{message()} and the
#'         parsed response is returned invisibly
#' @export
drill_query <- function(query, drill_server=Sys.getenv("DRILL_URL", unset="http://localhost:8047")) {
drill_query <- function(query, uplift=FALSE, drill_server=Sys.getenv("DRILL_URL", unset="http://localhost:8047")) {

  # POST the statement as a JSON body to the server's query.json endpoint
  res <- httr::POST(sprintf("%s/query.json", drill_server),
                    encode="json",
                    body=list(queryType="SQL",
                              query=query))

  jsonlite::fromJSON(httr::content(res, as="text", encoding="UTF-8"), flatten=TRUE)
  # parse the response text as JSON, flattening nested data frames
  out <- jsonlite::fromJSON(httr::content(res, as="text", encoding="UTF-8"), flatten=TRUE)

  # a response carrying `errorMessage` is echoed (with the query collapsed onto
  # one line) and handed back invisibly without being uplifted
  if ("errorMessage" %in% names(out)) {
    message(sprintf("Query ==> %s\n%s\n", gsub("[\r\n]", " ", query), out$errorMessage))
    invisible(out)
  } else {
    # optional conversion of the result rows via drill_uplift()
    if (uplift) out <- drill_uplift(out)
    out
  }

}

#' Turn columnar query results into a type-converted tbl
#'
#' Pulls the \code{rows} element out of a \code{drill_query()} result, lets
#' \code{readr::type_convert()} re-type its columns and wraps the outcome
#' with \code{dplyr::tbl_df()}. Use it when you know the query result will
#' be a data frame.
#'
#' @param query_result the result of a call to \code{drill_query()}
#' @return the type-converted \code{rows} of \code{query_result} as a \code{tbl_df}
#' @export
drill_uplift <- function(query_result) {
  raw_rows <- query_result$rows
  typed_rows <- readr::type_convert(raw_rows)
  dplyr::tbl_df(typed_rows)
}

A R/schemas.R => R/schemas.R +43 -0
@@ 0,0 1,43 @@
#' Returns a list of available schemas.
#'
#' Runs \code{SHOW SCHEMAS} against the server and extracts the
#' \code{SCHEMA_NAME} column from the rows of the result.
#'
#' @param drill_server base URL of the \code{drill} server
#' @return the \code{SCHEMA_NAME} values found in the query result rows
#' @export
drill_show_schemas <- function(drill_server=Sys.getenv("DRILL_URL", unset="http://localhost:8047")) {
  schemas <- drill_query("SHOW SCHEMAS", drill_server=drill_server)
  schemas$rows$SCHEMA_NAME
}

#' Change to a particular schema.
#'
#' Sends a \code{USE} statement for the (backtick-quoted) schema name. When
#' the response carries no \code{errorMessage}, the first \code{summary}
#' value of the result rows is shown as a message.
#'
#' @param schema_name A unique name for a Drill schema. A schema in Drill is a configured
#'                   storage plugin, such as hive, or a storage plugin and workspace.
#' @param drill_server base URL of the \code{drill} server
#' @return the query result, invisibly
#' @export
drill_use <- function(schema_name, drill_server=Sys.getenv("DRILL_URL", unset="http://localhost:8047")) {
  res <- drill_query(sprintf("USE `%s`", schema_name), drill_server=drill_server)
  succeeded <- !("errorMessage" %in% names(res))
  if (succeeded) {
    message(res$rows$summary[1])
  }
  invisible(res)
}

#' Identify the version of Drill running
#'
#' Queries \code{sys.version} and returns the first \code{version} value of
#' the result rows.
#'
#' @param drill_server base URL of the \code{drill} server
#' @return the first \code{version} value reported by the server
#' @export
drill_version <- function(drill_server=Sys.getenv("DRILL_URL", unset="http://localhost:8047")) {
  ver <- drill_query("SELECT version FROM sys.version", drill_server=drill_server)
  ver$rows$version[1]
}

#' Show files in a file system schema.
#'
#' Runs \code{SHOW FILES IN <schema_spec>} and returns the type-converted
#' listing with the \code{name}, \code{isDirectory} and \code{permissions}
#' columns moved to the front.
#'
#' @param schema_spec properly quoted "filesystem.directory_name" reference path;
#'        it is inserted into the query text as-is
#' @param drill_server base URL of the \code{drill} server
#' @return a \code{tbl} describing the files; if the query fails, the error has
#'         already been reported by \code{drill_query()} and the raw response
#'         is returned invisibly
#' @export
#' @examples \dontrun{
#' drill_show_files("dfs.tmp")
#' }
drill_show_files <- function(schema_spec, drill_server=Sys.getenv("DRILL_URL", unset="http://localhost:8047")) {
  query <- sprintf("SHOW FILES IN %s", schema_spec)
  res <- drill_query(query, uplift=TRUE, drill_server=drill_server)

  # on failure drill_query() skips the uplift and hands back the raw response
  # list, which cannot be passed to select()
  if (!is.data.frame(res)) return(invisible(res))

  # everything() is namespace-qualified because it is not among the functions
  # this package imports from dplyr
  dplyr::select(res, name, isDirectory, permissions, dplyr::everything())
}

M R/sergeant-package.r => R/sergeant-package.r +3 -1
@@ 1,7 1,9 @@
#' Tools to Transform and Query Data with the 'Apache' 'Drill' 'API'
#' Tools to Transform and Query Data with the 'Apache' 'Drill' 'REST' 'API'
#'
#' @name sergeant
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import httr jsonlite htmltools
#' @importFrom purrr map map2 map2_df %>%
#' @importFrom dplyr mutate select left_join bind_cols bind_rows data_frame tbl
NULL

A R/set.R => R/set.R +123 -0
@@ 0,0 1,123 @@
#' Set Drill SYSTEM or SESSION options
#'
#' Helper function to make it more R-like to set Drill SESSION or SYSTEM options. It
#' handles the conversion of R types (like \code{TRUE}) to SQL types and automatically
#' quotes parameter values (when necessary).
#'
#' If any query errors result, error messages will be presented to the console.
#'
#' @param ... named parameters to be sent to \code{ALTER [SYSTEM|SESSION]}. Values
#'        are evaluated, so both literals and variables may be supplied.
#' @param type set the \code{session} or \code{system} parameter
#' @param drill_server base URL of the \code{drill} server
#' @return a \code{tbl} (invisibly) with the \code{ALTER} queries sent and results, including errors.
#' @export
#' @examples \dontrun{
#' drill_set(exec.errors.verbose=TRUE, store.format="parquet", web.logs.max_lines=20000)
#' }
drill_set <- function(..., type=c("session", "system"),
                      drill_server=Sys.getenv("DRILL_URL", unset="http://localhost:8047")) {

  type <- toupper(match.arg(tolower(type), choices=c("session", "system")))

  # evaluate the arguments so that values held in variables (or expressions
  # such as -5) are serialised just like literal constants
  opts <- list(...)
  opt_names <- names(opts)
  if (length(opts) > 0 && (is.null(opt_names) || any(!nzchar(opt_names)))) {
    stop("All options passed to drill_set() must be named", call.=FALSE)
  }

  # serialise each value as JSON, then turn the surrounding JSON double quotes
  # of string values into SQL single quotes
  sql_values <- vapply(opts, function(v) {
    gsub('^"|"$', "'", as.character(jsonlite::toJSON(v, auto_unbox=TRUE)))
  }, character(1), USE.NAMES=FALSE)

  queries <- sprintf("ALTER %s SET `%s` = %s", type, as.character(opt_names), sql_values)

  res <- purrr::map_df(queries, function(x) {
    y <- drill_query(x, drill_server=drill_server)
    # failed queries are recognised by the errorMessage element, not by the
    # number of elements in the response
    if ("errorMessage" %in% names(y)) {
      dplyr::data_frame(query=x, param=NA_character_, value=NA,
                        error=as.character(y$errorMessage))
    } else {
      dplyr::data_frame(query=x, param=y$rows$summary, value=y$rows$ok,
                        error=NA_character_)
    }
  })

  # with no options supplied `res` has no rows (and no error column)
  failed <- if (nrow(res) > 0) res[!is.na(res$error), ] else res

  if (nrow(failed) > 0) {

    msgs <- paste0(sprintf("QUERY => %s\n%s\n", failed$query, failed$error), collapse="\n")

    message(sprintf("%d errors:\n\n%s", nrow(failed), msgs))

  }

  invisible(res)

}

#' Changes (optionally, all) system settings back to system defaults
#'
#' Sends one \code{ALTER SYSTEM RESET} statement per option named in
#' \code{...}, or a single \code{ALTER SYSTEM RESET ALL} when \code{all} is
#' \code{TRUE}. If any query errors result, error messages will be presented
#' to the console.
#'
#' @param ... bare name of system options to reset
#' @param all if \code{TRUE}, all parameters are reset (\code{...} is ignored)
#' @param drill_server base URL of the \code{drill} server
#' @return invisibly, the raw query result when \code{all} is \code{TRUE};
#'         otherwise a \code{tbl} with the \code{ALTER} queries sent and
#'         results, including errors.
#' @export
#' @examples \dontrun{
#' drill_system_reset(exec.errors.verbose, store.format)
#' drill_system_reset(all=TRUE)
#' }
drill_system_reset <- function(..., all=FALSE,
                               drill_server=Sys.getenv("DRILL_URL", unset="http://localhost:8047")) {

  if (all) return(invisible(drill_query("ALTER SYSTEM RESET ALL", drill_server=drill_server)))

  # capture the option names without evaluating them; bare names arrive as
  # symbols and must be turned into strings before they can be formatted
  opt_names <- vapply(as.list(substitute(list(...)))[-1L], function(opt) {
    if (is.name(opt) || is.character(opt)) as.character(opt) else paste(deparse(opt), collapse="")
  }, character(1), USE.NAMES=FALSE)

  queries <- sprintf("ALTER SYSTEM RESET `%s`", opt_names)

  res <- purrr::map_df(queries, function(x) {
    y <- drill_query(x, drill_server=drill_server)
    # failed queries are recognised by the errorMessage element, not by the
    # number of elements in the response
    if ("errorMessage" %in% names(y)) {
      dplyr::data_frame(query=x, param=NA_character_, value=NA,
                        error=as.character(y$errorMessage))
    } else {
      dplyr::data_frame(query=x, param=y$rows$summary, value=y$rows$ok,
                        error=NA_character_)
    }
  })

  # with no options supplied `res` has no rows (and no error column)
  failed <- if (nrow(res) > 0) res[!is.na(res$error), ] else res

  if (nrow(failed) > 0) {

    msgs <- paste0(sprintf("QUERY => %s\n%s\n", failed$query, failed$error), collapse="\n")

    message(sprintf("%d errors:\n\n%s", nrow(failed), msgs))

  }

  invisible(res)

}


#' Changes session settings back to system defaults
#'
#' Sends one \code{ALTER SESSION RESET} statement per option named in
#' \code{...}. If any query errors result, error messages will be presented
#' to the console.
#'
#' @param ... bare name of session options to reset
#' @param drill_server base URL of the \code{drill} server
#' @return a \code{tbl} (invisibly) with the \code{ALTER} queries sent and
#'         results, including errors.
#' @export
#' @examples \dontrun{
#' drill_setting_reset(exec.errors.verbose, store.format)
#' }
drill_setting_reset <- function(...,
                               drill_server=Sys.getenv("DRILL_URL", unset="http://localhost:8047")) {

  # capture the option names without evaluating them; bare names arrive as
  # symbols and must be turned into strings before they can be formatted
  opt_names <- vapply(as.list(substitute(list(...)))[-1L], function(opt) {
    if (is.name(opt) || is.character(opt)) as.character(opt) else paste(deparse(opt), collapse="")
  }, character(1), USE.NAMES=FALSE)

  queries <- sprintf("ALTER SESSION RESET `%s`", opt_names)

  res <- purrr::map_df(queries, function(x) {
    y <- drill_query(x, drill_server=drill_server)
    # failed queries are recognised by the errorMessage element, not by the
    # number of elements in the response
    if ("errorMessage" %in% names(y)) {
      dplyr::data_frame(query=x, param=NA_character_, value=NA,
                        error=as.character(y$errorMessage))
    } else {
      dplyr::data_frame(query=x, param=y$rows$summary, value=y$rows$ok,
                        error=NA_character_)
    }
  })

  # with no options supplied `res` has no rows (and no error column)
  failed <- if (nrow(res) > 0) res[!is.na(res$error), ] else res

  if (nrow(failed) > 0) {

    msgs <- paste0(sprintf("QUERY => %s\n%s\n", failed$query, failed$error), collapse="\n")

    message(sprintf("%d errors:\n\n%s", nrow(failed), msgs))

  }

  invisible(res)

}


M README.Rmd => README.Rmd +10 -5
@@ 22,18 22,24 @@ knitr::opts_chunk$set(

The following functions are implemented:

- `drill_cancel`:	Cancel the query that has the given queryid.
- `drill_metrics`:	Get the current memory metrics
- `drill_options`:	List the name, default, and data type of the system and session options
- `drill_profile`:	Get the profile of the query that has the given queryid.
- `drill_profiles`:	Get the profiles of running and completed queries
- `drill_query`:	Submit a query and return results
- `drill_set`:	Set Drill SYSTEM or SESSION options
- `drill_setting_reset`:	Changes session settings back to system defaults
- `drill_show_files`:	Show files in a file system schema.
- `drill_show_schemas`:	Returns a list of available schemas.
- `drill_stats`:	Get Drillbit information, such as port numbers
- `drill_status`:	Get the status of Drill
- `drill_storage`:	Get the list of storage plugin names and configurations
- `drill_system_reset`:	Changes (optionally, all) system settings back to system defaults
- `drill_threads`:	Get information about threads

### News

- Version 0.1.0 released
- `drill_uplift`:	Turn columnar query results into a type-converted tbl
- `drill_use`:	Change to a particular schema.
- `drill_version`:	Identify the version of Drill running

### Installation



@@ 53,7 59,6 @@ library(sergeant)
# current version
packageVersion("sergeant")

drill_query("SELECT * FROM dfs.`/usr/local/drill/sample-data/nation.parquet`")
```

### Test Results

M README.md => README.md +11 -37
@@ 10,18 10,24 @@

The following functions are implemented:

-   `drill_cancel`: Cancel the query that has the given queryid.
-   `drill_metrics`: Get the current memory metrics
-   `drill_options`: List the name, default, and data type of the system and session options
-   `drill_profile`: Get the profile of the query that has the given queryid.
-   `drill_profiles`: Get the profiles of running and completed queries
-   `drill_query`: Submit a query and return results
-   `drill_set`: Set Drill SYSTEM or SESSION options
-   `drill_setting_reset`: Changes session settings back to system defaults
-   `drill_show_files`: Show files in a file system schema.
-   `drill_show_schemas`: Returns a list of available schemas.
-   `drill_stats`: Get Drillbit information, such as port numbers
-   `drill_status`: Get the status of Drill
-   `drill_storage`: Get the list of storage plugin names and configurations
-   `drill_system_reset`: Changes (optionally, all) system settings back to system defaults
-   `drill_threads`: Get information about threads

### News

-   Version 0.1.0 released
-   `drill_uplift`: Turn columnar query results into a type-converted tbl
-   `drill_use`: Change to a particular schema.
-   `drill_version`: Identify the version of Drill running

### Installation



@@ 37,38 43,6 @@ library(sergeant)
# current version
packageVersion("sergeant")
#> [1] '0.1.0.9000'

drill_query("SELECT * FROM dfs.`/usr/local/drill/sample-data/nation.parquet`")
#> $columns
#> [1] "N_NATIONKEY" "N_NAME"      "N_REGIONKEY" "N_COMMENT"  
#> 
#> $rows
#>               N_COMMENT         N_NAME N_NATIONKEY N_REGIONKEY
#> 1   haggle. carefully f        ALGERIA           0           0
#> 2  al foxes promise sly      ARGENTINA           1           1
#> 3  y alongside of the p         BRAZIL           2           1
#> 4  eas hang ironic, sil         CANADA           3           1
#> 5  y above the carefull          EGYPT           4           4
#> 6  ven packages wake qu       ETHIOPIA           5           0
#> 7  refully final reques         FRANCE           6           3
#> 8  l platelets. regular        GERMANY           7           3
#> 9  ss excuses cajole sl          INDIA           8           2
#> 10  slyly express asymp      INDONESIA           9           2
#> 11 efully alongside of            IRAN          10           4
#> 12 nic deposits boost a           IRAQ          11           4
#> 13 ously. final, expres          JAPAN          12           2
#> 14 ic deposits are blit         JORDAN          13           4
#> 15  pending excuses hag          KENYA          14           0
#> 16 rns. blithely bold c        MOROCCO          15           0
#> 17 s. ironic, unusual a     MOZAMBIQUE          16           0
#> 18 platelets. blithely            PERU          17           1
#> 19 c dependencies. furi          CHINA          18           2
#> 20 ular asymptotes are         ROMANIA          19           3
#> 21 ts. silent requests    SAUDI ARABIA          20           4
#> 22 hely enticingly expr        VIETNAM          21           2
#> 23  requests against th         RUSSIA          22           3
#> 24 eans boost carefully UNITED KINGDOM          23           3
#> 25 y final packages. sl  UNITED STATES          24           1
```

### Test Results


@@ 78,7 52,7 @@ library(sergeant)
library(testthat)

date()
#> [1] "Thu Jun  2 22:04:19 2016"
#> [1] "Sat Dec  3 11:28:51 2016"

test_dir("tests/")
#> testthat results ========================================================================================================

M man/drill_query.Rd => man/drill_query.Rd +4 -2
@@ 4,12 4,14 @@
\alias{drill_query}
\title{Submit a query and return results}
\usage{
drill_query(query, drill_server = Sys.getenv("DRILL_URL", unset =
  "http://localhost:8047"))
drill_query(query, uplift = FALSE, drill_server = Sys.getenv("DRILL_URL",
  unset = "http://localhost:8047"))
}
\arguments{
\item{query}{query to run}

\item{uplift}{automatically run `drill_uplift()` on the result?}

\item{drill_server}{base URL of the \code{drill} server}
}
\description{

A man/drill_set.Rd => man/drill_set.Rd +33 -0
@@ 0,0 1,33 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/set.R
\name{drill_set}
\alias{drill_set}
\title{Set Drill SYSTEM or SESSION options}
\usage{
drill_set(..., type = c("session", "system"),
  drill_server = Sys.getenv("DRILL_URL", unset = "http://localhost:8047"))
}
\arguments{
\item{...}{named parameters to be sent to \code{ALTER [SYSTEM|SESSION]}}

\item{type}{set the \code{session} or \code{system} parameter}

\item{drill_server}{base URL of the \code{drill} server}
}
\value{
a \code{tbl} (invisibly) with the \code{ALTER} queries sent and results, including errors.
}
\description{
Helper function to make it more R-like to set Drill SESSION or SYSTEM options. It
handles the conversion of R types (like \code{TRUE}) to SQL types and automatically
quotes parameter values (when necessary).
}
\details{
If any query errors result, error messages will be presented to the console.
}
\examples{
\dontrun{
drill_set(exec.errors.verbose=TRUE, store.format="parquet", web.logs.max_lines=20000)
}
}


A man/drill_setting_reset.Rd => man/drill_setting_reset.Rd +18 -0
@@ 0,0 1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/set.R
\name{drill_setting_reset}
\alias{drill_setting_reset}
\title{Changes (optionally, all) session settings back to system defaults}
\usage{
drill_setting_reset(..., drill_server = Sys.getenv("DRILL_URL", unset =
  "http://localhost:8047"))
}
\arguments{
\item{...}{bare name of system options to reset}

\item{drill_server}{base URL of the \code{drill} server}
}
\description{
Changes (optionally, all) session settings back to system defaults
}


A man/drill_show_files.Rd => man/drill_show_files.Rd +24 -0
@@ 0,0 1,24 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/schemas.R
\name{drill_show_files}
\alias{drill_show_files}
\title{Show files in a file system schema.}
\usage{
drill_show_files(schema_spec, drill_server = Sys.getenv("DRILL_URL", unset =
  "http://localhost:8047"))
}
\arguments{
\item{schema_spec}{properly quoted "filesystem.directory_name" reference path}

\item{drill_server}{base URL of the \code{drill} server}
}
\description{
Show files in a file system schema.
}
\examples{
\dontrun{
drill_show_files("dfs.tmp")
drill_show_files("dfs.tmp")
}
}


A man/drill_show_schemas.Rd => man/drill_show_schemas.Rd +16 -0
@@ 0,0 1,16 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/schemas.R
\name{drill_show_schemas}
\alias{drill_show_schemas}
\title{Returns a list of available schemas.}
\usage{
drill_show_schemas(drill_server = Sys.getenv("DRILL_URL", unset =
  "http://localhost:8047"))
}
\arguments{
\item{drill_server}{base URL of the \code{drill} server}
}
\description{
Returns a list of available schemas.
}


A man/drill_system_reset.Rd => man/drill_system_reset.Rd +20 -0
@@ 0,0 1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/set.R
\name{drill_system_reset}
\alias{drill_system_reset}
\title{Changes (optionally, all) system settings back to system defaults}
\usage{
drill_system_reset(..., all = FALSE, drill_server = Sys.getenv("DRILL_URL",
  unset = "http://localhost:8047"))
}
\arguments{
\item{...}{bare name of system options to reset}

\item{all}{if \code{TRUE}, all parameters are reset (\code{...} is ignored)}

\item{drill_server}{base URL of the \code{drill} server}
}
\description{
Changes (optionally, all) system settings back to system defaults
}


A man/drill_uplift.Rd => man/drill_uplift.Rd +17 -0
@@ 0,0 1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/query.r
\name{drill_uplift}
\alias{drill_uplift}
\title{Turn a columnar query results into a type-converted tbl}
\usage{
drill_uplift(query_result)
}
\arguments{
\item{query_result}{the result of a call to `drill_query()`}
}
\description{
If you know the result of `drill_query()` will be a data frame, then
you can pipe it to this function to pull out `rows` and automatically
type-convert it.
}


A man/drill_use.Rd => man/drill_use.Rd +19 -0
@@ 0,0 1,19 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/schemas.R
\name{drill_use}
\alias{drill_use}
\title{Change to a particular schema.}
\usage{
drill_use(schema_name, drill_server = Sys.getenv("DRILL_URL", unset =
  "http://localhost:8047"))
}
\arguments{
\item{schema_name}{A unique name for a Drill schema. A schema in Drill is a configured
storage plugin, such as hive, or a storage plugin and workspace.}

\item{drill_server}{base URL of the \code{drill} server}
}
\description{
Change to a particular schema.
}


A man/drill_version.Rd => man/drill_version.Rd +16 -0
@@ 0,0 1,16 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/schemas.R
\name{drill_version}
\alias{drill_version}
\title{Identify the version of Drill running}
\usage{
drill_version(drill_server = Sys.getenv("DRILL_URL", unset =
  "http://localhost:8047"))
}
\arguments{
\item{drill_server}{base URL of the \code{drill} server}
}
\description{
Identify the version of Drill running
}


M man/sergeant.Rd => man/sergeant.Rd +2 -2
@@ 4,9 4,9 @@
\name{sergeant}
\alias{sergeant}
\alias{sergeant-package}
\title{Tools to Transform and Query Data with the 'Apache' 'Drill' 'API'}
\title{Tools to Transform and Query Data with the 'Apache' 'Drill' 'REST' 'API'}
\description{
Tools to Transform and Query Data with the 'Apache' 'Drill' 'API'
Tools to Transform and Query Data with the 'Apache' 'Drill' 'REST' 'API'
}
\author{
Bob Rudis (bob@rud.is)