M DESCRIPTION => DESCRIPTION +0 -2
@@ 29,12 29,10 @@ Imports:
readr (>= 1.1.1),
purrr (>= 0.2.2),
scales (>= 0.4.1),
- RJDBC (>= 0.2-5),
utils,
methods,
magrittr
Suggests:
- rJava (>= 0.9-8),
testthat (>= 1.0.2),
covr (>= 3.0.0)
RoxygenNote: 6.0.1.9000
M NAMESPACE => NAMESPACE +1 -16
@@ 2,26 2,18 @@
S3method(copy_to,src_drill)
S3method(db_data_type,DrillConnection)
-S3method(db_data_type,DrillJDBCConnection)
-S3method(db_data_type,tbl_drill_jdbc)
S3method(db_desc,src_drill)
S3method(db_explain,DrillConnection)
S3method(db_query_fields,DrillConnection)
S3method(sql_escape_ident,DrillConnection)
-S3method(sql_escape_ident,DrillJDBCConnection)
S3method(sql_translate_env,DrillConnection)
-S3method(sql_translate_env,DrillJDBCConnection)
-S3method(src_tbls,src_dbi)
S3method(src_tbls,src_drill)
S3method(tbl,src_drill)
-S3method(tbl,src_drill_jdbc)
export("%>%")
export(Drill)
-export(DrillJDBC)
export(drill_active)
export(drill_cancel)
export(drill_connection)
-export(drill_jdbc)
export(drill_metrics)
export(drill_mod_storage)
export(drill_options)
@@ 42,12 34,9 @@ export(drill_uplift)
export(drill_use)
export(drill_version)
export(src_drill)
-export(src_drill_jdbc)
+export(tbl)
exportClasses(DrillConnection)
exportClasses(DrillDriver)
-exportClasses(DrillJDBCConnection)
-exportClasses(DrillJDBCDriver)
-exportClasses(DrillJDBCResult)
exportClasses(DrillResult)
exportMethods(dbClearResult)
exportMethods(dbConnect)
@@ 58,7 47,6 @@ exportMethods(dbGetStatement)
exportMethods(dbHasCompleted)
exportMethods(dbIsValid)
exportMethods(dbListFields)
-exportMethods(dbSendQuery)
exportMethods(dbUnloadDriver)
import(DBI)
import(htmltools)
@@ 66,9 54,6 @@ import(httr)
import(jsonlite)
import(methods)
import(utils)
-importClassesFrom(RJDBC,JDBCConnection)
-importClassesFrom(RJDBC,JDBCDriver)
-importClassesFrom(RJDBC,JDBCResult)
importFrom(dbplyr,base_agg)
importFrom(dbplyr,base_scalar)
importFrom(dbplyr,base_win)
M NEWS.md => NEWS.md +9 -0
@@ 1,3 1,12 @@
+# sergeant 0.7.0-BETA
+
+- separated out the Java-based JDBC functions into a sibling package, `sergeant.caffeinated`
+
+# sergeant 0.7.0-ALPHA
+
+- enhanced JDBC connectivity
+- updated REST API to handle new features in Drill 1.14.0
+
# sergeant 0.6.0
- Authentication support for DBI/dplyr and `drill_connection()` pure REST interface
M R/dplyr.r => R/dplyr.r +3 -2
@@ 13,7 13,8 @@
#' @param username,password if not `NULL` the credentials for the Drill service.
#' @note This is a DBI wrapper around the Drill REST API.
#' @export
-#' @examples \dontrun{
+#' @examples
+#' try({
#' db <- src_drill("localhost", 8047L)
#'
#' print(db)
@@ 57,7 58,7 @@
#' ## 9 2 Darren Stanz 12 Darren Stanz******** 5 D*rr*n St*nz
#' ## 10 4 Jonathan Murraiin 17 Jonathan Murraiin*** 0 J*n*th*n M*rr***n
#' ## # ... with more rows, and 3 more variables: rpd <chr>, rnd <dbl>, first_three <chr>
-#' }
+#' }, silent=TRUE)
src_drill <- function(host = Sys.getenv("DRILL_HOST", "localhost"),
port = as.integer(Sys.getenv("DRILL_PORT", 8047L)),
ssl = FALSE, username = NULL, password = NULL) {
D R/jdbc.r => R/jdbc.r +0 -425
@@ 1,425 0,0 @@
-.fillStatementParameters <- function(s, l) {
- for (i in 1:length(l)) {
- v <- l[[i]]
- if (is.na(v)) { # map NAs to NULLs (courtesy of Axel Klenk)
- sqlType <- if (is.integer(v)) 4 else if (is.numeric(v)) 8 else 12
- rJava::.jcall(s, "V", "setNull", i, as.integer(sqlType))
- } else if (is.integer(v))
- rJava::.jcall(s, "V", "setInt", i, v[1])
- else if (is.numeric(v))
- rJava::.jcall(s, "V", "setDouble", i, as.double(v)[1])
- else
- rJava::.jcall(s, "V", "setString", i, as.character(v)[1])
- }
-}
-
-#' JDBC Driver for Drill database.
-#'
-#' @keywords internal
-#' @export
-setClass(
- Class = "DrillJDBCDriver",
- contains = "JDBCDriver"
-)
-
-#' Drill JDBC connection class.
-#'
-#' @export
-#' @keywords internal
-#' @export
-setClass(
- Class = "DrillJDBCConnection",
- contains = "JDBCConnection"
-)
-
-#' Connect to Drill JDBC with your own connection string
-#'
-#' You should really use [drill_jdbc()] as it handles some cruft for
-#' you, but you can specify the full JDBC connection string
-#'
-#' @md
-#' @family Drill JDBC API
-#' @param drv what you get back from [DrillJDBC()]
-#' @param url your Drill connection strinfg
-#' @param user,password username & password (leave as-is for no-auth)
-#' @param ... additional `name=val` properties which will be set with Java's
-#' `SetProperty` method.
-#' @export
-setMethod(
- f = "dbConnect",
- signature = "DrillJDBCDriver",
- definition = function(drv, url, user='', password='', ...) {
-
- rJava::.jcall(
- "java/sql/DriverManager",
- "Ljava/sql/Connection;",
- "getConnection",
- as.character(url)[1],
- as.character(user)[1],
- as.character(password)[1],
- check = FALSE
- ) -> jc
-
- if (rJava::is.jnull(jc) && !rJava::is.jnull(drv@jdrv)) {
- # ok one reason for this to fail is its interaction with rJava's
- # class loader. In that case we try to load the driver directly.
- oex <- rJava::.jgetEx(TRUE)
-
- p <- rJava::.jnew("java/util/Properties")
-
- if (length(user)==1 && nchar(user)) {
- rJava::.jcall(p,"Ljava/lang/Object;","setProperty","user",user)
- }
-
- if (length(password)==1 && nchar(password)) {
- rJava::.jcall(p,"Ljava/lang/Object;","setProperty","password",password)
- }
-
- l <- list(...)
- if (length(names(l))) for (n in names(l)) {
- rJava::.jcall(p, "Ljava/lang/Object;", "setProperty", n, as.character(l[[n]]))
- }
-
- jc <- rJava::.jcall(drv@jdrv, "Ljava/sql/Connection;", "connect", as.character(url)[1], p)
-
- }
-
- .verify.JDBC.result(jc, "Unable to connect JDBC to ",url)
-
- new("DrillJDBCConnection", jc=jc, identifier.quote=drv@identifier.quote)
-
- },
-
- valueClass = "DrillJDBCConnection"
-
-)
-
-#' Drill JDBC dbDataType
-#'
-#' @param dbObj A \code{\linkS4class{DrillJDBCDriver}} object
-#' @param obj Any R object
-#' @param ... Extra optional parameters
-#' @family Drill JDBC API
-#' @export
-setMethod(
- "dbDataType",
- "DrillJDBCConnection",
- function(dbObj, obj, ...) {
- if (is.integer(obj)) "INTEGER"
- else if (inherits(obj, "Date")) "DATE"
- else if (identical(class(obj), "times")) "TIME"
- else if (inherits(obj, "POSIXct")) "TIMESTAMP"
- else if (is.numeric(obj)) "DOUBLE"
- else "VARCHAR(255)"
- },
- valueClass = "character"
-)
-
-
-#' Drill's JDBC driver main class loader
-#'
-#' @family Drill JDBC API
-#' @export
-DrillJDBC <- function() {
-
- driverClass <- "org.apache.drill.jdbc.Driver"
-
- ## expand all paths in the classPath
- classPath <- path.expand(unlist(strsplit(Sys.getenv("DRILL_JDBC_JAR"), .Platform$path.sep)))
-
- ## this is benign in that it's equivalent to rJava::.jaddClassPath if a JVM is running
- rJava::.jinit(classPath)
-
- rJava::.jaddClassPath(system.file("java", "RJDBC.jar", package="RJDBC"))
- rJava::.jaddClassPath(system.file("java", "slf4j-nop-1.7.25.jar", package = "sergeant"))
-
- if (nchar(driverClass) && rJava::is.jnull(rJava::.jfindClass(as.character(driverClass)[1]))) {
- stop("Cannot find JDBC driver class ",driverClass)
- }
-
- jdrv <- rJava::.jnew(driverClass, check=FALSE)
-
- rJava::.jcheck(TRUE)
-
- if (rJava::is.jnull(jdrv)) jdrv <- rJava::.jnull()
-
- new("DrillJDBCDriver", identifier.quote = "`", jdrv = jdrv)
-
-}
-
-#' Connect to Drill using JDBC
-#'
-#' The DRILL JDBC driver fully-qualified path must be placed in the
-#' \code{DRILL_JDBC_JAR} environment variable. This is best done via \code{~/.Renviron}
-#' for interactive work. e.g. \code{DRILL_JDBC_JAR=/usr/local/drill/jars/jdbc-driver/drill-jdbc-all-1.10.0.jar}
-#'
-#' [src_drill_jdbc()] wraps the JDBC [dbConnect()] connection instantation in
-#' [dbplyr::src_dbi()] to return the equivalent of the REST driver's [src_drill()].
-#'
-#' @param nodes character vector of nodes. If more than one node, you can either have
-#' a single string with the comma-separated node:port pairs pre-made or
-#' pass in a character vector with multiple node:port strings and the
-#' function will make a comma-separated node string for you.
-#' @param cluster_id the cluster id from \code{drill-override.conf}
-#' @param schema an optional schema name to append to the JDBC connection string
-#' @param use_zk are you connecting to a ZooKeeper instance (default: \code{TRUE}) or
-#' connecting to an individual DrillBit.
-#' @family Drill JDBC API
-#' @return a JDBC connection object
-#' @references \url{https://drill.apache.org/docs/using-the-jdbc-driver/#using-the-jdbc-url-for-a-random-drillbit-connection}
-#' @export
-#' @examples \dontrun{
-#' con <- drill_jdbc("localhost:2181", "main")
-#' drill_query(con, "SELECT * FROM cp.`employee.json`")
-#'
-#' # you can also use the connection with RJDBC calls:
-#' dbGetQuery(con, "SELECT * FROM cp.`employee.json`")
-#'
-#' # for local/embedded mode with default configuration info
-#' con <- drill_jdbc("localhost:31010", use_zk=FALSE)
-#' }
-drill_jdbc <- function(nodes = "localhost:2181", cluster_id = NULL,
- schema = NULL, use_zk = TRUE) {
-
- try_require("rJava")
- try_require("RJDBC")
-
- jar_path <- Sys.getenv("DRILL_JDBC_JAR")
- if (!file.exists(jar_path)) {
- stop(sprintf("Cannot locate DRILL JDBC JAR [%s]", jar_path))
- }
-
- drill_jdbc_drv <- DrillJDBC()
-
- conn_type <- "drillbit"
- if (use_zk) conn_type <- "zk"
-
- if (length(nodes) > 1) nodes <- paste0(nodes, collapse=",")
-
- conn_str <- sprintf("jdbc:drill:%s=%s", conn_type, nodes)
-
- if (!is.null(cluster_id)) {
- conn_str <- sprintf("%s%s", conn_str, sprintf("/drill/%s", cluster_id))
- }
-
- if (!is.null(schema)) conn_str <- sprintf("%s;%s", schema)
-
- message(sprintf("Using [%s]...", conn_str))
-
- dbConnect(drill_jdbc_drv, conn_str)
-
-}
-
-#' @rdname drill_jdbc
-#' @family Drill JDBC API
-#' @export
-src_drill_jdbc <- function(nodes = "localhost:2181", cluster_id = NULL,
- schema = NULL, use_zk = TRUE) {
-
- con <- drill_jdbc(nodes, cluster_id, schema, use_zk)
- src_sql("drill_jdbc", con)
-
-}
-
-#' @rdname drill_jdbc
-#' @param src A Drill "src" created with \code{src_drill()}
-#' @param from A Drill view or table specification
-#' @param ... Extra parameters
-#' @family Drill JDBC API
-#' @export
-tbl.src_drill_jdbc <- function(src, from, ...) {
- tbl_sql("drill_jdbc", src=src, from=from, ...)
-}
-
-#' Drill internals
-#'
-#' @rdname drill_jdbc_internals
-#' @keywords internal
-#' @export
-db_data_type.DrillJDBCConnection <- function(con, fields, ...) {
- data_type <- function(x) {
- switch(
- class(x)[1],
- logical = "BOOLEAN",
- integer = "INTEGER",
- numeric = "DOUBLE",
- factor = "CHARACTER",
- character = "CHARACTER",
- Date = "DATE",
- POSIXct = "TIMESTAMP",
- stop("Can't map type ", paste(class(x), collapse = "/"),
- " to a supported database type.")
- )
- }
- vapply(fields, data_type, character(1))
-}
-
-#' Drill internals
-#'
-#' @rdname drill_jdbc_internals
-#' @keywords internal
-#' @export
-db_data_type.tbl_drill_jdbc <- db_data_type.DrillJDBCConnection
-
-#' @rdname drill_jdbc_internals
-#' @keywords internal
-#' @export
-setClass("DrillJDBCResult", representation("JDBCResult", jr="jobjRef", md="jobjRef", stat="jobjRef", pull="jobjRef"))
-
-#' @rdname drill_jdbc_internals
-#' @keywords internal
-#' @export
-setMethod(
- f = "dbSendQuery",
- signature = signature(conn="DrillJDBCConnection", statement="character"),
- definition = function(conn, statement, ..., list=NULL) {
- statement <- as.character(statement)[1L]
- ## if the statement starts with {call or {?= call then we use CallableStatement
- if (isTRUE(as.logical(grepl("^\\{(call|\\?= *call)", statement)))) {
- s <- rJava::.jcall(conn@jc, "Ljava/sql/CallableStatement;", "prepareCall", statement, check=FALSE)
- .verify.JDBC.result(s, "Unable to execute JDBC callable statement ",statement)
- if (length(list(...))) .fillStatementParameters(s, list(...))
- if (!is.null(list)) .fillStatementParameters(s, list)
- r <- rJava::.jcall(s, "Ljava/sql/ResultSet;", "executeQuery", check=FALSE)
- .verify.JDBC.result(r, "Unable to retrieve JDBC result set for ",statement)
- } else if (length(list(...)) || length(list)) { ## use prepared statements if there are additional arguments
- s <- rJava::.jcall(conn@jc, "Ljava/sql/PreparedStatement;", "prepareStatement", statement, check=FALSE)
- .verify.JDBC.result(s, "Unable to execute JDBC prepared statement ", statement)
- if (length(list(...))) .fillStatementParameters(s, list(...))
- if (!is.null(list)) .fillStatementParameters(s, list)
- r <- rJava::.jcall(s, "Ljava/sql/ResultSet;", "executeQuery", check=FALSE)
- .verify.JDBC.result(r, "Unable to retrieve JDBC result set for ",statement)
- } else { ## otherwise use a simple statement some DBs fail with the above)
- s <- rJava::.jcall(conn@jc, "Ljava/sql/Statement;", "createStatement")
- .verify.JDBC.result(s, "Unable to create simple JDBC statement ",statement)
- r <- rJava::.jcall(s, "Ljava/sql/ResultSet;", "executeQuery", as.character(statement)[1], check=FALSE)
- .verify.JDBC.result(r, "Unable to retrieve JDBC result set for ",statement)
- }
- md <- rJava::.jcall(r, "Ljava/sql/ResultSetMetaData;", "getMetaData", check=FALSE)
- .verify.JDBC.result(md, "Unable to retrieve JDBC result set meta data for ",statement, " in dbSendQuery")
- new("DrillJDBCResult", jr=r, md=md, stat=s, pull=rJava::.jnull())
- })
-
-#' @rdname drill_jdbc_internals
-#' @keywords internal
-#' @export
-sql_escape_ident.DrillJDBCConnection <- function(con, x) {
- ifelse(grepl(con@identifier.quote, x), sql_quote(x, ' '), sql_quote(x, con@identifier.quote))
-}
-
-#' @rdname drill_jdbc_internals
-#' @keywords internal
-#' @export
-sql_translate_env.DrillJDBCConnection <- function(con) {
-
- x <- con
-
- dbplyr::sql_variant(
-
- scalar = dbplyr::sql_translator(
- .parent = dbplyr::base_scalar,
- `!=` = dbplyr::sql_infix("<>"),
- as.numeric = function(x) build_sql("CAST(", x, " AS DOUBLE)"),
- as.character = function(x) build_sql("CAST(", x, " AS CHARACTER)"),
- as.date = function(x) build_sql("CAST(", x, " AS DATE)"),
- as.posixct = function(x) build_sql("CAST(", x, " AS TIMESTAMP)"),
- as.logical = function(x) build_sql("CAST(", x, " AS BOOLEAN)"),
- date_part = function(x, y) build_sql("DATE_PART(", x, ",", y ,")"),
- grepl = function(x, y) build_sql("CONTAINS(", y, ", ", x, ")"),
- gsub = function(x, y, z) build_sql("REGEXP_REPLACE(", z, ", ", x, ",", y ,")"),
- str_replace = function(x, y, z) build_sql("REGEXP_REPLACE(", x, ", ", y, ",", z ,")"),
- trimws = function(x) build_sql("TRIM(both ' ' FROM ", x, ")"),
- cbrt = sql_prefix("CBRT", 1),
- degrees = sql_prefix("DEGREES", 1),
- e = sql_prefix("E", 0),
- row_number = sql_prefix("row_number", 0),
- lshift = sql_prefix("LSHIFT", 2),
- mod = sql_prefix("MOD", 2),
- age = sql_prefix("AGE", 1),
- negative = sql_prefix("NEGATIVE", 1),
- pi = sql_prefix("PI", 0),
- pow = sql_prefix("POW", 2),
- radians = sql_prefix("RADIANS", 1),
- rand = sql_prefix("RAND", 0),
- rshift = sql_prefix("RSHIFT", 2),
- trunc = sql_prefix("TRUNC", 2),
- contains = sql_prefix("CONTAINS", 2),
- convert_to = sql_prefix("CONVERT_TO", 2),
- convert_from = sql_prefix("CONVERT_FROM", 2),
- string_binary = sql_prefix("STRING_BINARY", 1),
- binary_string = sql_prefix("BINARY_STRING", 1),
- to_char = sql_prefix("TO_CHAR", 2),
- to_date = sql_prefix("TO_DATE", 2),
- to_number = sql_prefix("TO_NUMBER", 2),
- char_to_timestamp = sql_prefix("TO_TIMESTAMP", 2),
- double_to_timestamp = sql_prefix("TO_TIMESTAMP", 1),
- char_length = sql_prefix("CHAR_LENGTH", 1),
- flatten = sql_prefix("FLATTEN", 1),
- kvgen = sql_prefix("KVGEN", 1),
- repeated_count = sql_prefix("REPEATED_COUNT", 1),
- repeated_contains = sql_prefix("REPEATED_CONTAINS", 2),
- ilike = sql_prefix("ILIKE", 2),
- init_cap = sql_prefix("INIT_CAP", 1),
- length = sql_prefix("LENGTH", 1),
- lower = sql_prefix("LOWER", 1),
- str_to_lower = sql_prefix("LOWER", 1),
- tolower = sql_prefix("LOWER", 1),
- ltrim = sql_prefix("LTRIM", 2),
- nullif = sql_prefix("NULLIF", 2),
- position = function(x, y) build_sql("POSITION(", x, " IN ", y, ")"),
- regexp_replace = sql_prefix("REGEXP_REPLACE", 3),
- rtrim = sql_prefix("RTRIM", 2),
- rpad = sql_prefix("RPAD", 2),
- rpad_with = sql_prefix("RPAD", 3),
- lpad = sql_prefix("LPAD", 2),
- lpad_with = sql_prefix("LPAD", 3),
- strpos = sql_prefix("STRPOS", 2),
- substr = sql_prefix("SUBSTR", 3),
- str_sub = sql_prefix("SUBSTR", 3),
- trim = function(x, y, z) build_sql("TRIM(", x, " ", y, " FROM ", z, ")"),
- upper = sql_prefix("UPPER", 1),
- str_to_upper = sql_prefix("UPPER", 1),
- toupper = sql_prefix("UPPER", 1)
- ),
-
- aggregate = dbplyr::sql_translator(
- .parent = dbplyr::base_agg,
- n = function() dbplyr::sql("COUNT(*)"),
- cor = dbplyr::sql_prefix("CORR"),
- cov = dbplyr::sql_prefix("COVAR_SAMP"),
- sd = dbplyr::sql_prefix("STDDEV_SAMP"),
- var = dbplyr::sql_prefix("VAR_SAMP"),
- n_distinct = function(x) {
- dbplyr::build_sql(dbplyr::sql("COUNT(DISTINCT "), x, dbplyr::sql(")"))
- }
- ),
-
- window = dbplyr::sql_translator(
- .parent = dbplyr::base_win,
- n = function() { dbplyr::win_over(dbplyr::sql("count(*)"),
- partition = dbplyr::win_current_group()) },
- cor = dbplyr::win_recycled("corr"),
- cov = dbplyr::win_recycled("covar_samp"),
- sd = dbplyr::win_recycled("stddev_samp"),
- var = dbplyr::win_recycled("var_samp"),
- all = dbplyr::win_recycled("bool_and"),
- any = dbplyr::win_recycled("bool_or")
- )
-
- )
-
-}
-
-#' src tbls
-#'
-#' "SHOW DATABASES"
-#'
-#' @rdname drill_jdbc_internals
-#' @family Drill JDBC API
-#' @keywords internal
-#' @param x x
-#' @export
-src_tbls.src_dbi <- function(x) {
- tmp <- dbGetQuery(x$con, "SHOW DATABASES")
- paste0(unlist(tmp$SCHEMA_NAME, use.names=FALSE), collapse=", ")
-}
M R/query.r => R/query.r +5 -3
@@ 17,10 17,11 @@
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @family Drill direct REST API Interface
#' @export
-#' @examples \dontrun{
+#' @examples
+#' try({
#' drill_connection() %>%
#' drill_query("SELECT * FROM cp.`employee.json` limit 5")
-#' }
+#' }, silent=TRUE)
drill_query <- function(drill_con, query, uplift=TRUE, .progress=interactive()) {
query <- trimws(query)
@@ 30,8 31,9 @@ drill_query <- function(drill_con, query, uplift=TRUE, .progress=interactive())
try_require("rJava")
try_require("RJDBC")
+ try_require("sergeant.caffeinated")
- dplyr::tbl_df(RJDBC::dbGetQuery(drill_con, query))
+ dplyr::tbl_df(dbGetQuery(drill_con, query))
} else {
M R/schemas.R => R/schemas.R +3 -2
@@ 30,9 30,10 @@ drill_use <- function(drill_con, schema_name) {
#' @export
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}
#' @family Drill direct REST API Interface
-#' @examples \dontrun{
+#' @examples
+#' try({
#' drill_connection() %>% drill_show_files("dfs.tmp")
-#' }
+#' }, silent=TRUE)
drill_show_files <- function(drill_con, schema_spec) {
query <- sprintf("SHOW FILES IN %s", schema_spec)
drill_query(drill_con, query, uplift=TRUE) %>%
M R/sergeant-package.r => R/sergeant-package.r +6 -2
@@ 23,7 23,7 @@
#' network or between nodes. Drill uses ZooKeeper to maintain cluster membership and health
#' check information.
#'
-#' Methods are provided to work with Drill via the native JDBC & REST APIs along with R
+#' Methods are provided to work with Drill via the REST API along with R
#' \code{DBI} and \code{dplyr} interfaces.
#'
#' @name sergeant
@@ 40,7 40,6 @@
#' @import utils
#' @import DBI methods
#' @importFrom scales comma
-#' @importClassesFrom RJDBC JDBCDriver JDBCConnection JDBCResult
NULL
@@ 57,3 56,8 @@ NULL
#' @export
#' @rdname sergeant-exports
NULL
+
+#' @name tbl
+#' @export
+#' @rdname sergeant-exports
+NULL
M R/sergeant.r => R/sergeant.r +3 -2
@@ 41,9 41,10 @@ drill_connection <- function(host=Sys.getenv("DRILL_HOST", "localhost"),
#' @param drill_con drill server connection object setup by \code{drill_connection()}
#' @export
#' @family Drill direct REST API Interface
-#' @examples \dontrun{
+#' @examples
+#' try({
#' drill_connection() %>% drill_active()
-#' }
+#' }, silent=TRUE)
drill_active <- function(drill_con) {
drill_server <- make_server(drill_con)
!is.null(s_head(drill_server, httr::timeout(2))$result)
M R/utils.r => R/utils.r +0 -11
@@ 1,14 1,3 @@
-.verify.JDBC.result <- function (result, ...) {
- if (rJava::is.jnull(result)) {
- x <- rJava::.jgetEx(TRUE)
- if (rJava::is.jnull(x))
- stop(...)
- else
- stop(...," (",rJava::.jcall(x, "S", "getMessage"),")")
- }
-}
-
-
try_require <- function(package, fun) {
if (requireNamespace(package, quietly = TRUE)) {
library(package, character.only = TRUE)
D R/zzz.R => R/zzz.R +0 -3
@@ 1,3 0,0 @@
-# .onLoad <- function(libname, pkgname) {
-# if (requireNamespace("rJava")) rJava::.jpackage(pkgname, lib.loc = libname)
-# }
M README.Rmd => README.Rmd +23 -120
@@ 1,12 1,14 @@
---
output: github_document
+editor_options:
+ chunk_output_type: console
---
<!-- README.md is generated from README.Rmd. Please edit that file -->
```{r, echo = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
- comment = "#>",
+ comment = "##",
fig.path = "README-"
)
```
@@ 22,19 24,19 @@ Tools to Transform and Query Data with 'Apache' 'Drill'
## NOTE
-Version 0.7.0 re-introduces an `RJDBC` (and, as such, an `rJava` depedency). If you desire this to be put into a sibling package, [cast your vote](https://github.com/hrbrmstr/sergeant/issues/20).
+Version 0.7.0 splits off the JDBC interface into a separate package `sergeant.caffeinated` ([GitLab](https://gitlab.com/hrbrmstr/sergeant-caffeinated); [GitHub](https://github.com/hrbrmstr/sergeant-caffeinated)).
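+
+If you previously used the JDBC interface from this package, a minimal migration sketch (assuming the JDBC functions keep their names & signatures in the sibling package) looks like this:
+
+```{r eval=FALSE}
+# hypothetical migration sketch: install the sibling package, then use its
+# JDBC connector in place of the one that used to ship with sergeant
+devtools::install_github("hrbrmstr/sergeant-caffeinated")
+
+library(sergeant.caffeinated)
+
+# drill-embedded (no ZooKeeper)
+con <- drill_jdbc("localhost:31010", use_zk = FALSE)
+dbGetQuery(con, "SELECT * FROM cp.`employee.json` LIMIT 5")
+```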
## Description
-Drill + `sergeant` is (IMO) a nice alternative to Spark + `sparklyr` if you don't need the ML components of Spark (i.e. just need to query "big data" sources, need to interface with parquet, need to combine disparate data source types — json, csv, parquet, rdbms - for aggregation, etc). Drill also has support for spatial queries.
+Drill + `sergeant` is (IMO) a streamlined alternative to Spark + `sparklyr` if you don't need the ML components of Spark (i.e. just need to query "big data" sources, need to interface with parquet, need to combine disparate data source types — json, csv, parquet, rdbms - for aggregation, etc). Drill also has support for spatial queries.
-I find writing SQL queries to parquet files with Drill on a local linux or macOS workstation to be more performant than doing the data ingestion work with R (especially for large or disperate data sets). I also work with many tiny JSON files on a daily basis and Drill makes it much easier to do so. YMMV.
+Using Drill SQL queries that reference parquet files on a local Linux or macOS workstation can often be more performant than doing the same data ingestion & wrangling work with R (especially for large or disparate data sets). Drill can also help streamline workflows that involve wrangling many tiny JSON files on a daily basis.
-You can download Drill from <https://drill.apache.org/download/> (use "Direct File Download"). I use `/usr/local/drill` as the install directory. `drill-embedded` is a super-easy way to get started playing with Drill on a single workstation and most of my workflows can get by using Drill this way. If there is sufficient desire for an automated downloader and a way to start the `drill-embedded` server from within R, please file an issue.
+Drill can be obtained from <https://drill.apache.org/download/> (use "Direct File Download"). Drill can also be installed via [Docker](https://drill.apache.org/docs/running-drill-on-docker/). For local installs on Unix-like systems, a common/suggested install location is `/usr/local/drill`.
-There are a few convenience wrappers for various informational SQL queries (like `drill_version()`). Please file an PR if you add more.
+Drill embedded (started using the `$DRILL_BASE_DIR/bin/drill-embedded` script) is a super-easy way to get started playing with Drill on a single workstation, and many workflows can "get by" using Drill this way.
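+
+A minimal "first contact" sketch for a local `drill-embedded` instance (the install path below is an assumption; adjust it for your system):
+
+```{r eval=FALSE}
+# in a terminal (not in R), start Drill embedded first, e.g.:
+#   /usr/local/drill/bin/drill-embedded
+
+# then, from R, hit the REST API on the default port (8047)
+library(sergeant)
+
+dc <- drill_connection("localhost")
+drill_active(dc)   # TRUE if the Drillbit is reachable
+drill_version(dc)
+```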
-The package has been written with retrieval of rectangular data sources in mind. If you need/want a version of `drill_query()` that will enable returning of non-rectangular data (which is possible with Drill) then please file an issue.
+There are a few convenience wrappers for various informational SQL queries (like `drill_version()`). Please file a PR if you add more.
Some of the more "controlling vs data ops" REST API functions aren't implemented. Please file a PR if you need those.
@@ 45,22 47,11 @@ The following functions are implemented:
- A "just enough" feature complete R `DBI` driver has been implemented using the Drill REST API, mostly to facilitate the `dplyr` interface. Use the `RJDBC` driver interface if you need more `DBI` functionality.
- This also means that SQL functions unique to Drill have also been "implemented" (i.e. made accessible to the `dplyr` interface). If you have custom Drill SQL functions that need to be implemented please file an issue on GitHub. Many should work without it, but some may require a custom interface.
-**`DBI`** (RJDBC)
-
-- `drill_jdbc`: Connect to Drill using JDBC, enabling use of said idioms. See `RJDBC` for more info.
-- NOTE: The DRILL JDBC driver fully-qualified path must be placed in the `DRILL_JDBC_JAR` environment variable. This is best done via `~/.Renviron` for interactive work. i.e. `DRILL_JDBC_JAR=/usr/local/drill/jars/drill-jdbc-all-1.14.0.jar`
-
**`dplyr`**: (REST)
-- `src_drill`: Connect to Drill (using dplyr) + supporting functions
-
-See `dplyr` for the `dplyr` operations (light testing shows they work in basic SQL use-cases but Drill's SQL engine has issues with more complex queries).
-
-**`dplyr`**: (RJDBC)
+- `src_drill`: Connect to Drill (using `dplyr`) + supporting functions
-- `src_drill_jdbc`: Connect to Drill (using dplyr & RJDBC) + supporting functions
-
-See `dplyr` for the `dplyr` operations (light testing shows they work in basic SQL use-cases but Drill's SQL engine has issues with more complex queries).
+Note that a number of Drill SQL functions have been mapped to R functions (e.g. `grepl`) to make it easier to transition from non-database-backed SQL ops to Drill. See the help on `drill_custom_functions` for more info on these helper Drill custom function mappings.
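+
+For example, the `grepl` mapping can be used directly in a `dplyr` pipeline (a sketch only; it assumes a running Drill instance and uses column names from the bundled `employee.json` sample):
+
+```{r eval=FALSE}
+library(sergeant)
+library(dplyr)
+
+ds <- src_drill("localhost")
+db <- tbl(ds, "cp.`employee.json`")
+
+# grepl() is rewritten to a Drill SQL predicate by the translation layer
+filter(db, grepl("Manager", position_title)) %>%
+  select(full_name, position_title) %>%
+  collect()
+```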
**Drill APIs**:
@@ 86,7 77,7 @@ See `dplyr` for the `dplyr` operations (light testing shows they work in basic S
- `drill_use`: Change to a particular schema.
- `drill_version`: Identify the version of Drill running
-### Installation
+## Installation
```{r eval=FALSE}
devtools::install_github("hrbrmstr/sergeant")
@@ 96,45 87,23 @@ devtools::install_github("hrbrmstr/sergeant")
options(width=120)
```
-### Experimental `dplyr` interface
-
-```{r message=FALSE}
-library(sergeant)
-
-```
+## Usage
-```{r echo=TRUE, eval=FALSE}
-ds <- src_drill("localhost") # use localhost if running standalone on same system otherwise the host or IP of your Drill server
-ds
-```
+### `dplyr` interface
-```{r echo=FALSE, eval=TRUE}
-ds <- src_drill("bigd")
-ds
-```
+```{r dplyr-01, message=FALSE}
+library(sergeant)
+library(tidyverse)
-```{r message=FALSE}
+# use localhost if running standalone on same system otherwise the host or IP of your Drill server
+ds <- src_drill("localhost") #ds
db <- tbl(ds, "cp.`employee.json`")
# without `collect()`:
count(db, gender, marital_status)
-# ^^ gets translated to:
-#
-# SELECT *
-# FROM (SELECT gender , marital_status , COUNT(*) AS n
-# FROM cp.`employee.json`
-# GROUP BY gender , marital_status ) govketbhqb
-# LIMIT 1000
-
count(db, gender, marital_status) %>% collect()
-# ^^ gets translated to:
-#
-# SELECT gender , marital_status , COUNT(*) AS n
-# FROM cp.`employee.json`
-# GROUP BY gender , marital_status
-
group_by(db, position_title) %>%
count(gender) -> tmp2
@@ 145,24 114,8 @@ group_by(db, position_title) %>%
collect() %>%
select(Title=position_title, Gender=full_desc, Count=n)
-# ^^ gets translated to:
-#
-# SELECT position_title , gender , n ,
-# CASE WHEN ( gender = 'F') THEN ('Female') ELSE ('Male') END AS full_desc
-# FROM (SELECT position_title , gender , COUNT(*) AS n
-# FROM cp.`employee.json`
-# GROUP BY position_title , gender ) dcyuypuypb
-
arrange(db, desc(employee_id)) %>% print(n=20)
-# ^^ gets translated to:
-#
-# SELECT *
-# FROM (SELECT *
-# FROM cp.`employee.json`
-# ORDER BY employee_id DESC) lvpxoaejbc
-# LIMIT 5
-
mutate(db, position_title=tolower(position_title)) %>%
mutate(salary=as.numeric(salary)) %>%
mutate(gender=ifelse(gender=="F", "Female", "Male")) %>%
@@ 170,44 123,19 @@ mutate(db, position_title=tolower(position_title)) %>%
group_by(supervisor_id) %>%
summarise(underlings_count=n()) %>%
collect()
-
-# ^^ gets translated to:
-#
-# SELECT supervisor_id , COUNT(*) AS underlings_count
-# FROM (SELECT employee_id , full_name , first_name , last_name , position_id , position_title , store_id , department_id , birth_date , hire_date , salary , supervisor_id , education_level , gender , management_role , CASE WHEN ( marital_status = 'S') THEN ('Single') ELSE ('Married') END AS marital_status
-# FROM (SELECT employee_id , full_name , first_name , last_name , position_id , position_title , store_id , department_id , birth_date , hire_date , salary , supervisor_id , education_level , marital_status , management_role , CASE WHEN ( gender = 'F') THEN ('Female') ELSE ('Male') END AS gender
-# FROM (SELECT employee_id , full_name , first_name , last_name , position_id , position_title , store_id , department_id , birth_date , hire_date , supervisor_id , education_level , marital_status , gender , management_role , CAST( salary AS DOUBLE) AS salary
-# FROM (SELECT employee_id , full_name , first_name , last_name , position_id , store_id , department_id , birth_date , hire_date , salary , supervisor_id , education_level , marital_status , gender , management_role , LOWER( position_title ) AS position_title
-# FROM cp.`employee.json` ) cnjsqxeick ) bnbnjrubna ) wavfmhkczv ) zaxeyyicxo
-# GROUP BY supervisor_id
```
-### Usage
-
-```{r message=FALSE}
-library(sergeant)
-
-# current verison
-packageVersion("sergeant")
+### REST API
-```
-```{r eval=FALSE}
+```{r rest-01}
dc <- drill_connection("localhost")
-```
-```{r echo=FALSE}
-dc <- drill_connection("bigd")
-```
-```{r message=FALSE}
+
drill_active(dc)
drill_version(dc)
drill_storage(dc)$name
-```
-
-Working with the built-in JSON data sets:
-```{r}
drill_query(dc, "SELECT * FROM cp.`employee.json` limit 100")
drill_query(dc, "SELECT COUNT(gender) AS gender FROM cp.`employee.json` GROUP BY gender")
@@ 229,7 157,7 @@ Including multiple parquet files in different directories (note the wildcard sup
drill_query(dc, "SELECT * FROM dfs.`/usr/local/drill/sample-data/nations*/nations*.parquet` LIMIT 5")
```
-### A preview of the built-in support for spatial ops
+### Drill has built-in support for spatial ops
Via: <https://github.com/k255/drill-gis>
@@ 248,31 176,6 @@ select columns[2] as city, columns[4] as lon, columns[3] as lat
)
")
```
-
-### JDBC
-
-```{r}
-library(RJDBC)
-
-# Use this if connecting to a cluster with zookeeper
-# con <- drill_jdbc("drill-node:2181", "drillbits1")
-
-# Use the following if running drill-embedded
-```
-```{r eval=FALSE}
-con <- drill_jdbc("localhost:31010", use_zk=FALSE)
-```
-```{r echo=FALSE}
-con <- drill_jdbc("bigd:31010", use_zk=FALSE)
-```
-```{r message=FALSE}
-drill_query(con, "SELECT * FROM cp.`employee.json`")
-
-# but it can work via JDBC function calls, too
-dbGetQuery(con, "SELECT * FROM cp.`employee.json`") %>%
- tibble::as_tibble()
-```
-
### Test Results
```{r}
M README.md => README.md +339 -379
@@ 1,138 1,149 @@
<!-- README.md is generated from README.Rmd. Please edit that file -->
-[](https://doi.org/10.5281/zenodo.1248912)
-[](https://travis-ci.org/hrbrmstr/sergeant)
-[](https://codecov.io/gh/hrbrmstr/sergeant)
-[](https://cran.r-project.org/package=sergeant)
+
+[](https://doi.org/10.5281/zenodo.1248912)
+[](https://travis-ci.org/hrbrmstr/sergeant)
+[](https://codecov.io/gh/hrbrmstr/sergeant)
+[](https://cran.r-project.org/package=sergeant)
# 💂 sergeant
-Tools to Transform and Query Data with 'Apache' 'Drill'
+Tools to Transform and Query Data with ‘Apache’ ‘Drill’
## NOTE
-Version 0.7.0 re-introduces an `RJDBC` (and, as such, an `rJava` depedency). If you desire this to be put into a sibling package, [cast your vote](https://github.com/hrbrmstr/sergeant/issues/20).
+Version 0.7.0 splits off the JDBC interface into a separate package
+`sergeant.caffeinated`
+([GitLab](https://gitlab.com/hrbrmstr/sergeant-caffeinated);
+[GitHub](https://github.com/hrbrmstr/sergeant-caffeinated)).
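+
+If you previously used the JDBC interface from this package, a minimal
+migration sketch (assuming the JDBC functions keep their names &
+signatures in the sibling package) looks like this:
+
+``` r
+# hypothetical migration sketch: install the sibling package, then use its
+# JDBC connector in place of the one that used to ship with sergeant
+devtools::install_github("hrbrmstr/sergeant-caffeinated")
+
+library(sergeant.caffeinated)
+
+# drill-embedded (no ZooKeeper)
+con <- drill_jdbc("localhost:31010", use_zk = FALSE)
+dbGetQuery(con, "SELECT * FROM cp.`employee.json` LIMIT 5")
+```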
## Description
-Drill + `sergeant` is (IMO) a nice alternative to Spark + `sparklyr` if you don't need the ML components of Spark (i.e. just need to query "big data" sources, need to interface with parquet, need to combine disparate data source types — json, csv, parquet, rdbms - for aggregation, etc). Drill also has support for spatial queries.
+Drill + `sergeant` is (IMO) a streamlined alternative to Spark +
+`sparklyr` if you don’t need the ML components of Spark (i.e. just need
+to query “big data” sources, need to interface with parquet, need to
+combine disparate data source types — json, csv, parquet, rdbms - for
+aggregation, etc). Drill also has support for spatial queries.
-I find writing SQL queries to parquet files with Drill on a local linux or macOS workstation to be more performant than doing the data ingestion work with R (especially for large or disperate data sets). I also work with many tiny JSON files on a daily basis and Drill makes it much easier to do so. YMMV.
+Using Drill SQL queries that reference parquet files on a local Linux
+or macOS workstation can often be more performant than doing the same
+data ingestion & wrangling work with R (especially for large or
+disparate data sets). Drill can also help streamline workflows that
+involve wrangling many tiny JSON files on a daily basis.
-You can download Drill from <https://drill.apache.org/download/> (use "Direct File Download"). I use `/usr/local/drill` as the install directory. `drill-embedded` is a super-easy way to get started playing with Drill on a single workstation and most of my workflows can get by using Drill this way. If there is sufficient desire for an automated downloader and a way to start the `drill-embedded` server from within R, please file an issue.
+Drill can be obtained from <https://drill.apache.org/download/> (use
+“Direct File Download”). Drill can also be installed via
+[Docker](https://drill.apache.org/docs/running-drill-on-docker/). For
+local installs on Unix-like systems, a common/suggested install
+location is `/usr/local/drill`.
-There are a few convenience wrappers for various informational SQL queries (like `drill_version()`). Please file an PR if you add more.
+Drill embedded (started using the `$DRILL_BASE_DIR/bin/drill-embedded`
+script) is a super-easy way to get started playing with Drill on a
+single workstation, and many workflows can “get by” using Drill this
+way.
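+
+A minimal “first contact” sketch for a local `drill-embedded` instance
+(the install path below is an assumption; adjust it for your system):
+
+``` r
+# in a terminal (not in R), start Drill embedded first, e.g.:
+#   /usr/local/drill/bin/drill-embedded
+
+# then, from R, hit the REST API on the default port (8047)
+library(sergeant)
+
+dc <- drill_connection("localhost")
+drill_active(dc)   # TRUE if the Drillbit is reachable
+drill_version(dc)
+```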
-The package has been written with retrieval of rectangular data sources in mind. If you need/want a version of `drill_query()` that will enable returning of non-rectangular data (which is possible with Drill) then please file an issue.
+There are a few convenience wrappers for various informational SQL
+queries (like `drill_version()`). Please file a PR if you add more.
-Some of the more "controlling vs data ops" REST API functions aren't implemented. Please file a PR if you need those.
+Some of the more “controlling vs data ops” REST API functions aren’t
+implemented. Please file a PR if you need those.
The following functions are implemented:
**`DBI`** (REST)
-- A "just enough" feature complete R `DBI` driver has been implemented using the Drill REST API, mostly to facilitate the `dplyr` interface. Use the `RJDBC` driver interface if you need more `DBI` functionality.
-- This also means that SQL functions unique to Drill have also been "implemented" (i.e. made accessible to the `dplyr` interface). If you have custom Drill SQL functions that need to be implemented please file an issue on GitHub. Many should work without it, but some may require a custom interface.
-
-**`DBI`** (RJDBC)
-
-- `drill_jdbc`: Connect to Drill using JDBC, enabling use of said idioms. See `RJDBC` for more info.
-- NOTE: The DRILL JDBC driver fully-qualified path must be placed in the `DRILL_JDBC_JAR` environment variable. This is best done via `~/.Renviron` for interactive work. i.e. `DRILL_JDBC_JAR=/usr/local/drill/jars/drill-jdbc-all-1.14.0.jar`
+ - A “just enough” feature complete R `DBI` driver has been implemented
+ using the Drill REST API, mostly to facilitate the `dplyr`
+ interface. Use the `RJDBC` driver interface if you need more `DBI`
+ functionality.
+ - This also means that SQL functions unique to Drill have also been
+ “implemented” (i.e. made accessible to the `dplyr` interface). If
+ you have custom Drill SQL functions that need to be implemented
+ please file an issue on GitHub. Many should work without it, but
+ some may require a custom interface.
**`dplyr`**: (REST)
-- `src_drill`: Connect to Drill (using dplyr) + supporting functions
-
-See `dplyr` for the `dplyr` operations (light testing shows they work in basic SQL use-cases but Drill's SQL engine has issues with more complex queries).
-
-**`dplyr`**: (RJDBC)
+ - `src_drill`: Connect to Drill (using `dplyr`) + supporting functions
-- `src_drill_jdbc`: Connect to Drill (using dplyr & RJDBC) + supporting functions
-
-See `dplyr` for the `dplyr` operations (light testing shows they work in basic SQL use-cases but Drill's SQL engine has issues with more complex queries).
+Note that a number of Drill SQL functions have been mapped to R
+functions (e.g. `grepl`) to make it easier to transition from
+non-database-backed SQL ops to Drill. See the help on
+`drill_custom_functions` for more info on these helper Drill custom
+function mappings.
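+
+For example, the `grepl` mapping can be used directly in a `dplyr`
+pipeline (a sketch only; it assumes a running Drill instance and uses
+column names from the bundled `employee.json` sample):
+
+``` r
+library(sergeant)
+library(dplyr)
+
+ds <- src_drill("localhost")
+db <- tbl(ds, "cp.`employee.json`")
+
+# grepl() is rewritten to a Drill SQL predicate by the translation layer
+filter(db, grepl("Manager", position_title)) %>%
+  select(full_name, position_title) %>%
+  collect()
+```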
**Drill APIs**:
-- `drill_connection`: Setup parameters for a Drill server/cluster connection
-- `drill_active`: Test whether Drill HTTP REST API server is up
-- `drill_cancel`: Cancel the query that has the given queryid
-- `drill_jdbc`: Connect to Drill using JDBC
-- `drill_metrics`: Get the current memory metrics
-- `drill_options`: List the name, default, and data type of the system and session options
-- `drill_profile`: Get the profile of the query that has the given query id
-- `drill_profiles`: Get the profiles of running and completed queries
-- `drill_query`: Submit a query and return results
-- `drill_set`: Set Drill SYSTEM or SESSION options
-- `drill_settings_reset`: Changes (optionally, all) session settings back to system defaults
-- `drill_show_files`: Show files in a file system schema.
-- `drill_show_schemas`: Returns a list of available schemas.
-- `drill_stats`: Get Drillbit information, such as ports numbers
-- `drill_status`: Get the status of Drill
-- `drill_storage`: Get the list of storage plugin names and configurations
-- `drill_system_reset`: Changes (optionally, all) system settings back to system defaults
-- `drill_threads`: Get information about threads
-- `drill_uplift`: Turn a columnar query results into a type-converted tbl
-- `drill_use`: Change to a particular schema.
-- `drill_version`: Identify the version of Drill running
-
-### Installation
+ - `drill_connection`: Setup parameters for a Drill server/cluster
+ connection
+ - `drill_active`: Test whether Drill HTTP REST API server is up
+ - `drill_cancel`: Cancel the query that has the given queryid
+ - `drill_jdbc`: Connect to Drill using JDBC
+ - `drill_metrics`: Get the current memory metrics
+ - `drill_options`: List the name, default, and data type of the system
+ and session options
+ - `drill_profile`: Get the profile of the query that has the given
+ query id
+ - `drill_profiles`: Get the profiles of running and completed queries
+ - `drill_query`: Submit a query and return results
+ - `drill_set`: Set Drill SYSTEM or SESSION options
+ - `drill_settings_reset`: Changes (optionally, all) session settings
+ back to system defaults
+ - `drill_show_files`: Show files in a file system schema.
+ - `drill_show_schemas`: Returns a list of available schemas.
+ - `drill_stats`: Get Drillbit information, such as ports numbers
+ - `drill_status`: Get the status of Drill
+ - `drill_storage`: Get the list of storage plugin names and
+ configurations
+ - `drill_system_reset`: Changes (optionally, all) system settings back
+ to system defaults
+ - `drill_threads`: Get information about threads
+ - `drill_uplift`: Turn a columnar query results into a type-converted
+ tbl
+ - `drill_use`: Change to a particular schema.
+ - `drill_version`: Identify the version of Drill running
+
+## Installation
``` r
devtools::install_github("hrbrmstr/sergeant")
```
-### Experimental `dplyr` interface
+## Usage
-``` r
-library(sergeant)
-```
+### `dplyr` interface
``` r
-ds <- src_drill("localhost") # use localhost if running standalone on same system otherwise the host or IP of your Drill server
-ds
-```
-
- #> src: DrillConnection
- #> tbls: INFORMATION_SCHEMA, cp.default, dfs.d, dfs.default, dfs.h, dfs.natexp, dfs.p, dfs.root, dfs.tmp, sys
+library(sergeant)
+library(tidyverse)
-``` r
+# use localhost if running standalone on same system otherwise the host or IP of your Drill server
+ds <- src_drill("localhost") #ds
db <- tbl(ds, "cp.`employee.json`")
# without `collect()`:
count(db, gender, marital_status)
-#> # Source: lazy query [?? x 3]
-#> # Database: DrillConnection
-#> # Groups: gender
-#> marital_status gender n
-#> <chr> <chr> <int>
-#> 1 S F 297
-#> 2 M M 278
-#> 3 S M 276
-#> 4 M F 304
-
-# ^^ gets translated to:
-#
-# SELECT *
-# FROM (SELECT gender , marital_status , COUNT(*) AS n
-# FROM cp.`employee.json`
-# GROUP BY gender , marital_status ) govketbhqb
-# LIMIT 1000
+## # Source: lazy query [?? x 3]
+## # Database: DrillConnection
+## # Groups: gender
+## marital_status gender n
+## <chr> <chr> <int>
+## 1 S F 297
+## 2 M M 278
+## 3 S M 276
+## 4 M F 304
count(db, gender, marital_status) %>% collect()
-#> # A tibble: 4 x 3
-#> # Groups: gender [2]
-#> marital_status gender n
-#> * <chr> <chr> <int>
-#> 1 S F 297
-#> 2 M M 278
-#> 3 S M 276
-#> 4 M F 304
-
-# ^^ gets translated to:
-#
-# SELECT gender , marital_status , COUNT(*) AS n
-# FROM cp.`employee.json`
-# GROUP BY gender , marital_status
+## # A tibble: 4 x 3
+## # Groups: gender [2]
+## marital_status gender n
+## * <chr> <chr> <int>
+## 1 S F 297
+## 2 M M 278
+## 3 S M 276
+## 4 M F 304
group_by(db, position_title) %>%
count(gender) -> tmp2
@@ 143,65 154,49 @@ group_by(db, position_title) %>%
mutate(full_desc=ifelse(gender=="F", "Female", "Male")) %>%
collect() %>%
select(Title=position_title, Gender=full_desc, Count=n)
-#> # A tibble: 30 x 3
-#> Title Gender Count
-#> * <chr> <chr> <int>
-#> 1 President Female 1
-#> 2 VP Country Manager Male 3
-#> 3 VP Country Manager Female 3
-#> 4 VP Information Systems Female 1
-#> 5 VP Human Resources Female 1
-#> 6 Store Manager Female 13
-#> 7 VP Finance Male 1
-#> 8 Store Manager Male 11
-#> 9 HQ Marketing Female 2
-#> 10 HQ Information Systems Female 4
-#> # ... with 20 more rows
-
-# ^^ gets translated to:
-#
-# SELECT position_title , gender , n ,
-# CASE WHEN ( gender = 'F') THEN ('Female') ELSE ('Male') END AS full_desc
-# FROM (SELECT position_title , gender , COUNT(*) AS n
-# FROM cp.`employee.json`
-# GROUP BY position_title , gender ) dcyuypuypb
+## # A tibble: 30 x 3
+## Title Gender Count
+## * <chr> <chr> <int>
+## 1 President Female 1
+## 2 VP Country Manager Male 3
+## 3 VP Country Manager Female 3
+## 4 VP Information Systems Female 1
+## 5 VP Human Resources Female 1
+## 6 Store Manager Female 13
+## 7 VP Finance Male 1
+## 8 Store Manager Male 11
+## 9 HQ Marketing Female 2
+## 10 HQ Information Systems Female 4
+## # ... with 20 more rows
arrange(db, desc(employee_id)) %>% print(n=20)
-#> # Source: table<cp.`employee.json`> [?? x 16]
-#> # Database: DrillConnection
-#> # Ordered by: desc(employee_id)
-#> store_id gender department_id birth_date supervisor_id last_name position_title hire_date
-#> <int> <chr> <int> <date> <int> <chr> <chr> <dttm>
-#> 1 18 F 18 1914-02-02 1140 Stand Store Temporary Stocker 1998-01-01
-#> 2 18 M 18 1914-02-02 1140 Burnham Store Temporary Stocker 1998-01-01
-#> 3 18 F 18 1914-02-02 1139 Doolittle Store Temporary Stocker 1998-01-01
-#> 4 18 M 18 1914-02-02 1139 Pirnie Store Temporary Stocker 1998-01-01
-#> 5 18 M 17 1914-02-02 1140 Younce Store Permanent Stocker 1998-01-01
-#> 6 18 F 17 1914-02-02 1140 Biltoft Store Permanent Stocker 1998-01-01
-#> 7 18 M 17 1914-02-02 1139 Detwiler Store Permanent Stocker 1998-01-01
-#> 8 18 F 17 1914-02-02 1139 Ciruli Store Permanent Stocker 1998-01-01
-#> 9 18 F 16 1914-02-02 1140 Bishop Store Temporary Checker 1998-01-01
-#> 10 18 F 16 1914-02-02 1140 Cutwright Store Temporary Checker 1998-01-01
-#> 11 18 F 16 1914-02-02 1139 Anderson Store Temporary Checker 1998-01-01
-#> 12 18 F 16 1914-02-02 1139 Swartwood Store Temporary Checker 1998-01-01
-#> 13 18 M 15 1914-02-02 1140 Curtsinger Store Permanent Checker 1998-01-01
-#> 14 18 F 15 1914-02-02 1140 Quick Store Permanent Checker 1998-01-01
-#> 15 18 M 15 1914-02-02 1139 Souza Store Permanent Checker 1998-01-01
-#> 16 18 M 15 1914-02-02 1139 Compagno Store Permanent Checker 1998-01-01
-#> 17 18 M 11 1961-09-24 1139 Jaramillo Store Shift Supervisor 1998-01-01
-#> 18 18 M 11 1972-05-12 17 Belsey Store Assistant Manager 1998-01-01
-#> 19 12 M 18 1914-02-02 1069 Eichorn Store Temporary Stocker 1998-01-01
-#> 20 12 F 18 1914-02-02 1069 Geiermann Store Temporary Stocker 1998-01-01
-#> # ... with more rows, and 8 more variables: management_role <chr>, salary <dbl>, marital_status <chr>, full_name <chr>,
-#> # employee_id <int>, education_level <chr>, first_name <chr>, position_id <int>
-
-# ^^ gets translated to:
-#
-# SELECT *
-# FROM (SELECT *
-# FROM cp.`employee.json`
-# ORDER BY employee_id DESC) lvpxoaejbc
-# LIMIT 5
+## # Source: table<cp.`employee.json`> [?? x 20]
+## # Database: DrillConnection
+## # Ordered by: desc(employee_id)
+## store_id gender department_id birth_date supervisor_id last_name position_title hire_date management_role
+## <int> <chr> <int> <date> <int> <chr> <chr> <dttm> <chr>
+## 1 18 F 18 1914-02-02 1140 Stand Store Tempora… 1998-01-01 00:00:00 Store Temp Sta…
+## 2 18 M 18 1914-02-02 1140 Burnham Store Tempora… 1998-01-01 00:00:00 Store Temp Sta…
+## 3 18 F 18 1914-02-02 1139 Doolittle Store Tempora… 1998-01-01 00:00:00 Store Temp Sta…
+## 4 18 M 18 1914-02-02 1139 Pirnie Store Tempora… 1998-01-01 00:00:00 Store Temp Sta…
+## 5 18 M 17 1914-02-02 1140 Younce Store Permane… 1998-01-01 00:00:00 Store Full Tim…
+## 6 18 F 17 1914-02-02 1140 Biltoft Store Permane… 1998-01-01 00:00:00 Store Full Tim…
+## 7 18 M 17 1914-02-02 1139 Detwiler Store Permane… 1998-01-01 00:00:00 Store Full Tim…
+## 8 18 F 17 1914-02-02 1139 Ciruli Store Permane… 1998-01-01 00:00:00 Store Full Tim…
+## 9 18 F 16 1914-02-02 1140 Bishop Store Tempora… 1998-01-01 00:00:00 Store Full Tim…
+## 10 18 F 16 1914-02-02 1140 Cutwright Store Tempora… 1998-01-01 00:00:00 Store Full Tim…
+## 11 18 F 16 1914-02-02 1139 Anderson Store Tempora… 1998-01-01 00:00:00 Store Full Tim…
+## 12 18 F 16 1914-02-02 1139 Swartwood Store Tempora… 1998-01-01 00:00:00 Store Full Tim…
+## 13 18 M 15 1914-02-02 1140 Curtsinger Store Permane… 1998-01-01 00:00:00 Store Full Tim…
+## 14 18 F 15 1914-02-02 1140 Quick Store Permane… 1998-01-01 00:00:00 Store Full Tim…
+## 15 18 M 15 1914-02-02 1139 Souza Store Permane… 1998-01-01 00:00:00 Store Full Tim…
+## 16 18 M 15 1914-02-02 1139 Compagno Store Permane… 1998-01-01 00:00:00 Store Full Tim…
+## 17 18 M 11 1961-09-24 1139 Jaramillo Store Shift S… 1998-01-01 00:00:00 Store Manageme…
+## 18 18 M 11 1972-05-12 17 Belsey Store Assista… 1998-01-01 00:00:00 Store Manageme…
+## 19 12 M 18 1914-02-02 1069 Eichorn Store Tempora… 1998-01-01 00:00:00 Store Temp Sta…
+## 20 12 F 18 1914-02-02 1069 Geiermann Store Tempora… 1998-01-01 00:00:00 Store Temp Sta…
+## # ... with more rows, and 7 more variables: salary <dbl>, marital_status <chr>, full_name <chr>, employee_id <int>,
+## # education_level <chr>, first_name <chr>, position_id <int>
mutate(db, position_title=tolower(position_title)) %>%
mutate(salary=as.numeric(salary)) %>%
@@ 210,181 205,160 @@ mutate(db, position_title=tolower(position_title)) %>%
group_by(supervisor_id) %>%
summarise(underlings_count=n()) %>%
collect()
-#> # A tibble: 112 x 2
-#> supervisor_id underlings_count
-#> * <int> <int>
-#> 1 0 1
-#> 2 1 7
-#> 3 5 9
-#> 4 4 2
-#> 5 2 3
-#> 6 20 2
-#> 7 21 4
-#> 8 22 7
-#> 9 6 4
-#> 10 36 2
-#> # ... with 102 more rows
-
-# ^^ gets translated to:
-#
-# SELECT supervisor_id , COUNT(*) AS underlings_count
-# FROM (SELECT employee_id , full_name , first_name , last_name , position_id , position_title , store_id , department_id , birth_date , hire_date , salary , supervisor_id , education_level , gender , management_role , CASE WHEN ( marital_status = 'S') THEN ('Single') ELSE ('Married') END AS marital_status
-# FROM (SELECT employee_id , full_name , first_name , last_name , position_id , position_title , store_id , department_id , birth_date , hire_date , salary , supervisor_id , education_level , marital_status , management_role , CASE WHEN ( gender = 'F') THEN ('Female') ELSE ('Male') END AS gender
-# FROM (SELECT employee_id , full_name , first_name , last_name , position_id , position_title , store_id , department_id , birth_date , hire_date , supervisor_id , education_level , marital_status , gender , management_role , CAST( salary AS DOUBLE) AS salary
-# FROM (SELECT employee_id , full_name , first_name , last_name , position_id , store_id , department_id , birth_date , hire_date , salary , supervisor_id , education_level , marital_status , gender , management_role , LOWER( position_title ) AS position_title
-# FROM cp.`employee.json` ) cnjsqxeick ) bnbnjrubna ) wavfmhkczv ) zaxeyyicxo
-# GROUP BY supervisor_id
+## # A tibble: 112 x 2
+## supervisor_id underlings_count
+## * <int> <int>
+## 1 0 1
+## 2 1 7
+## 3 5 9
+## 4 4 2
+## 5 2 3
+## 6 20 2
+## 7 21 4
+## 8 22 7
+## 9 6 4
+## 10 36 2
+## # ... with 102 more rows
```
-### Usage
-
-``` r
-library(sergeant)
-
-# current verison
-packageVersion("sergeant")
-#> [1] '0.5.2'
-```
+### REST API
``` r
dc <- drill_connection("localhost")
-```
-``` r
drill_active(dc)
-#> [1] TRUE
+## [1] TRUE
drill_version(dc)
-#> [1] "1.11.0"
+## [1] "1.13.0"
drill_storage(dc)$name
-#> [1] "cp" "dfs" "hbase" "hive" "kudu" "mongo" "s3"
-```
-
-Working with the built-in JSON data sets:
+## [1] "cp" "dfs" "hbase" "hive" "kudu" "mongo" "s3"
-``` r
drill_query(dc, "SELECT * FROM cp.`employee.json` limit 100")
-#> Parsed with column specification:
-#> cols(
-#> store_id = col_integer(),
-#> gender = col_character(),
-#> department_id = col_integer(),
-#> birth_date = col_date(format = ""),
-#> supervisor_id = col_integer(),
-#> last_name = col_character(),
-#> position_title = col_character(),
-#> hire_date = col_datetime(format = ""),
-#> management_role = col_character(),
-#> salary = col_double(),
-#> marital_status = col_character(),
-#> full_name = col_character(),
-#> employee_id = col_integer(),
-#> education_level = col_character(),
-#> first_name = col_character(),
-#> position_id = col_integer()
-#> )
-#> # A tibble: 100 x 16
-#> store_id gender department_id birth_date supervisor_id last_name position_title hire_date management_role
-#> * <int> <chr> <int> <date> <int> <chr> <chr> <dttm> <chr>
-#> 1 0 F 1 1961-08-26 0 Nowmer President 1994-12-01 Senior Management
-#> 2 0 M 1 1915-07-03 1 Whelply VP Country Manager 1994-12-01 Senior Management
-#> 3 0 M 1 1969-06-20 1 Spence VP Country Manager 1998-01-01 Senior Management
-#> 4 0 F 1 1951-05-10 1 Gutierrez VP Country Manager 1998-01-01 Senior Management
-#> 5 0 F 2 1942-10-08 1 Damstra VP Information Systems 1994-12-01 Senior Management
-#> 6 0 F 3 1949-03-27 1 Kanagaki VP Human Resources 1994-12-01 Senior Management
-#> 7 9 F 11 1922-08-10 5 Brunner Store Manager 1998-01-01 Store Management
-#> 8 21 F 11 1979-06-23 5 Blumberg Store Manager 1998-01-01 Store Management
-#> 9 0 M 5 1949-08-26 1 Stanz VP Finance 1994-12-01 Senior Management
-#> 10 1 M 11 1967-06-20 5 Murraiin Store Manager 1998-01-01 Store Management
-#> # ... with 90 more rows, and 7 more variables: salary <dbl>, marital_status <chr>, full_name <chr>, employee_id <int>,
-#> # education_level <chr>, first_name <chr>, position_id <int>
+## Parsed with column specification:
+## cols(
+## store_id = col_integer(),
+## gender = col_character(),
+## department_id = col_integer(),
+## birth_date = col_date(format = ""),
+## supervisor_id = col_integer(),
+## last_name = col_character(),
+## position_title = col_character(),
+## hire_date = col_datetime(format = ""),
+## management_role = col_character(),
+## salary = col_double(),
+## marital_status = col_character(),
+## full_name = col_character(),
+## employee_id = col_integer(),
+## education_level = col_character(),
+## first_name = col_character(),
+## position_id = col_integer()
+## )
+## # A tibble: 100 x 16
+## store_id gender department_id birth_date supervisor_id last_name position_title hire_date management_role
+## * <int> <chr> <int> <date> <int> <chr> <chr> <dttm> <chr>
+## 1 0 F 1 1961-08-26 0 Nowmer President 1994-12-01 00:00:00 Senior Managem…
+## 2 0 M 1 1915-07-03 1 Whelply VP Country Man… 1994-12-01 00:00:00 Senior Managem…
+## 3 0 M 1 1969-06-20 1 Spence VP Country Man… 1998-01-01 00:00:00 Senior Managem…
+## 4 0 F 1 1951-05-10 1 Gutierrez VP Country Man… 1998-01-01 00:00:00 Senior Managem…
+## 5 0 F 2 1942-10-08 1 Damstra VP Information… 1994-12-01 00:00:00 Senior Managem…
+## 6 0 F 3 1949-03-27 1 Kanagaki VP Human Resou… 1994-12-01 00:00:00 Senior Managem…
+## 7 9 F 11 1922-08-10 5 Brunner Store Manager 1998-01-01 00:00:00 Store Manageme…
+## 8 21 F 11 1979-06-23 5 Blumberg Store Manager 1998-01-01 00:00:00 Store Manageme…
+## 9 0 M 5 1949-08-26 1 Stanz VP Finance 1994-12-01 00:00:00 Senior Managem…
+## 10 1 M 11 1967-06-20 5 Murraiin Store Manager 1998-01-01 00:00:00 Store Manageme…
+## # ... with 90 more rows, and 7 more variables: salary <dbl>, marital_status <chr>, full_name <chr>, employee_id <int>,
+## # education_level <chr>, first_name <chr>, position_id <int>
drill_query(dc, "SELECT COUNT(gender) AS gender FROM cp.`employee.json` GROUP BY gender")
-#> Parsed with column specification:
-#> cols(
-#> gender = col_integer()
-#> )
-#> # A tibble: 2 x 1
-#> gender
-#> * <int>
-#> 1 601
-#> 2 554
+## Parsed with column specification:
+## cols(
+## gender = col_integer()
+## )
+## # A tibble: 2 x 1
+## gender
+## * <int>
+## 1 601
+## 2 554
drill_options(dc)
-#> # A tibble: 124 x 4
-#> name value type kind
-#> * <chr> <chr> <chr> <chr>
-#> 1 planner.enable_hash_single_key TRUE SYSTEM BOOLEAN
-#> 2 store.parquet.reader.pagereader.queuesize 2 SYSTEM LONG
-#> 3 planner.enable_limit0_optimization FALSE SYSTEM BOOLEAN
-#> 4 store.json.read_numbers_as_double FALSE SYSTEM BOOLEAN
-#> 5 planner.enable_constant_folding TRUE SYSTEM BOOLEAN
-#> 6 store.json.extended_types FALSE SYSTEM BOOLEAN
-#> 7 planner.memory.non_blocking_operators_memory 64 SYSTEM LONG
-#> 8 planner.enable_multiphase_agg TRUE SYSTEM BOOLEAN
-#> 9 exec.query_profile.debug_mode FALSE SYSTEM BOOLEAN
-#> 10 planner.filter.max_selectivity_estimate_factor 1 SYSTEM DOUBLE
-#> # ... with 114 more rows
+## # A tibble: 138 x 5
+## name value accessibleScopes kind optionScope
+## * <chr> <chr> <chr> <chr> <chr>
+## 1 debug.validate_iterators FALSE ALL BOOLEAN BOOT
+## 2 debug.validate_vectors FALSE ALL BOOLEAN BOOT
+## 3 drill.exec.functions.cast_empty_string_to_null FALSE ALL BOOLEAN BOOT
+## 4 drill.exec.hashagg.fallback.enabled FALSE ALL BOOLEAN BOOT
+## 5 drill.exec.memory.operator.output_batch_size 16777216 SYSTEM LONG BOOT
+## 6 drill.exec.storage.file.partition.column.label dir ALL STRING BOOT
+## 7 drill.exec.storage.implicit.filename.column.label filename ALL STRING BOOT
+## 8 drill.exec.storage.implicit.filepath.column.label filepath ALL STRING BOOT
+## 9 drill.exec.storage.implicit.fqn.column.label fqn ALL STRING BOOT
+## 10 drill.exec.storage.implicit.suffix.column.label suffix ALL STRING BOOT
+## # ... with 128 more rows
drill_options(dc, "json")
-#> # A tibble: 7 x 4
-#> name value type kind
-#> <chr> <chr> <chr> <chr>
-#> 1 store.json.read_numbers_as_double FALSE SYSTEM BOOLEAN
-#> 2 store.json.extended_types FALSE SYSTEM BOOLEAN
-#> 3 store.json.writer.uglify FALSE SYSTEM BOOLEAN
-#> 4 store.json.reader.skip_invalid_records FALSE SYSTEM BOOLEAN
-#> 5 store.json.reader.print_skipped_invalid_record_number FALSE SYSTEM BOOLEAN
-#> 6 store.json.all_text_mode FALSE SYSTEM BOOLEAN
-#> 7 store.json.writer.skip_null_fields TRUE SYSTEM BOOLEAN
+## # A tibble: 9 x 5
+## name value accessibleScopes kind optionScope
+## <chr> <chr> <chr> <chr> <chr>
+## 1 store.json.all_text_mode FALSE ALL BOOLEAN BOOT
+## 2 store.json.extended_types FALSE ALL BOOLEAN BOOT
+## 3 store.json.read_numbers_as_double FALSE ALL BOOLEAN BOOT
+## 4 store.json.reader.allow_nan_inf TRUE ALL BOOLEAN BOOT
+## 5 store.json.reader.print_skipped_invalid_record_number FALSE ALL BOOLEAN BOOT
+## 6 store.json.reader.skip_invalid_records FALSE ALL BOOLEAN BOOT
+## 7 store.json.writer.allow_nan_inf TRUE ALL BOOLEAN BOOT
+## 8 store.json.writer.skip_null_fields TRUE ALL BOOLEAN BOOT
+## 9 store.json.writer.uglify FALSE ALL BOOLEAN BOOT
```
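+
+Any of the options above can be changed with plain Drill SQL. A minimal sketch (assuming `dc` is the same REST connection handle used above, and using `store.json.all_text_mode` purely as an example):
+
+``` r
+# session-scoped change; ALTER SYSTEM would persist it cluster-wide
+drill_query(dc, "ALTER SESSION SET `store.json.all_text_mode` = true")
+
+# confirm the new value
+drill_options(dc, "all_text_mode")
+```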
-Working with parquet files
---------------------------
+## Working with parquet files
``` r
drill_query(dc, "SELECT * FROM dfs.`/usr/local/drill/sample-data/nation.parquet` LIMIT 5")
-#> Parsed with column specification:
-#> cols(
-#> N_COMMENT = col_character(),
-#> N_NAME = col_character(),
-#> N_NATIONKEY = col_integer(),
-#> N_REGIONKEY = col_integer()
-#> )
-#> # A tibble: 5 x 4
-#> N_COMMENT N_NAME N_NATIONKEY N_REGIONKEY
-#> * <chr> <chr> <int> <int>
-#> 1 haggle. carefully f ALGERIA 0 0
-#> 2 al foxes promise sly ARGENTINA 1 1
-#> 3 y alongside of the p BRAZIL 2 1
-#> 4 eas hang ironic, sil CANADA 3 1
-#> 5 y above the carefull EGYPT 4 4
+## Parsed with column specification:
+## cols(
+## N_COMMENT = col_character(),
+## N_NAME = col_character(),
+## N_NATIONKEY = col_integer(),
+## N_REGIONKEY = col_integer()
+## )
+## # A tibble: 5 x 4
+## N_COMMENT N_NAME N_NATIONKEY N_REGIONKEY
+## * <chr> <chr> <int> <int>
+## 1 haggle. carefully f ALGERIA 0 0
+## 2 al foxes promise sly ARGENTINA 1 1
+## 3 y alongside of the p BRAZIL 2 1
+## 4 eas hang ironic, sil CANADA 3 1
+## 5 y above the carefull EGYPT 4 4
```
-Including multiple parquet files in different directories (note the wildcard support):
+Including multiple parquet files in different directories (note the wildcard support):
``` r
drill_query(dc, "SELECT * FROM dfs.`/usr/local/drill/sample-data/nations*/nations*.parquet` LIMIT 5")
-#> Parsed with column specification:
-#> cols(
-#> N_COMMENT = col_character(),
-#> N_NAME = col_character(),
-#> N_NATIONKEY = col_integer(),
-#> N_REGIONKEY = col_integer(),
-#> dir0 = col_character()
-#> )
-#> # A tibble: 5 x 5
-#> N_COMMENT N_NAME N_NATIONKEY N_REGIONKEY dir0
-#> * <chr> <chr> <int> <int> <chr>
-#> 1 haggle. carefully f ALGERIA 0 0 nationsSF
-#> 2 al foxes promise sly ARGENTINA 1 1 nationsSF
-#> 3 y alongside of the p BRAZIL 2 1 nationsSF
-#> 4 eas hang ironic, sil CANADA 3 1 nationsSF
-#> 5 y above the carefull EGYPT 4 4 nationsSF
+## Parsed with column specification:
+## cols(
+## N_COMMENT = col_character(),
+## N_NAME = col_character(),
+## N_NATIONKEY = col_integer(),
+## dir0 = col_character(),
+## N_REGIONKEY = col_integer()
+## )
+## # A tibble: 5 x 5
+## N_COMMENT N_NAME N_NATIONKEY dir0 N_REGIONKEY
+## * <chr> <chr> <int> <chr> <int>
+## 1 haggle. carefully f ALGERIA 0 nationsSF 0
+## 2 al foxes promise sly ARGENTINA 1 nationsSF 1
+## 3 y alongside of the p BRAZIL 2 nationsSF 1
+## 4 eas hang ironic, sil CANADA 3 nationsSF 1
+## 5 y above the carefull EGYPT 4 nationsSF 4
```
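+
+The `dir0` column in that result is Drill's implicit partition column (one per directory level; the label comes from the `drill.exec.storage.file.partition.column.label` option listed earlier), so it can also be used to restrict which directories get scanned. A small sketch, assuming the same `nationsSF` directory shown in the output above:
+
+``` r
+drill_query(dc, "
+SELECT *
+  FROM dfs.`/usr/local/drill/sample-data/nations*/nations*.parquet`
+  WHERE dir0 = 'nationsSF'
+  LIMIT 5
+")
+```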
-### A preview of the built-in support for spatial ops
+### Drill has built-in support for spatial ops
Via: <https://github.com/k255/drill-gis>
@@ 402,78 376,22 @@ select columns[2] as city, columns[4] as lon, columns[3] as lat
)
)
")
-#> Parsed with column specification:
-#> cols(
-#> city = col_character(),
-#> lon = col_double(),
-#> lat = col_double()
-#> )
-#> # A tibble: 7 x 3
-#> city lon lat
-#> * <chr> <dbl> <dbl>
-#> 1 Burbank -121.9316 37.32328
-#> 2 San Jose -121.8950 37.33939
-#> 3 Lick -121.8458 37.28716
-#> 4 Willow Glen -121.8897 37.30855
-#> 5 Buena Vista -121.9166 37.32133
-#> 6 Parkmoor -121.9308 37.32105
-#> 7 Fruitdale -121.9327 37.31086
-```
-
-### JDBC
-
-``` r
-library(RJDBC)
-#> Loading required package: rJava
-
-# Use this if connecting to a cluster with zookeeper
-# con <- drill_jdbc("drill-node:2181", "drillbits1")
-
-# Use the following if running drill-embedded
-```
-
-``` r
-con <- drill_jdbc("localhost:31010", use_zk=FALSE)
-```
-
- #> Using [jdbc:drill:drillbit=bigd:31010]...
-
-``` r
-drill_query(con, "SELECT * FROM cp.`employee.json`")
-#> # A tibble: 1,155 x 16
-#> employee_id full_name first_name last_name position_id position_title store_id department_id
-#> * <dbl> <chr> <chr> <chr> <dbl> <chr> <dbl> <dbl>
-#> 1 1 Sheri Nowmer Sheri Nowmer 1 President 0 1
-#> 2 2 Derrick Whelply Derrick Whelply 2 VP Country Manager 0 1
-#> 3 4 Michael Spence Michael Spence 2 VP Country Manager 0 1
-#> 4 5 Maya Gutierrez Maya Gutierrez 2 VP Country Manager 0 1
-#> 5 6 Roberta Damstra Roberta Damstra 3 VP Information Systems 0 2
-#> 6 7 Rebecca Kanagaki Rebecca Kanagaki 4 VP Human Resources 0 3
-#> 7 8 Kim Brunner Kim Brunner 11 Store Manager 9 11
-#> 8 9 Brenda Blumberg Brenda Blumberg 11 Store Manager 21 11
-#> 9 10 Darren Stanz Darren Stanz 5 VP Finance 0 5
-#> 10 11 Jonathan Murraiin Jonathan Murraiin 11 Store Manager 1 11
-#> # ... with 1,145 more rows, and 8 more variables: birth_date <chr>, hire_date <chr>, salary <dbl>, supervisor_id <dbl>,
-#> # education_level <chr>, marital_status <chr>, gender <chr>, management_role <chr>
-
-# but it can work via JDBC function calls, too
-dbGetQuery(con, "SELECT * FROM cp.`employee.json`") %>%
- tibble::as_tibble()
-#> # A tibble: 1,155 x 16
-#> employee_id full_name first_name last_name position_id position_title store_id department_id
-#> * <dbl> <chr> <chr> <chr> <dbl> <chr> <dbl> <dbl>
-#> 1 1 Sheri Nowmer Sheri Nowmer 1 President 0 1
-#> 2 2 Derrick Whelply Derrick Whelply 2 VP Country Manager 0 1
-#> 3 4 Michael Spence Michael Spence 2 VP Country Manager 0 1
-#> 4 5 Maya Gutierrez Maya Gutierrez 2 VP Country Manager 0 1
-#> 5 6 Roberta Damstra Roberta Damstra 3 VP Information Systems 0 2
-#> 6 7 Rebecca Kanagaki Rebecca Kanagaki 4 VP Human Resources 0 3
-#> 7 8 Kim Brunner Kim Brunner 11 Store Manager 9 11
-#> 8 9 Brenda Blumberg Brenda Blumberg 11 Store Manager 21 11
-#> 9 10 Darren Stanz Darren Stanz 5 VP Finance 0 5
-#> 10 11 Jonathan Murraiin Jonathan Murraiin 11 Store Manager 1 11
-#> # ... with 1,145 more rows, and 8 more variables: birth_date <chr>, hire_date <chr>, salary <dbl>, supervisor_id <dbl>,
-#> # education_level <chr>, marital_status <chr>, gender <chr>, management_role <chr>
+## Parsed with column specification:
+## cols(
+## city = col_character(),
+## lon = col_double(),
+## lat = col_double()
+## )
+## # A tibble: 7 x 3
+## city lon lat
+## * <chr> <dbl> <dbl>
+## 1 Burbank -122. 37.3
+## 2 San Jose -122. 37.3
+## 3 Lick -122. 37.3
+## 4 Willow Glen -122. 37.3
+## 5 Buena Vista -122. 37.3
+## 6 Parkmoor -122. 37.3
+## 7 Fruitdale -122. 37.3
```
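+
+The GIS extension also includes distance-based predicates. A hypothetical follow-on sketch (the CSV path, the `columns[]` layout, and the availability of `ST_DWithin()`/`ST_Point()` from drill-gis are assumptions here, not taken from the query above):
+
+``` r
+# cities within roughly 0.05 degrees of a reference point (layout assumed to
+# match the query above: city in columns[2], lon/lat in columns[4]/columns[3])
+drill_query(dc, "
+SELECT columns[2] AS city
+  FROM dfs.`/usr/local/drill/sample-data/ca-cities.csv`
+  WHERE ST_DWithin(
+          ST_Point(CAST(columns[4] AS DOUBLE), CAST(columns[3] AS DOUBLE)),
+          ST_Point(-121.895, 37.339),
+          0.05
+        )
+")
+```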
### Test Results
@@ 481,24 399,66 @@ dbGetQuery(con, "SELECT * FROM cp.`employee.json`") %>%
``` r
library(sergeant)
library(testthat)
-#>
-#> Attaching package: 'testthat'
-#> The following object is masked from 'package:dplyr':
-#>
-#> matches
+##
+## Attaching package: 'testthat'
+## The following object is masked from 'package:dplyr':
+##
+## matches
+## The following object is masked from 'package:purrr':
+##
+## is_null
date()
-#> [1] "Sun Sep 17 13:31:23 2017"
+## [1] "Sun Oct 14 08:27:29 2018"
devtools::test()
-#> Loading sergeant
-#> Testing sergeant
-#> dplyr: ...
-#> rest: ................
-#>
-#> DONE ===================================================================================================================
+## Loading sergeant
+## Testing sergeant
+## ✔ | OK F W S | Context
+##
+## ✔ | 3 | dplyr API [0.3 s]
+##
+## ✔ | 16 | REST API [2.2 s]
+##
+## ══ Results ═══════════════════════════════════════════════════
+## Duration: 2.5 s
+##
+## OK: 19
+## Failed: 0
+## Warnings: 0
+## Skipped: 0
```
-### Code of Conduct
+## sergeant Metrics
+
+| Lang | \# Files | (%) | LoC | (%) | Blank lines | (%) | \# Lines | (%) |
+| :--- | -------: | ---: | --: | ---: | ----------: | ---: | -------: | ---: |
+| R | 12 | 0.92 | 625 | 0.92 | 173 | 0.75 | 562 | 0.87 |
+| Rmd | 1 | 0.08 | 55 | 0.08 | 58 | 0.25 | 86 | 0.13 |
+
+## Code of Conduct
-Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms.
+Please note that this project is released with a [Contributor Code of
+Conduct](CONDUCT.md). By participating in this project you agree to
+abide by its terms.
D inst/java/slf4j-nop-1.7.25.jar => inst/java/slf4j-nop-1.7.25.jar +0 -0
M man/DrillConnection-class.Rd => man/DrillConnection-class.Rd +2 -2
@@ 3,13 3,13 @@
\docType{class}
\name{DrillConnection-class}
\alias{DrillConnection-class}
-\alias{dbSendQuery,DrillConnection,ANY-method}
+\alias{dbSendQuery,DrillConnection-method}
\alias{dbSendQuery,DrillConnection,character-method}
\alias{dbIsValid,DrillConnection-method}
\alias{dbListFields,DrillConnection,character-method}
\title{Drill connection class.}
\usage{
-\S4method{dbSendQuery}{DrillConnection,ANY}(conn, statement, ...)
+\S4method{dbSendQuery}{DrillConnection}(conn, statement, ...)
\S4method{dbIsValid}{DrillConnection}(dbObj, ...)
D man/DrillJDBC.Rd => man/DrillJDBC.Rd +0 -18
@@ 1,18 0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/jdbc.r
-\name{DrillJDBC}
-\alias{DrillJDBC}
-\title{Drill's JDBC driver main class loader}
-\usage{
-DrillJDBC()
-}
-\description{
-Drill's JDBC driver main class loader
-}
-\seealso{
-Other Drill JDBC API: \code{\link{dbConnect,DrillJDBCDriver-method}},
- \code{\link{dbDataType,DrillJDBCConnection-method}},
- \code{\link{db_data_type.DrillJDBCConnection}},
- \code{\link{drill_jdbc}}
-}
-\concept{Drill JDBC API}
D man/DrillJDBCConnection-class.Rd => man/DrillJDBCConnection-class.Rd +0 -10
@@ 1,10 0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/jdbc.r
-\docType{class}
-\name{DrillJDBCConnection-class}
-\alias{DrillJDBCConnection-class}
-\title{Drill JDBC connection class.}
-\description{
-Drill JDBC connection class.
-}
-\keyword{internal}
D man/DrillJDBCDriver-class.Rd => man/DrillJDBCDriver-class.Rd +0 -10
@@ 1,10 0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/jdbc.r
-\docType{class}
-\name{DrillJDBCDriver-class}
-\alias{DrillJDBCDriver-class}
-\title{JDBC Driver for Drill database.}
-\description{
-JDBC Driver for Drill database.
-}
-\keyword{internal}
D man/dbConnect-DrillJDBCDriver-method.Rd => man/dbConnect-DrillJDBCDriver-method.Rd +0 -31
@@ 1,31 0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/jdbc.r
-\docType{methods}
-\name{dbConnect,DrillJDBCDriver-method}
-\alias{dbConnect,DrillJDBCDriver-method}
-\title{Connect to Drill JDBC with your own connection string}
-\usage{
-\S4method{dbConnect}{DrillJDBCDriver}(drv, url, user = "", password = "",
- ...)
-}
-\arguments{
-\item{drv}{what you get back from \code{\link[=DrillJDBC]{DrillJDBC()}}}
-
-\item{url}{your Drill connection strinfg}
-
-\item{user, password}{username & password (leave as-is for no-auth)}
-
-\item{...}{additional \code{name=val} properties which will be set with Java's
-\code{SetProperty} method.}
-}
-\description{
-You should really use \code{\link[=drill_jdbc]{drill_jdbc()}} as it handles some cruft for
-you, but you can specify the full JDBC connection string
-}
-\seealso{
-Other Drill JDBC API: \code{\link{DrillJDBC}},
- \code{\link{dbDataType,DrillJDBCConnection-method}},
- \code{\link{db_data_type.DrillJDBCConnection}},
- \code{\link{drill_jdbc}}
-}
-\concept{Drill JDBC API}
D man/dbDataType-DrillJDBCConnection-method.Rd => man/dbDataType-DrillJDBCConnection-method.Rd +0 -26
@@ 1,26 0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/jdbc.r
-\docType{methods}
-\name{dbDataType,DrillJDBCConnection-method}
-\alias{dbDataType,DrillJDBCConnection-method}
-\title{Drill JDBC dbDataType}
-\usage{
-\S4method{dbDataType}{DrillJDBCConnection}(dbObj, obj, ...)
-}
-\arguments{
-\item{dbObj}{A \code{\linkS4class{DrillJDBCDriver}} object}
-
-\item{obj}{Any R object}
-
-\item{...}{Extra optional parameters}
-}
-\description{
-Drill JDBC dbDataType
-}
-\seealso{
-Other Drill JDBC API: \code{\link{DrillJDBC}},
- \code{\link{dbConnect,DrillJDBCDriver-method}},
- \code{\link{db_data_type.DrillJDBCConnection}},
- \code{\link{drill_jdbc}}
-}
-\concept{Drill JDBC API}
M man/drill_active.Rd => man/drill_active.Rd +2 -2
@@ 13,9 13,9 @@ drill_active(drill_con)
This is a very simple test (performs \code{HEAD /} request on the Drill server/cluster)
}
\examples{
-\dontrun{
+try({
drill_connection() \%>\% drill_active()
-}
+}, silent=TRUE)
}
\seealso{
Other Drill direct REST API Interface: \code{\link{drill_cancel}},
D man/drill_jdbc.Rd => man/drill_jdbc.Rd +0 -79
@@ 1,79 0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/jdbc.r
-\name{drill_jdbc}
-\alias{drill_jdbc}
-\alias{src_drill_jdbc}
-\alias{tbl.src_drill_jdbc}
-\title{Connect to Drill using JDBC}
-\usage{
-drill_jdbc(nodes = "localhost:2181", cluster_id = NULL, schema = NULL,
- use_zk = TRUE)
-
-src_drill_jdbc(nodes = "localhost:2181", cluster_id = NULL, schema = NULL,
- use_zk = TRUE)
-
-\method{tbl}{src_drill_jdbc}(src, from, ...)
-}
-\arguments{
-\item{nodes}{character vector of nodes. If more than one node, you can either have
-a single string with the comma-separated node:port pairs pre-made or
-pass in a character vector with multiple node:port strings and the
-function will make a comma-separated node string for you.}
-
-\item{cluster_id}{the cluster id from \code{drill-override.conf}}
-
-\item{schema}{an optional schema name to append to the JDBC connection string}
-
-\item{use_zk}{are you connecting to a ZooKeeper instance (default: \code{TRUE}) or
-connecting to an individual DrillBit.}
-
-\item{src}{A Drill "src" created with \code{src_drill()}}
-
-\item{from}{A Drill view or table specification}
-
-\item{...}{Extra parameters}
-}
-\value{
-a JDBC connection object
-}
-\description{
-The DRILL JDBC driver fully-qualified path must be placed in the
-\code{DRILL_JDBC_JAR} environment variable. This is best done via \code{~/.Renviron}
-for interactive work. e.g. \code{DRILL_JDBC_JAR=/usr/local/drill/jars/jdbc-driver/drill-jdbc-all-1.10.0.jar}
-}
-\details{
-[src_drill_jdbc()] wraps the JDBC [dbConnect()] connection instantation in
-[dbplyr::src_dbi()] to return the equivalent of the REST driver's [src_drill()].
-}
-\examples{
-\dontrun{
-con <- drill_jdbc("localhost:2181", "main")
-drill_query(con, "SELECT * FROM cp.`employee.json`")
-
-# you can also use the connection with RJDBC calls:
-dbGetQuery(con, "SELECT * FROM cp.`employee.json`")
-
-# for local/embedded mode with default configuration info
-con <- drill_jdbc("localhost:31010", use_zk=FALSE)
-}
-}
-\references{
-\url{https://drill.apache.org/docs/using-the-jdbc-driver/#using-the-jdbc-url-for-a-random-drillbit-connection}
-}
-\seealso{
-Other Drill JDBC API: \code{\link{DrillJDBC}},
- \code{\link{dbConnect,DrillJDBCDriver-method}},
- \code{\link{dbDataType,DrillJDBCConnection-method}},
- \code{\link{db_data_type.DrillJDBCConnection}}
-
-Other Drill JDBC API: \code{\link{DrillJDBC}},
- \code{\link{dbConnect,DrillJDBCDriver-method}},
- \code{\link{dbDataType,DrillJDBCConnection-method}},
- \code{\link{db_data_type.DrillJDBCConnection}}
-
-Other Drill JDBC API: \code{\link{DrillJDBC}},
- \code{\link{dbConnect,DrillJDBCDriver-method}},
- \code{\link{dbDataType,DrillJDBCConnection-method}},
- \code{\link{db_data_type.DrillJDBCConnection}}
-}
-\concept{Drill JDBC API}
D man/drill_jdbc_internals.Rd => man/drill_jdbc_internals.Rd +0 -40
@@ 1,40 0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/jdbc.r
-\docType{class}
-\name{db_data_type.DrillJDBCConnection}
-\alias{db_data_type.DrillJDBCConnection}
-\alias{db_data_type.tbl_drill_jdbc}
-\alias{DrillJDBCResult-class}
-\alias{dbSendQuery,DrillJDBCConnection,character-method}
-\alias{sql_escape_ident.DrillJDBCConnection}
-\alias{sql_translate_env.DrillJDBCConnection}
-\alias{src_tbls.src_dbi}
-\title{Drill internals}
-\usage{
-\method{db_data_type}{DrillJDBCConnection}(con, fields, ...)
-
-\method{db_data_type}{tbl_drill_jdbc}(con, fields, ...)
-
-\S4method{dbSendQuery}{DrillJDBCConnection,character}(conn, statement, ...,
- list = NULL)
-
-\method{sql_escape_ident}{DrillJDBCConnection}(con, x)
-
-\method{sql_translate_env}{DrillJDBCConnection}(con)
-
-\method{src_tbls}{src_dbi}(x)
-}
-\arguments{
-\item{x}{x}
-}
-\description{
-"SHOW DATABASES"
-}
-\seealso{
-Other Drill JDBC API: \code{\link{DrillJDBC}},
- \code{\link{dbConnect,DrillJDBCDriver-method}},
- \code{\link{dbDataType,DrillJDBCConnection-method}},
- \code{\link{drill_jdbc}}
-}
-\concept{Drill JDBC API}
-\keyword{internal}
M man/drill_query.Rd => man/drill_query.Rd +2 -2
@@ 27,10 27,10 @@ default printing (which can be helpful if you accidentally execute a query and t
set is huge).
}
\examples{
-\dontrun{
+try({
drill_connection() \%>\%
drill_query("SELECT * FROM cp.`employee.json` limit 5")
-}
+}, silent=TRUE)
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
M man/drill_show_files.Rd => man/drill_show_files.Rd +2 -2
@@ 15,9 15,9 @@ drill_show_files(drill_con, schema_spec)
Show files in a file system schema.
}
\examples{
-\dontrun{
+try({
drill_connection() \%>\% drill_show_files("dfs.tmp")
-}
+}, silent=TRUE)
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}
M man/sergeant-exports.Rd => man/sergeant-exports.Rd +1 -0
@@ 3,6 3,7 @@
\name{sergeant-exports}
\alias{sergeant-exports}
\alias{\%>\%}
+\alias{tbl}
\title{sergeant exported operators}
\description{
The following functions are imported and then re-exported
M man/sergeant.Rd => man/sergeant.Rd +1 -1
@@ 30,7 30,7 @@ Drill can maximize data locality during query execution without moving data over
network or between nodes. Drill uses ZooKeeper to maintain cluster membership and health
check information.
-Methods are provided to work with Drill via the native JDBC & REST APIs along with R
+Methods are provided to work with Drill via the REST API along with R
\code{DBI} and \code{dplyr} interfaces.
}
\references{
M man/src_drill.Rd => man/src_drill.Rd +2 -2
@@ 36,7 36,7 @@ SQL functions that need to be implemented please file an issue on GitHub.
This is a DBI wrapper around the Drill REST API.
}
\examples{
-\dontrun{
+try({
db <- src_drill("localhost", 8047L)
print(db)
@@ 80,7 80,7 @@ select(emp, full_name) \%>\%
## 9 2 Darren Stanz 12 Darren Stanz******** 5 D*rr*n St*nz
## 10 4 Jonathan Murraiin 17 Jonathan Murraiin*** 0 J*n*th*n M*rr***n
## # ... with more rows, and 3 more variables: rpd <chr>, rnd <dbl>, first_three <chr>
-}
+}, silent=TRUE)
}
\seealso{
Other Drill REST `dplyr` API: \code{\link{drill_custom_functions}},
M tests/testthat/test-sergeant.R => tests/testthat/test-sergeant.R +2 -16
@@ 1,6 1,6 @@
test_host <- Sys.getenv("DRILL_TEST_HOST", "localhost")
-context("dplyr")
+context("dplyr API")
test_that("Core dbplyr ops work", {
testthat::skip_on_cran()
@@ 16,7 16,7 @@ test_that("Core dbplyr ops work", {
})
-context("rest")
+context("REST API")
test_that("REST API works", {
testthat::skip_on_cran()
@@ 51,17 51,3 @@ test_that("REST API works", {
})
-
-# context("jdbc")
-# test_that("we can do something", {
-#
-# testthat::skip_on_cran()
-#
-# dc <- drill_jdbc("localhost:31010", use_zk=FALSE)
-#
-# expect_that(dc, is_a("JDBCConnection"))
-#
-# expect_that(drill_query(dc, "SELECT * FROM cp.`employee.json`"),
-# is_a("tbl"))
-#
-# })