~hrbrmstr/sergeant

307fa78b3355f368e0f0b23d70ba40f395f934d9 — Bob Rudis 3 years ago d39b328
getting ready for new dplyr
M .gitignore => .gitignore +1 -0
@@ 1,3 1,4 @@
.Rproj.user
.Rhistory
.RData
.DS_Store

M DESCRIPTION => DESCRIPTION +4 -2
@@ 1,7 1,7 @@
Package: sergeant
Title: Tools to Transform and Query Data with the 'Apache Drill' 'REST API' and 'JDBC' Interfaces, 
    Plus 'dplyr' and 'DBI' Interfaces
Version: 0.3.2
Version: 0.4.0
Authors@R: c(person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre")),
             person("Edward", "Visel", email = "edward.visel@gmail.com", role = "ctb"))
Description: 'Apache Drill' is a low-latency distributed query engine designed to enable 


@@ 15,7 15,7 @@ Depends:
    dbplyr
URL: http://github.com/hrbrmstr/sergeant
BugReports: https://github.com/hrbrmstr/sergeant/issues
License: AGPL + file LICENSE
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
Imports:


@@ 32,3 32,5 @@ Imports:
Suggests:
    testthat
RoxygenNote: 6.0.1
Remotes:
    tidyverse/dbplyr

M NEWS.md => NEWS.md +5 -0
@@ 1,3 1,8 @@
# sergeant 0.4.0

- Getting ready for new `dplyr` (thx to Edward Visel)
- Cleaned up roxygen docs so that `src_drill` is exported now.

# sergeant 0.3.2

- Finally got quoting done. I thought I had before but I guess I hadn't.

M R/dplyr.r => R/dplyr.r +16 -15
@@ 1,16 1,14 @@
#' Connect to Drill (using \code{dplyr}).
#' Connect to Drill (dplyr)
#'
#' Use \code{src_drill()} to connect to a Drill cluster and `tbl()` to connect to a
#' fully-qualified "table reference". The vast majority of Drill SQL functions have
#' also been made available to the \code{dplyr} interface. If you have custom Drill
#' SQL functions that need to be implemented please file an issue on GitHub.
#'
#' @note This is a DBI wrapper around the Drill REST API.
#' @note TODO username/password support
#'
#' @param host Drill host (will pick up the value from \code{DRILL_HOST} env var)
#' @param port Drill port (will pick up the value from \code{DRILL_PORT} env var)
#' @param ssl use ssl?
#' @note This is a DBI wrapper around the Drill REST API. TODO username/password support
#' @export
#' @examples \dontrun{
#' db <- src_drill("localhost", "8047")


@@ 32,7 30,6 @@
#'                  rpd = rpad(full_name, 20L),
#'                 rpdw = rpad_with(full_name, 20L, "*"))
#' }
#' @export
src_drill <- function(host=Sys.getenv("DRILL_HOST", "localhost"),
                      port=as.integer(Sys.getenv("DRILL_PORT", 8047L)),
                      ssl=FALSE) {


@@ 43,36 40,40 @@ src_drill <- function(host=Sys.getenv("DRILL_HOST", "localhost"),

}

#' @rdname src_drill
#' @keywords internal
#' src tbls
#'
#' "SHOW DATABASES"
#'
#' @rdname src_tbls
#' @param x x
#' @export
src_tbls.src_drill <- function(x) {
  # Run "SHOW DATABASES" on the source's connection and flatten the
  # SCHEMA_NAME column of the result into a plain, unnamed vector.
  schemas <- dbGetQuery(x$con, "SHOW DATABASES")
  schema_names <- unlist(schemas$SCHEMA_NAME, use.names = FALSE)

  # The names are returned as ONE comma-separated string, not a vector.
  paste0(schema_names, collapse = ", ")
}

#' @rdname src_drill
#' @rdname src_tbls
#' @keywords internal
#' @export
db_desc.src_drill <- function(x) {

  # Builds a one-line description string for the source `x` from two queries
  # issued on `x$con`.

  # Query sys.version and keep the `version` column of the result.
  tmp <- dbGetQuery(x$con, "SELECT * FROM sys.version")
  version <- tmp$version
  # NOTE(review): the next two statements issue the same query (the strings
  # differ only in spacing around the last `/`) and the second overwrites
  # `tmp`. This looks like an old/new pair of one line -- confirm that only
  # one of them is meant to remain.
  tmp <- dbGetQuery(x$con, "SELECT (direct_max / 1024 / 1024 /1024) AS direct_max FROM sys.memory")
  tmp <- dbGetQuery(x$con, "SELECT (direct_max / 1024 / 1024 / 1024) AS direct_max FROM sys.memory")
  memory <- tmp$direct_max

  # `host` and `port` are read from slots of the connection object; `memory`
  # is direct_max divided by 1024 three times and is labelled "GB" below.
  # NOTE(review): `%d` needs whole-number values for port and memory --
  # presumably guaranteed by the caller/query; verify.
  sprintf("Drill %s [%s:%d] [%dGB direct memory]", version, x$con@host, x$con@port, memory)

}

#' @rdname src_drill
#' @rdname src_tbls
#' @keywords internal
#' @export
sql_escape_ident.DrillConnection <- function(con, x) {
  # Element-wise choice of quote character: identifiers that already contain
  # a backtick go through sql_quote() with a space as the quote character,
  # all others with a backtick. `con` is not used in the body.
  has_backtick <- grepl("`", x)
  ifelse(
    test = has_backtick,
    yes = sql_quote(x, ' '),
    no = sql_quote(x, '`')
  )
}

#' @rdname src_drill
#' @rdname src_tbls
#' @keywords internal
#' @export
copy_to.src_drill <- function(dest, df) {


@@ 88,7 89,7 @@ tbl.src_drill <- function(src, from, ...) {
  tbl_sql("drill", src=src, from=from, ...)
}

#' @rdname src_drill
#' @rdname src_tbls
#' @keywords internal
#' @export
db_explain.DrillConnection <- function(con, sql, ...) {


@@ 97,7 98,7 @@ db_explain.DrillConnection <- function(con, sql, ...) {
  return(paste(explanation[[1]], collapse = "\n"))
}

#' @rdname src_drill
#' @rdname src_tbls
#' @keywords internal
#' @export
db_query_fields.DrillConnection <- function(con, sql, ...) {


@@ 111,7 112,7 @@ db_query_fields.DrillConnection <- function(con, sql, ...) {

}

#' @rdname src_drill
#' @rdname src_tbls
#' @keywords internal
#' @export
db_data_type.DrillConnection <- function(con, fields, ...) {


@@ 132,7 133,7 @@ db_data_type.DrillConnection <- function(con, fields, ...) {
  vapply(fields, data_type, character(1))
}

#' @rdname src_drill
#' @rdname src_tbls
#' @keywords internal
#' @export
sql_translate_env.DrillConnection <- function(con) {

M README.Rmd => README.Rmd +7 -5
@@ 88,7 88,7 @@ options(width=120)
```{r message=FALSE}
library(sergeant)

ds <- src_drill("drill.local") 
ds <- src_drill("drillex")  # use localhost if running standalone on the same system; otherwise use the host or IP of your Drill server
ds

db <- tbl(ds, "cp.`employee.json`") 


@@ 167,7 167,7 @@ library(sergeant)
# current version
packageVersion("sergeant")

dc <- drill_connection("localhost") 
dc <- drill_connection("drillex") 

drill_active(dc)



@@ 225,9 225,11 @@ select columns[2] as city, columns[4] as lon, columns[3] as lat
```{r}
library(RJDBC)

con <- drill_jdbc("drill.local:2181", "jla") 
# or the following if running drill-embedded
# con <- drill_jdbc("localhost:31010", use_zk=FALSE)
# Use this if connecting to a cluster with zookeeper
# con <- drill_jdbc("drill-node:2181", "drillbits1") 

# Use the following if running drill-embedded
con <- drill_jdbc("localhost:31010", use_zk=FALSE)

drill_query(con, "SELECT * FROM cp.`employee.json`")


M README.md => README.md +128 -128
@@ 73,19 73,18 @@ devtools::install_github("hrbrmstr/sergeant")
``` r
library(sergeant)

ds <- src_drill("drill.local") 
ds <- src_drill("drillex")  # use localhost if running standalone on the same system; otherwise use the host or IP of your Drill server
ds
#> src:  Drill 1.9.0 [drill.local:8047] [32GB direct memory]
#> tbls: INFORMATION_SCHEMA, cp.default, dfs.default, dfs.pq, dfs.root, dfs.tmp, sys
#> src:  DrillConnection
#> tbls: INFORMATION_SCHEMA, cp.default, dfs.d, dfs.default, dfs.h, dfs.natexp, dfs.p, dfs.root, dfs.tmp, sys

db <- tbl(ds, "cp.`employee.json`") 

# without `collect()`:
count(db, gender, marital_status)
#> Source:   query [?? x 3]
#> Database: Drill 1.9.0 [drill.local:8047] [32GB direct memory]
#> Groups: gender
#> 
#> # Source:   lazy query [?? x 3]
#> # Database: DrillConnection
#> # Groups:   gender
#>   marital_status gender     n
#>            <chr>  <chr> <int>
#> 1              S      F   297


@@ 102,9 101,8 @@ count(db, gender, marital_status)
# LIMIT 1000

count(db, gender, marital_status) %>% collect()
#> Source: local data frame [4 x 3]
#> Groups: gender [2]
#> 
#> # A tibble: 4 x 3
#> # Groups:   gender [2]
#>   marital_status gender     n
#> *          <chr>  <chr> <int>
#> 1              S      F   297


@@ 127,18 125,18 @@ group_by(db, position_title) %>%
  mutate(full_desc=ifelse(gender=="F", "Female", "Male")) %>% 
  collect() %>% 
  select(Title=position_title, Gender=full_desc, Count=n)
#> # A tibble: 30 × 3
#> # A tibble: 30 x 3
#>                     Title Gender Count
#> *                   <chr>  <chr> <int>
#> 1               President Female     1
#> 2      VP Country Manager   Male     3
#> 3      VP Country Manager Female     3
#> 4  VP Information Systems Female     1
#> 5      VP Human Resources Female     1
#> 6           Store Manager Female    13
#> 7              VP Finance   Male     1
#> 8           Store Manager   Male    11
#> 9            HQ Marketing Female     2
#>  *                  <chr>  <chr> <int>
#>  1              President Female     1
#>  2     VP Country Manager   Male     3
#>  3     VP Country Manager Female     3
#>  4 VP Information Systems Female     1
#>  5     VP Human Resources Female     1
#>  6          Store Manager Female    13
#>  7             VP Finance   Male     1
#>  8          Store Manager   Male    11
#>  9           HQ Marketing Female     2
#> 10 HQ Information Systems Female     4
#> # ... with 20 more rows



@@ 151,31 149,31 @@ group_by(db, position_title) %>%
#       GROUP BY  position_title ,  gender )  dcyuypuypb 

arrange(db, desc(employee_id)) %>% print(n=20)
#> Source:   query [?? x 16]
#> Database: Drill 1.9.0 [drill.local:8047] [32GB direct memory]
#> 
#>    store_id gender department_id birth_date supervisor_id last_name          position_title  hire_date
#>       <int>  <chr>         <int>     <date>         <int>     <chr>                   <chr>     <dttm>
#> 1         8      M            17 1914-02-02           949   Dittmar Store Permanent Stocker 1998-01-01
#> 2         8      F            17 1914-02-02           949   Jantzer Store Permanent Stocker 1998-01-01
#> 3         8      F            17 1914-02-02           949     Sweet Store Permanent Stocker 1998-01-01
#> 4         8      M            17 1914-02-02           949    Murphy Store Permanent Stocker 1998-01-01
#> 5         8      M            17 1914-02-02           948   Lindsay Store Permanent Stocker 1998-01-01
#> 6         8      M            17 1914-02-02           948     Burke Store Permanent Stocker 1998-01-01
#> 7         8      M            17 1914-02-02           948   Bunosky Store Permanent Stocker 1998-01-01
#> 8         8      F            17 1914-02-02           948   Cabrera Store Permanent Stocker 1998-01-01
#> 9         8      F            17 1914-02-02           948     Terry Store Permanent Stocker 1998-01-01
#> 10        8      F            17 1914-02-02           947      Case Store Permanent Stocker 1998-01-01
#> 11        6      F            18 1976-10-05            56     Horne Store Temporary Stocker 1997-01-01
#> 12        8      F            17 1914-02-02           947    Nutter Store Permanent Stocker 1998-01-01
#> 13        8      F            17 1914-02-02           947 Willeford Store Permanent Stocker 1998-01-01
#> 14        8      M            17 1914-02-02           947 Clendenen Store Permanent Stocker 1998-01-01
#> 15        8      F            17 1914-02-02           947      Wall Store Permanent Stocker 1998-01-01
#> 16        8      F            16 1914-02-02           949    Morrow Store Temporary Checker 1998-01-01
#> 17        8      M            16 1914-02-02           949    Wilson Store Temporary Checker 1998-01-01
#> 18        8      F            16 1914-02-02           949    Duncan Store Temporary Checker 1998-01-01
#> 19        8      F            16 1914-02-02           949  Anderson Store Temporary Checker 1998-01-01
#> 20        8      M            16 1914-02-02           949    Watson Store Temporary Checker 1998-01-01
#> # Source:     table<cp.`employee.json`> [?? x 16]
#> # Database:   DrillConnection
#> # Ordered by: desc(employee_id)
#>    store_id gender department_id birth_date supervisor_id  last_name          position_title  hire_date
#>       <int>  <chr>         <int>     <date>         <int>      <chr>                   <chr>     <dttm>
#>  1       18      F            18 1914-02-02          1140      Stand Store Temporary Stocker 1998-01-01
#>  2       18      M            18 1914-02-02          1140    Burnham Store Temporary Stocker 1998-01-01
#>  3       18      F            18 1914-02-02          1139  Doolittle Store Temporary Stocker 1998-01-01
#>  4       18      M            18 1914-02-02          1139     Pirnie Store Temporary Stocker 1998-01-01
#>  5       18      M            17 1914-02-02          1140     Younce Store Permanent Stocker 1998-01-01
#>  6       18      F            17 1914-02-02          1140    Biltoft Store Permanent Stocker 1998-01-01
#>  7       18      M            17 1914-02-02          1139   Detwiler Store Permanent Stocker 1998-01-01
#>  8       18      F            17 1914-02-02          1139     Ciruli Store Permanent Stocker 1998-01-01
#>  9       18      F            16 1914-02-02          1140     Bishop Store Temporary Checker 1998-01-01
#> 10       18      F            16 1914-02-02          1140  Cutwright Store Temporary Checker 1998-01-01
#> 11       18      F            16 1914-02-02          1139   Anderson Store Temporary Checker 1998-01-01
#> 12       18      F            16 1914-02-02          1139  Swartwood Store Temporary Checker 1998-01-01
#> 13       18      M            15 1914-02-02          1140 Curtsinger Store Permanent Checker 1998-01-01
#> 14       18      F            15 1914-02-02          1140      Quick Store Permanent Checker 1998-01-01
#> 15       18      M            15 1914-02-02          1139      Souza Store Permanent Checker 1998-01-01
#> 16       18      M            15 1914-02-02          1139   Compagno Store Permanent Checker 1998-01-01
#> 17       18      M            11 1961-09-24          1139  Jaramillo  Store Shift Supervisor 1998-01-01
#> 18       18      M            11 1972-05-12            17     Belsey Store Assistant Manager 1998-01-01
#> 19       12      M            18 1914-02-02          1069    Eichorn Store Temporary Stocker 1998-01-01
#> 20       12      F            18 1914-02-02          1069  Geiermann Store Temporary Stocker 1998-01-01
#> # ... with more rows, and 8 more variables: management_role <chr>, salary <dbl>, marital_status <chr>, full_name <chr>,
#> #   employee_id <int>, education_level <chr>, first_name <chr>, position_id <int>



@@ 194,18 192,18 @@ mutate(db, position_title=tolower(position_title)) %>%
  group_by(supervisor_id) %>% 
  summarise(underlings_count=n()) %>% 
  collect()
#> # A tibble: 112 × 2
#> # A tibble: 112 x 2
#>    supervisor_id underlings_count
#> *          <int>            <int>
#> 1              0                1
#> 2              1                7
#> 3              5                9
#> 4              4                2
#> 5              2                3
#> 6             20                2
#> 7             21                4
#> 8             22                7
#> 9              6                4
#>  *         <int>            <int>
#>  1             0                1
#>  2             1                7
#>  3             5                9
#>  4             4                2
#>  5             2                3
#>  6            20                2
#>  7            21                4
#>  8            22                7
#>  9             6                4
#> 10            36                2
#> # ... with 102 more rows



@@ 227,18 225,18 @@ library(sergeant)

# current version
packageVersion("sergeant")
#> [1] '0.3.1.9000'
#> [1] '0.3.2'

dc <- drill_connection("localhost") 
dc <- drill_connection("drillex") 

drill_active(dc)
#> [1] TRUE

drill_version(dc)
#> [1] "1.9.0"
#> [1] "1.10.0"

drill_storage(dc)$name
#> [1] "cp"    "dfs"   "hbase" "hdfs"  "hive"  "kudu"  "mongo" "my"    "s3"
#> [1] "cp"    "dfs"   "hbase" "hive"  "kudu"  "mongo" "s3"
```

Working with the built-in JSON data sets:


@@ 264,18 262,18 @@ drill_query(dc, "SELECT * FROM cp.`employee.json` limit 100")
#>   first_name = col_character(),
#>   position_id = col_integer()
#> )
#> # A tibble: 100 × 16
#> # A tibble: 100 x 16
#>    store_id gender department_id birth_date supervisor_id last_name         position_title  hire_date   management_role
#> *     <int>  <chr>         <int>     <date>         <int>     <chr>                  <chr>     <dttm>             <chr>
#> 1         0      F             1 1961-08-26             0    Nowmer              President 1994-12-01 Senior Management
#> 2         0      M             1 1915-07-03             1   Whelply     VP Country Manager 1994-12-01 Senior Management
#> 3         0      M             1 1969-06-20             1    Spence     VP Country Manager 1998-01-01 Senior Management
#> 4         0      F             1 1951-05-10             1 Gutierrez     VP Country Manager 1998-01-01 Senior Management
#> 5         0      F             2 1942-10-08             1   Damstra VP Information Systems 1994-12-01 Senior Management
#> 6         0      F             3 1949-03-27             1  Kanagaki     VP Human Resources 1994-12-01 Senior Management
#> 7         9      F            11 1922-08-10             5   Brunner          Store Manager 1998-01-01  Store Management
#> 8        21      F            11 1979-06-23             5  Blumberg          Store Manager 1998-01-01  Store Management
#> 9         0      M             5 1949-08-26             1     Stanz             VP Finance 1994-12-01 Senior Management
#>  *    <int>  <chr>         <int>     <date>         <int>     <chr>                  <chr>     <dttm>             <chr>
#>  1        0      F             1 1961-08-26             0    Nowmer              President 1994-12-01 Senior Management
#>  2        0      M             1 1915-07-03             1   Whelply     VP Country Manager 1994-12-01 Senior Management
#>  3        0      M             1 1969-06-20             1    Spence     VP Country Manager 1998-01-01 Senior Management
#>  4        0      F             1 1951-05-10             1 Gutierrez     VP Country Manager 1998-01-01 Senior Management
#>  5        0      F             2 1942-10-08             1   Damstra VP Information Systems 1994-12-01 Senior Management
#>  6        0      F             3 1949-03-27             1  Kanagaki     VP Human Resources 1994-12-01 Senior Management
#>  7        9      F            11 1922-08-10             5   Brunner          Store Manager 1998-01-01  Store Management
#>  8       21      F            11 1979-06-23             5  Blumberg          Store Manager 1998-01-01  Store Management
#>  9        0      M             5 1949-08-26             1     Stanz             VP Finance 1994-12-01 Senior Management
#> 10        1      M            11 1967-06-20             5  Murraiin          Store Manager 1998-01-01  Store Management
#> # ... with 90 more rows, and 7 more variables: salary <dbl>, marital_status <chr>, full_name <chr>, employee_id <int>,
#> #   education_level <chr>, first_name <chr>, position_id <int>


@@ 285,38 283,38 @@ drill_query(dc, "SELECT COUNT(gender) AS gender FROM cp.`employee.json` GROUP BY
#> cols(
#>   gender = col_integer()
#> )
#> # A tibble: 2 × 1
#> # A tibble: 2 x 1
#>   gender
#> *  <int>
#> 1    601
#> 2    554

drill_options(dc)
#> # A tibble: 105 × 4
#> # A tibble: 113 x 4
#>                                              name value   type    kind
#> *                                           <chr> <chr>  <chr>   <chr>
#> 1                  planner.enable_hash_single_key  TRUE SYSTEM BOOLEAN
#> 2              planner.enable_limit0_optimization FALSE SYSTEM BOOLEAN
#> 3               store.json.read_numbers_as_double FALSE SYSTEM BOOLEAN
#> 4                 planner.enable_constant_folding  TRUE SYSTEM BOOLEAN
#> 5                       store.json.extended_types FALSE SYSTEM BOOLEAN
#> 6    planner.memory.non_blocking_operators_memory    64 SYSTEM    LONG
#> 7                   planner.enable_multiphase_agg  TRUE SYSTEM BOOLEAN
#> 8  planner.filter.max_selectivity_estimate_factor     1 SYSTEM  DOUBLE
#> 9                     planner.enable_mux_exchange  TRUE SYSTEM BOOLEAN
#> 10                   store.parquet.use_new_reader FALSE SYSTEM BOOLEAN
#> # ... with 95 more rows
#>  *                                          <chr> <chr>  <chr>   <chr>
#>  1                 planner.enable_hash_single_key  TRUE SYSTEM BOOLEAN
#>  2      store.parquet.reader.pagereader.queuesize     2 SYSTEM    LONG
#>  3             planner.enable_limit0_optimization FALSE SYSTEM BOOLEAN
#>  4              store.json.read_numbers_as_double FALSE SYSTEM BOOLEAN
#>  5                planner.enable_constant_folding  TRUE SYSTEM BOOLEAN
#>  6                      store.json.extended_types FALSE SYSTEM BOOLEAN
#>  7   planner.memory.non_blocking_operators_memory    64 SYSTEM    LONG
#>  8                  planner.enable_multiphase_agg  TRUE SYSTEM BOOLEAN
#>  9                  exec.query_profile.debug_mode FALSE SYSTEM BOOLEAN
#> 10 planner.filter.max_selectivity_estimate_factor     1 SYSTEM  DOUBLE
#> # ... with 103 more rows

drill_options(dc, "json")
#> # A tibble: 7 × 4
#> # A tibble: 7 x 4
#>                                                    name value   type    kind
#>                                                   <chr> <chr>  <chr>   <chr>
#> 1                     store.json.read_numbers_as_double FALSE SYSTEM BOOLEAN
#> 2                             store.json.extended_types FALSE SYSTEM BOOLEAN
#> 3                              store.json.writer.uglify  TRUE SYSTEM BOOLEAN
#> 4                store.json.reader.skip_invalid_records  TRUE SYSTEM BOOLEAN
#> 5 store.json.reader.print_skipped_invalid_record_number  TRUE SYSTEM BOOLEAN
#> 6                              store.json.all_text_mode  TRUE SYSTEM BOOLEAN
#> 3                              store.json.writer.uglify FALSE SYSTEM BOOLEAN
#> 4                store.json.reader.skip_invalid_records FALSE SYSTEM BOOLEAN
#> 5 store.json.reader.print_skipped_invalid_record_number FALSE SYSTEM BOOLEAN
#> 6                              store.json.all_text_mode FALSE SYSTEM BOOLEAN
#> 7                    store.json.writer.skip_null_fields  TRUE SYSTEM BOOLEAN
```



@@ 332,7 330,7 @@ drill_query(dc, "SELECT * FROM dfs.`/usr/local/drill/sample-data/nation.parquet`
#>   N_NATIONKEY = col_integer(),
#>   N_REGIONKEY = col_integer()
#> )
#> # A tibble: 5 × 4
#> # A tibble: 5 x 4
#>              N_COMMENT    N_NAME N_NATIONKEY N_REGIONKEY
#> *                <chr>     <chr>       <int>       <int>
#> 1  haggle. carefully f   ALGERIA           0           0


@@ 354,14 352,14 @@ drill_query(dc, "SELECT * FROM dfs.`/usr/local/drill/sample-data/nations*/nation
#>   N_REGIONKEY = col_integer(),
#>   dir0 = col_character()
#> )
#> # A tibble: 5 × 5
#> # A tibble: 5 x 5
#>              N_COMMENT    N_NAME N_NATIONKEY N_REGIONKEY      dir0
#> *                <chr>     <chr>       <int>       <int>     <chr>
#> 1  haggle. carefully f   ALGERIA           0           0 nationsMF
#> 2 al foxes promise sly ARGENTINA           1           1 nationsMF
#> 3 y alongside of the p    BRAZIL           2           1 nationsMF
#> 4 eas hang ironic, sil    CANADA           3           1 nationsMF
#> 5 y above the carefull     EGYPT           4           4 nationsMF
#> 1  haggle. carefully f   ALGERIA           0           0 nationsSF
#> 2 al foxes promise sly ARGENTINA           1           1 nationsSF
#> 3 y alongside of the p    BRAZIL           2           1 nationsSF
#> 4 eas hang ironic, sil    CANADA           3           1 nationsSF
#> 5 y above the carefull     EGYPT           4           4 nationsSF
```

### A preview of the built-in support for spatial ops


@@ 388,7 386,7 @@ select columns[2] as city, columns[4] as lon, columns[3] as lat
#>   lon = col_double(),
#>   lat = col_double()
#> )
#> # A tibble: 7 × 3
#> # A tibble: 7 x 3
#>          city       lon      lat
#> *       <chr>     <dbl>    <dbl>
#> 1     Burbank -121.9316 37.32328


@@ 406,45 404,47 @@ select columns[2] as city, columns[4] as lon, columns[3] as lat
library(RJDBC)
#> Loading required package: rJava

con <- drill_jdbc("drill.local:2181", "jla") 
#> Using [jdbc:drill:zk=drill.local:2181/drill/jla]...
# or the following if running drill-embedded
# con <- drill_jdbc("localhost:31010", use_zk=FALSE)
# Use this if connecting to a cluster with zookeeper
# con <- drill_jdbc("drill-node:2181", "drillbits1") 

# Use the following if running drill-embedded
con <- drill_jdbc("localhost:31010", use_zk=FALSE)
#> Using [jdbc:drill:drillbit=localhost:31010]...

drill_query(con, "SELECT * FROM cp.`employee.json`")
#> # A tibble: 1,155 × 16
#> # A tibble: 1,155 x 16
#>    employee_id         full_name first_name last_name position_id         position_title store_id department_id
#> *        <chr>             <chr>      <chr>     <chr>       <chr>                  <chr>    <chr>         <chr>
#> 1            1      Sheri Nowmer      Sheri    Nowmer           1              President        0             1
#> 2            2   Derrick Whelply    Derrick   Whelply           2     VP Country Manager        0             1
#> 3            4    Michael Spence    Michael    Spence           2     VP Country Manager        0             1
#> 4            5    Maya Gutierrez       Maya Gutierrez           2     VP Country Manager        0             1
#> 5            6   Roberta Damstra    Roberta   Damstra           3 VP Information Systems        0             2
#> 6            7  Rebecca Kanagaki    Rebecca  Kanagaki           4     VP Human Resources        0             3
#> 7            8       Kim Brunner        Kim   Brunner          11          Store Manager        9            11
#> 8            9   Brenda Blumberg     Brenda  Blumberg          11          Store Manager       21            11
#> 9           10      Darren Stanz     Darren     Stanz           5             VP Finance        0             5
#>  *       <dbl>             <chr>      <chr>     <chr>       <dbl>                  <chr>    <dbl>         <dbl>
#>  1           1      Sheri Nowmer      Sheri    Nowmer           1              President        0             1
#>  2           2   Derrick Whelply    Derrick   Whelply           2     VP Country Manager        0             1
#>  3           4    Michael Spence    Michael    Spence           2     VP Country Manager        0             1
#>  4           5    Maya Gutierrez       Maya Gutierrez           2     VP Country Manager        0             1
#>  5           6   Roberta Damstra    Roberta   Damstra           3 VP Information Systems        0             2
#>  6           7  Rebecca Kanagaki    Rebecca  Kanagaki           4     VP Human Resources        0             3
#>  7           8       Kim Brunner        Kim   Brunner          11          Store Manager        9            11
#>  8           9   Brenda Blumberg     Brenda  Blumberg          11          Store Manager       21            11
#>  9          10      Darren Stanz     Darren     Stanz           5             VP Finance        0             5
#> 10          11 Jonathan Murraiin   Jonathan  Murraiin          11          Store Manager        1            11
#> # ... with 1,145 more rows, and 8 more variables: birth_date <chr>, hire_date <chr>, salary <chr>, supervisor_id <chr>,
#> # ... with 1,145 more rows, and 8 more variables: birth_date <chr>, hire_date <chr>, salary <dbl>, supervisor_id <dbl>,
#> #   education_level <chr>, marital_status <chr>, gender <chr>, management_role <chr>

# but it can work via JDBC function calls, too
dbGetQuery(con, "SELECT * FROM cp.`employee.json`") %>% 
  tibble::as_tibble()
#> # A tibble: 1,155 × 16
#> # A tibble: 1,155 x 16
#>    employee_id         full_name first_name last_name position_id         position_title store_id department_id
#> *        <chr>             <chr>      <chr>     <chr>       <chr>                  <chr>    <chr>         <chr>
#> 1            1      Sheri Nowmer      Sheri    Nowmer           1              President        0             1
#> 2            2   Derrick Whelply    Derrick   Whelply           2     VP Country Manager        0             1
#> 3            4    Michael Spence    Michael    Spence           2     VP Country Manager        0             1
#> 4            5    Maya Gutierrez       Maya Gutierrez           2     VP Country Manager        0             1
#> 5            6   Roberta Damstra    Roberta   Damstra           3 VP Information Systems        0             2
#> 6            7  Rebecca Kanagaki    Rebecca  Kanagaki           4     VP Human Resources        0             3
#> 7            8       Kim Brunner        Kim   Brunner          11          Store Manager        9            11
#> 8            9   Brenda Blumberg     Brenda  Blumberg          11          Store Manager       21            11
#> 9           10      Darren Stanz     Darren     Stanz           5             VP Finance        0             5
#>  *       <dbl>             <chr>      <chr>     <chr>       <dbl>                  <chr>    <dbl>         <dbl>
#>  1           1      Sheri Nowmer      Sheri    Nowmer           1              President        0             1
#>  2           2   Derrick Whelply    Derrick   Whelply           2     VP Country Manager        0             1
#>  3           4    Michael Spence    Michael    Spence           2     VP Country Manager        0             1
#>  4           5    Maya Gutierrez       Maya Gutierrez           2     VP Country Manager        0             1
#>  5           6   Roberta Damstra    Roberta   Damstra           3 VP Information Systems        0             2
#>  6           7  Rebecca Kanagaki    Rebecca  Kanagaki           4     VP Human Resources        0             3
#>  7           8       Kim Brunner        Kim   Brunner          11          Store Manager        9            11
#>  8           9   Brenda Blumberg     Brenda  Blumberg          11          Store Manager       21            11
#>  9          10      Darren Stanz     Darren     Stanz           5             VP Finance        0             5
#> 10          11 Jonathan Murraiin   Jonathan  Murraiin          11          Store Manager        1            11
#> # ... with 1,145 more rows, and 8 more variables: birth_date <chr>, hire_date <chr>, salary <chr>, supervisor_id <chr>,
#> # ... with 1,145 more rows, and 8 more variables: birth_date <chr>, hire_date <chr>, salary <dbl>, supervisor_id <dbl>,
#> #   education_level <chr>, marital_status <chr>, gender <chr>, management_role <chr>
```



@@ 460,7 460,7 @@ library(testthat)
#>     matches

date()
#> [1] "Mon Jan 23 10:36:57 2017"
#> [1] "Tue May 30 17:28:25 2017"

test_dir("tests/")
#> testthat results ========================================================================================================

M man/src_drill.Rd => man/src_drill.Rd +2 -29
@@ 2,37 2,13 @@
% Please edit documentation in R/dplyr.r
\name{src_drill}
\alias{src_drill}
\alias{src_tbls.src_drill}
\alias{db_desc.src_drill}
\alias{sql_escape_ident.DrillConnection}
\alias{copy_to.src_drill}
\alias{tbl.src_drill}
\alias{db_explain.DrillConnection}
\alias{db_query_fields.DrillConnection}
\alias{db_data_type.DrillConnection}
\alias{sql_translate_env.DrillConnection}
\title{Connect to Drill (using \code{dplyr}).}
\title{Connect to Drill (dplyr)}
\usage{
src_drill(host = Sys.getenv("DRILL_HOST", "localhost"),
  port = as.integer(Sys.getenv("DRILL_PORT", 8047L)), ssl = FALSE)

\method{src_tbls}{src_drill}(x)

\method{db_desc}{src_drill}(x)

\method{sql_escape_ident}{DrillConnection}(con, x)

\method{copy_to}{src_drill}(dest, df)

\method{tbl}{src_drill}(src, from, ...)

\method{db_explain}{DrillConnection}(con, sql, ...)

\method{db_query_fields}{DrillConnection}(con, sql, ...)

\method{db_data_type}{DrillConnection}(con, fields, ...)

\method{sql_translate_env}{DrillConnection}(con)
}
\arguments{
\item{host}{Drill host (will pick up the value from \code{DRILL_HOST} env var)}


@@ 54,9 30,7 @@ also been made available to the \code{dplyr} interface. If you have custom Drill
SQL functions that need to be implemented please file an issue on GitHub.
}
\note{
This is a DBI wrapper around the Drill REST API.

TODO username/password support
This is a DBI wrapper around the Drill REST API. TODO username/password support
}
\examples{
\dontrun{


@@ 80,4 54,3 @@ select(emp, full_name) \%>\%
                rpdw = rpad_with(full_name, 20L, "*"))
}
}
\keyword{internal}

A man/src_tbls.Rd => man/src_tbls.Rd +36 -0
@@ 0,0 1,36 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr.r
\name{src_tbls.src_drill}
\alias{src_tbls.src_drill}
\alias{db_desc.src_drill}
\alias{sql_escape_ident.DrillConnection}
\alias{copy_to.src_drill}
\alias{db_explain.DrillConnection}
\alias{db_query_fields.DrillConnection}
\alias{db_data_type.DrillConnection}
\alias{sql_translate_env.DrillConnection}
\title{src tbls}
\usage{
\method{src_tbls}{src_drill}(x)

\method{db_desc}{src_drill}(x)

\method{sql_escape_ident}{DrillConnection}(con, x)

\method{copy_to}{src_drill}(dest, df)

\method{db_explain}{DrillConnection}(con, sql, ...)

\method{db_query_fields}{DrillConnection}(con, sql, ...)

\method{db_data_type}{DrillConnection}(con, fields, ...)

\method{sql_translate_env}{DrillConnection}(con)
}
\arguments{
\item{x}{x}
}
\description{
"SHOW DATABASES"
}
\keyword{internal}

M sergeant.Rproj => sergeant.Rproj +1 -0
@@ 3,6 3,7 @@ Version: 1.0
RestoreWorkspace: No
SaveWorkspace: No
AlwaysSaveHistory: Default
QuitChildProcessesOnExit: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes

M tests/testthat/test-sergeant.R => tests/testthat/test-sergeant.R +4 -3
@@ 4,7 4,7 @@ test_that("we can do something", {
  testthat::skip_on_cran()
  testthat::skip_on_travis()

  ds <- src_drill("drill1")
  ds <- src_drill("drillex")
  db <- tbl(ds, "cp.`employee.json`")

  count(db, gender, marital_status) %>%


@@ 12,10 12,11 @@ test_that("we can do something", {

  expect_that(res, is_a("data.frame"))

  dc <- drill_connection("drill1")
  dc <- drill_connection("drillex")
  expect_equal(drill_active(dc), TRUE)

  con <- drill_jdbc("drill1:2181", "jla")
  #con <- drill_jdbc("drill1:2181", "jla")
  con <- drill_jdbc("localhost:31010", use_zk=FALSE)
  res <- drill_query(con, "SELECT * FROM cp.`employee.json`")
  expect_that(res, is_a("data.frame"))