~hrbrmstr/sergeant

2adc808b9d6e65fac00b764950c1dc6b16b3d7fe — Bob Rudis 4 years ago 0101530
enabled use of drill_jdbc() connections in drill_query()
added an example of using Drill in knitr SQL chunks
minor tweaks (nearing a CRAN release)
8 files changed, 176 insertions(+), 36 deletions(-)

M NEWS.md
M R/jdbc.r
M R/query.r
M README.Rmd
M README.md
M man/drill_jdbc.Rd
M man/drill_query.Rd
M man/drill_uplift.Rd
M NEWS.md => NEWS.md +6 -0
@@ 1,5 1,11 @@
# sergeant 0.1.2.9000

* can pass RJDBC connections made with `drill_jdbc()` to `drill_query()`
* finally enabled `nodes` parameter to be a multi-element character vector as it said
  in the function description

# sergeant 0.1.2.9000

* support embedded drill JDBC connection

# sergeant 0.1.1.9000

M R/jdbc.r => R/jdbc.r +5 -0
@@ 16,6 16,9 @@
#' @export
#' @examples \dontrun{
#' con <- drill_jdbc("localhost:2181", "main")
#' drill_query(con, "SELECT * FROM cp.`employee.json`")
#'
#' # you can also use the connection with RJDBC calls:
#' dbGetQuery(con, "SELECT * FROM cp.`employee.json`")
#'
#' # for local/embedded mode with default configuration info


@@ 29,6 32,8 @@ drill_jdbc <- function(nodes="localhost:2181", cluster_id=NULL, schema=NULL, use
  conn_type <- "drillbit"
  if (use_zk) conn_type <- "zk"

  if (length(nodes) > 1) nodes <- paste0(nodes, collapse=",")

  conn_str <- sprintf("jdbc:drill:%s=%s", conn_type, nodes)

  if (!is.null(cluster_id)) conn_str <- sprintf("%s%s", conn_str, sprintf("/drill/%s", cluster_id))

M R/query.r => R/query.r +41 -24
@@ 1,8 1,17 @@
#' Submit a query and return results
#'
#' @param drill_con drill server connection object setup by \code{drill_connection()}
#' This function can handle REST API connections or JDBC connections. There is a benefit to
#' calling this function for JDBC connections vs a straight call to \code{dbGetQuery()} in
#' that the function result is a `tbl_df` vs a plain \code{data.frame} so you get better
#' default printing (which can be helpful if you accidentally execute a query and the result
#' set is huge).
#'
#' @param drill_con drill server connection object setup by \code{drill_connection()} or
#'                  \code{drill_jdbc()}
#' @param query query to run
#' @param uplift automatically run \code{drill_uplift()} on the result? (default: \code{TRUE})
#' @param uplift automatically run \code{drill_uplift()} on the result? (default: \code{TRUE},
#'               ignored if \code{drill_con} is a \code{JDBCConnection} created by
#'               \code{drill_jdbc()})
#' @param .progress if \code{TRUE} (default if in an interactive session) then ask
#'                  \code{httr::POST} to display a progress bar
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}


@@ 13,41 22,49 @@
#' }
drill_query <- function(drill_con, query, uplift=TRUE, .progress=interactive()) {

  drill_server <- make_server(drill_con)
  if (inherits(drill_con, "JDBCConnection")) {

    dplyr::tbl_df(RJDBC::dbGetQuery(drill_con, query) )

  if (.progress) {
    res <- httr::POST(sprintf("%s/query.json", drill_server),
                      encode="json",
                      progress(),
                      body=list(queryType="SQL",
                                query=query))
  } else {
    res <- httr::POST(sprintf("%s/query.json", drill_server),
                      encode="json",
                      body=list(queryType="SQL",
                                query=query))
  }

  out <- jsonlite::fromJSON(httr::content(res, as="text", encoding="UTF-8"), flatten=TRUE)
    drill_server <- make_server(drill_con)

    if (.progress) {
      res <- httr::POST(sprintf("%s/query.json", drill_server),
                        encode="json",
                        progress(),
                        body=list(queryType="SQL",
                                  query=query))
    } else {
      res <- httr::POST(sprintf("%s/query.json", drill_server),
                        encode="json",
                        body=list(queryType="SQL",
                                  query=query))
    }

    out <- jsonlite::fromJSON(httr::content(res, as="text", encoding="UTF-8"), flatten=TRUE)

    if ("errorMessage" %in% names(out)) {
      message(sprintf("Query ==> %s\n%s\n", gsub("[\r\n]", " ", query), out$errorMessage))
      invisible(out)
    } else {
      if (uplift) out <- drill_uplift(out)
      out
    }

  if ("errorMessage" %in% names(out)) {
    message(sprintf("Query ==> %s\n%s\n", gsub("[\r\n]", " ", query), out$errorMessage))
    invisible(out)
  } else {
    if (uplift) out <- drill_uplift(out)
    out
  }

}

#' Turn a columnar query results into a type-converted tbl
#' Turn columnar query results into a type-converted tbl
#'
#' If you know the result of `drill_query()` will be a data frame, then
#' you can pipe it to this function to pull out `rows` and automatically
#' type-convert it.
#'
#' Not really intended to be called directly, but useful if you ran \code{drill_query()}
#' without `uplift=TRUE` but want to then convert the structure.
#' Not really intended to be called directly, but useful if you accidentally ran
#' \code{drill_query()} without `uplift=TRUE` but want to then convert the structure.
#'
#' @param query_result the result of a call to `drill_query()`
#' @references \href{https://drill.apache.org/docs/}{Drill documentation}

M README.Rmd => README.Rmd +32 -1
@@ 46,7 46,7 @@ The following functions are implemented:
- `drill_jdbc`:	Connect to Drill using JDBC _(driver included with package until CRAN release)_
- `drill_metrics`:	Get the current memory metrics
- `drill_options`:	List the name, default, and data type of the system and session options
- `drill_profile`:	Get the profile of the query that has the given queryid
- `drill_profile`:	Get the profile of the query that has the given query id
- `drill_profiles`:	Get the profiles of running and completed queries
- `drill_query`:	Submit a query and return results
- `drill_set`:	Set Drill SYSTEM or SESSION options


@@ 140,10 140,41 @@ library(RJDBC)

con <- drill_jdbc("drill.local:2181", "jla")

drill_query(con, "SELECT * FROM cp.`employee.json`")

# but it can work via JDBC function calls, too
dbGetQuery(con, "SELECT * FROM cp.`employee.json`") %>% 
  tibble::as_tibble()
```

### Use in knitr SQL code chunks

If you install `knitr` via GitHub (`devtools::install_github("yihui/knitr")`) you can use the `sql` chunk code type with `drill_jdbc()` connections:

    ---
    output: html_document
    ---

    ```{r libraries, message=FALSE}
    library(sergeant)
    library(DBI)
    library(RJDBC)
    ```

    ## Setup JDBC connection

    ```{r conn_setup}
    dc <- drill_jdbc("drill.local:2181", "jla")
    ```

    ## Test out a query

    ```{sql, connection=dc}
    SELECT * FROM cp.`employee.json`
    ```

Which is (IMO) _way_ better than using the Drill consoles, the Drill Web UI query box or SQLWorkbench.

### Test Results

```{r}

M README.md => README.md +76 -5
@@ 34,7 34,7 @@ The following functions are implemented:
-   `drill_jdbc`: Connect to Drill using JDBC *(driver included with package until CRAN release)*
-   `drill_metrics`: Get the current memory metrics
-   `drill_options`: List the name, default, and data type of the system and session options
-   `drill_profile`: Get the profile of the query that has the given queryid
-   `drill_profile`: Get the profile of the query that has the given query id
-   `drill_profiles`: Get the profiles of running and completed queries
-   `drill_query`: Submit a query and return results
-   `drill_set`: Set Drill SYSTEM or SESSION options


@@ 152,7 152,7 @@ drill_options(dc, "json")
#> 3                              store.json.writer.uglify FALSE SYSTEM BOOLEAN
#> 4                store.json.reader.skip_invalid_records  TRUE SYSTEM BOOLEAN
#> 5 store.json.reader.print_skipped_invalid_record_number  TRUE SYSTEM BOOLEAN
#> 6                              store.json.all_text_mode FALSE SYSTEM BOOLEAN
#> 6                              store.json.all_text_mode  TRUE SYSTEM BOOLEAN
#> 7                    store.json.writer.skip_null_fields  TRUE SYSTEM BOOLEAN
```



@@ 246,11 246,29 @@ library(RJDBC)
con <- drill_jdbc("drill.local:2181", "jla")
#> Using [jdbc:drill:zk=drill.local:2181/drill/jla]...

drill_query(con, "SELECT * FROM cp.`employee.json`")
#> # A tibble: 1,155 × 16
#>    employee_id         full_name first_name last_name position_id         position_title store_id department_id
#> *        <chr>             <chr>      <chr>     <chr>       <chr>                  <chr>    <chr>         <chr>
#> 1            1      Sheri Nowmer      Sheri    Nowmer           1              President        0             1
#> 2            2   Derrick Whelply    Derrick   Whelply           2     VP Country Manager        0             1
#> 3            4    Michael Spence    Michael    Spence           2     VP Country Manager        0             1
#> 4            5    Maya Gutierrez       Maya Gutierrez           2     VP Country Manager        0             1
#> 5            6   Roberta Damstra    Roberta   Damstra           3 VP Information Systems        0             2
#> 6            7  Rebecca Kanagaki    Rebecca  Kanagaki           4     VP Human Resources        0             3
#> 7            8       Kim Brunner        Kim   Brunner          11          Store Manager        9            11
#> 8            9   Brenda Blumberg     Brenda  Blumberg          11          Store Manager       21            11
#> 9           10      Darren Stanz     Darren     Stanz           5             VP Finance        0             5
#> 10          11 Jonathan Murraiin   Jonathan  Murraiin          11          Store Manager        1            11
#> # ... with 1,145 more rows, and 8 more variables: birth_date <chr>, hire_date <chr>, salary <chr>, supervisor_id <chr>,
#> #   education_level <chr>, marital_status <chr>, gender <chr>, management_role <chr>

# but it can work via JDBC function calls, too
dbGetQuery(con, "SELECT * FROM cp.`employee.json`") %>% 
  tibble::as_tibble()
#> # A tibble: 1,155 × 16
#>    employee_id         full_name first_name last_name position_id         position_title store_id department_id
#> *        <dbl>             <chr>      <chr>     <chr>       <dbl>                  <chr>    <dbl>         <dbl>
#> *        <chr>             <chr>      <chr>     <chr>       <chr>                  <chr>    <chr>         <chr>
#> 1            1      Sheri Nowmer      Sheri    Nowmer           1              President        0             1
#> 2            2   Derrick Whelply    Derrick   Whelply           2     VP Country Manager        0             1
#> 3            4    Michael Spence    Michael    Spence           2     VP Country Manager        0             1


@@ 261,10 279,63 @@ dbGetQuery(con, "SELECT * FROM cp.`employee.json`") %>%
#> 8            9   Brenda Blumberg     Brenda  Blumberg          11          Store Manager       21            11
#> 9           10      Darren Stanz     Darren     Stanz           5             VP Finance        0             5
#> 10          11 Jonathan Murraiin   Jonathan  Murraiin          11          Store Manager        1            11
#> # ... with 1,145 more rows, and 8 more variables: birth_date <chr>, hire_date <chr>, salary <dbl>, supervisor_id <dbl>,
#> # ... with 1,145 more rows, and 8 more variables: birth_date <chr>, hire_date <chr>, salary <chr>, supervisor_id <chr>,
#> #   education_level <chr>, marital_status <chr>, gender <chr>, management_role <chr>
```

### Use in knitr SQL code chunks

If you install `knitr` via GitHub (`devtools::install_github("yihui/knitr")`) you can use the `sql` chunk code type with `drill_jdbc()` connections:

    ---
    output: html_document
    ---


    ```r
    library(sergeant)
    library(DBI)
    library(RJDBC)
    ```

    ## Setup JDBC connection


    ```r
    dc <- drill_jdbc("drill.local:2181", "jla")
    #> Using [jdbc:drill:zk=drill.local:2181/drill/jla]...
    ```

    ## Test out a query


    ```sql
    SELECT * FROM cp.`employee.json`
    ```


    <div class="knitsql-table">


    Table: Displaying records 1 - 10

    employee_id   full_name           first_name   last_name   position_id   position_title           store_id   department_id   birth_date   hire_date               salary       supervisor_id   education_level    marital_status   gender   management_role   
    ------------  ------------------  -----------  ----------  ------------  -----------------------  ---------  --------------  -----------  ----------------------  -----------  --------------  -----------------  ---------------  -------  ------------------
    1             Sheri Nowmer        Sheri        Nowmer      1             President                0          1               1961-08-26   1994-12-01 00:00:00.0   80000.0000   0               Graduate Degree    S                F        Senior Management 
    2             Derrick Whelply     Derrick      Whelply     2             VP Country Manager       0          1               1915-07-03   1994-12-01 00:00:00.0   40000.0000   1               Graduate Degree    M                M        Senior Management 
    4             Michael Spence      Michael      Spence      2             VP Country Manager       0          1               1969-06-20   1998-01-01 00:00:00.0   40000.0000   1               Graduate Degree    S                M        Senior Management 
    5             Maya Gutierrez      Maya         Gutierrez   2             VP Country Manager       0          1               1951-05-10   1998-01-01 00:00:00.0   35000.0000   1               Bachelors Degree   M                F        Senior Management 
    6             Roberta Damstra     Roberta      Damstra     3             VP Information Systems   0          2               1942-10-08   1994-12-01 00:00:00.0   25000.0000   1               Bachelors Degree   M                F        Senior Management 
    7             Rebecca Kanagaki    Rebecca      Kanagaki    4             VP Human Resources       0          3               1949-03-27   1994-12-01 00:00:00.0   15000.0000   1               Bachelors Degree   M                F        Senior Management 
    8             Kim Brunner         Kim          Brunner     11            Store Manager            9          11              1922-08-10   1998-01-01 00:00:00.0   10000.0000   5               Bachelors Degree   S                F        Store Management  
    9             Brenda Blumberg     Brenda       Blumberg    11            Store Manager            21         11              1979-06-23   1998-01-01 00:00:00.0   17000.0000   5               Graduate Degree    M                F        Store Management  
    10            Darren Stanz        Darren       Stanz       5             VP Finance               0          5               1949-08-26   1994-12-01 00:00:00.0   50000.0000   1               Partial College    M                M        Senior Management 
    11            Jonathan Murraiin   Jonathan     Murraiin    11            Store Manager            1          11              1967-06-20   1998-01-01 00:00:00.0   15000.0000   5               Graduate Degree    S                M        Store Management  

    </div>

Which is (IMO) *way* better than using the Drill consoles, the Drill Web UI query box or SQLWorkbench.

### Test Results

``` r


@@ 272,7 343,7 @@ library(sergeant)
library(testthat)

date()
#> [1] "Mon Dec 12 06:40:23 2016"
#> [1] "Thu Dec 15 12:51:49 2016"

test_dir("tests/")
#> testthat results ========================================================================================================

M man/drill_jdbc.Rd => man/drill_jdbc.Rd +3 -0
@@ 30,6 30,9 @@ if you use the JDBC connection.
\examples{
\dontrun{
con <- drill_jdbc("localhost:2181", "main")
drill_query(con, "SELECT * FROM cp.`employee.json`")

# you can also use the connection with RJDBC calls:
dbGetQuery(con, "SELECT * FROM cp.`employee.json`")

# for local/embedded mode with default configuration info

M man/drill_query.Rd => man/drill_query.Rd +10 -3
@@ 7,17 7,24 @@
drill_query(drill_con, query, uplift = TRUE, .progress = interactive())
}
\arguments{
\item{drill_con}{drill server connection object setup by \code{drill_connection()}}
\item{drill_con}{drill server connection object setup by \code{drill_connection()} or
\code{drill_jdbc()}}

\item{query}{query to run}

\item{uplift}{automatically run \code{drill_uplift()} on the result? (default: \code{TRUE})}
\item{uplift}{automatically run \code{drill_uplift()} on the result? (default: \code{TRUE},
ignored if \code{drill_con} is a \code{JDBCConnection} created by
\code{drill_jdbc()})}

\item{.progress}{if \code{TRUE} (default if in an interactive session) then ask
\code{httr::POST} to display a progress bar}
}
\description{
Submit a query and return results
This function can handle REST API connections or JDBC connections. There is a benefit to
calling this function for JDBC connections vs a straight call to \code{dbGetQuery()} in
that the function result is a `tbl_df` vs a plain \code{data.frame} so you get better
default printing (which can be helpful if you accidentally execute a query and the result
set is huge).
}
\examples{
\dontrun{

M man/drill_uplift.Rd => man/drill_uplift.Rd +3 -3
@@ 2,7 2,7 @@
% Please edit documentation in R/query.r
\name{drill_uplift}
\alias{drill_uplift}
\title{Turn a columnar query results into a type-converted tbl}
\title{Turn columnar query results into a type-converted tbl}
\usage{
drill_uplift(query_result)
}


@@ 15,8 15,8 @@ you can pipe it to this function to pull out `rows` and automatically
type-convert it.
}
\details{
Not really intended to be called directly, but useful if you ran \code{drill_query()}
without `uplift=TRUE` but want to then convert the structure.
Not really intended to be called directly, but useful if you accidentally ran
\code{drill_query()} without `uplift=TRUE` but want to then convert the structure.
}
\references{
\href{https://drill.apache.org/docs/}{Drill documentation}