~hrbrmstr/awsathena

d164f3051dde7ea1d54738a9e4d99f6fc653e039 — hrbrmstr 3 years ago 1d92afc
workgroup
M DESCRIPTION => DESCRIPTION +3 -3
@@ 1,8 1,8 @@
Package: awsathena
Type: Package
Title: rJava Interface to AWS Athena SDK
Version: 0.1.0
Date: 2019-02-22
Version: 0.2.0
Date: 2019-06-14
Authors@R: c(
    person("Bob", "Rudis", role = c("aut", "cre"), email = "bob@rud.is")
  )


@@ 25,6 25,6 @@ Depends:
Imports: 
    uuid,
    utils
RoxygenNote: 6.1.1
RoxygenNote: 7.0.2
Remotes: 
    hrbrmstr/awsathenajars

M NAMESPACE => NAMESPACE +3 -0
@@ 1,12 1,15 @@
# Generated by roxygen2: do not edit by hand

export(athena_type_trans)
export(collect_async)
export(download_query_execution_results)
export(get_query_execution)
export(get_query_results_metadata)
export(list_query_executions)
export(s3_download_file)
export(start_query_execution)
export(stop_query_execution)
export(to_cols)
import(awsathenajars)
import(rJava)
importFrom(utils,setTxtProgressBar)

M NEWS.md => NEWS.md +4 -0
@@ 1,3 1,7 @@
0.3.0
* buffer for download is now a parameter
* added `get_query_results_metadata()` to enable retrieval of col types

0.2.0
* Split into two packages as per CRAN rJava-package suggested practice.


M R/collect-async.R => R/collect-async.R +4 -1
@@ 29,6 29,7 @@
#'        you wish to use
#' @param properties_file if not using the default credentials provider chain or
#'        a named profile then provide the path to an Athena credentials property file.
#' @param workgroup name of the Athena workgroup the query is run in (default: `"primary"`)
#' @note `dbplyr` must be installed for this to work. It is not listed in
#'       the `Imports` as it brings with it many dependencies that may not
#'       be necessary for general use of this package.


@@ 71,6 72,7 @@ collect_async <- function(obj,
                          kms_key = NULL,
                          region = "us-east-1",
                          profile = NULL,
                          workgroup = "primary",
                          properties_file = NULL) {

  if (!requireNamespace("dbplyr", quietly = TRUE)) {


@@ 85,7 87,8 @@ collect_async <- function(obj,
      kms_key = kms_key,
      region = region,
      profile = profile,
      properties_file = properties_file
      properties_file = properties_file,
      workgroup = workgroup
    )
  }


M R/download-query-ex-res.R => R/download-query-ex-res.R +3 -0
@@ 11,6 11,7 @@
#' @param profile if not using the default credentials chain or a dedicated
#'        properties file then provide the named profile from `~/.aws/credentials`
#'        you wish to use
#' @param buffer_size size (in bytes) of the temporary buffer used when reading from S3; a bigger buffer means a faster download
#' @param properties_file if not using the default credentials provider chain or
#'        a named profile then provide the path to an Athena credentials property file.
#' @export


@@ 19,6 20,7 @@ download_query_execution_results <- function(qxid,
                                             progress = FALSE,
                                             region = "us-east-1",
                                             profile = NULL,
                                             buffer_size = 16384L,
                                             properties_file = NULL) {

  if (missing(output_dir)) output_dir <- getwd()


@@ 38,6 40,7 @@ download_query_execution_results <- function(qxid,
    progress = progress,
    region = region,
    profile = profile,
    buffer_size = buffer_size,
    properties_file = properties_file
  ) -> res


A R/get-query-results-meta.R => R/get-query-results-meta.R +48 -0
@@ 0,0 1,48 @@
#' Get Query Execution Results Metadata (Schema)
#'
#' Requests a single-row page of results for the given query execution and
#' returns the column information attached to that result set, one row per
#' result column.
#'
#' @md
#' @param qxid query execution id
#' @param region AWS region string
#' @param profile if not using the default credentials chain or a dedicated
#'        properties file then provide the named profile from `~/.aws/credentials`
#'        you wish to use
#' @param properties_file if not using the default credentials provider chain or
#'        a named profile then provide the path to an Athena credentials property file.
#' @return a data frame (classed `athena_query_metadata`, `tbl_df`, `tbl`) with
#'         the columns `name`, `type`, `caseSensitive`, `catalogName`, `label`,
#'         `nullable`, `precision`, `scale`, `schemaName` and `tableName`.
#' @export
get_query_results_metadata <- function(qxid, region = "us-east-1", profile = NULL, properties_file = NULL) {

  client <- aws_athena_client(region = region, profile = profile, properties_file = properties_file)
  # Make sure the client is released even if the request below fails.
  on.exit(client$shutdown(), add = TRUE)

  qx_req <- .jnew("com.amazonaws.services.athena.model.GetQueryResultsRequest")
  qx_req$setQueryExecutionId(qxid)
  # Only the metadata is wanted, so ask for the smallest possible page.
  qx_req$setMaxResults(.jnew(class = "java/lang/Integer", "1"))

  res <- client$getQueryResults(qx_req)
  res_rs <- res$getResultSet()
  res_md <- res_rs$getResultSetMetadata()
  res_ci <- res_md$getColumnInfo()

  # One single-row data frame per result column.
  col_rows <- lapply(res_ci, function(.x) {
    data.frame(
      name = .x$getName(),
      type = .x$getType(),
      caseSensitive = .x$getCaseSensitive(),
      catalogName = .x$getCatalogName(),
      label = .x$getLabel(),
      nullable = .x$getNullable(),
      precision = .x$getPrecision(),
      scale = .x$getScale(),
      schemaName = .x$getSchemaName(),
      tableName = .x$getTableName(),
      stringsAsFactors = FALSE
    )
  })

  # Plain do.call() instead of `%>%`: the package NAMESPACE does not import
  # a pipe operator, so the piped form only worked when one happened to be
  # attached in the calling session.
  out <- do.call(rbind.data.frame, col_rows)

  class(out) <- c("athena_query_metadata", "tbl_df", "tbl", "data.frame")

  out

}

M R/s3-download-file.R => R/s3-download-file.R +3 -1
@@ 8,6 8,7 @@
#' @param profile if not using the default credentials chain or a dedicated
#'        properties file then provide the named profile from `~/.aws/credentials`
#'        you wish to use
#' @param buffer_size size (in bytes) of the temporary buffer used when reading from S3; a bigger buffer means a faster download
#' @param properties_file if not using the default credentials provider chain or
#'        a named profile then provide the path to an Athena credentials property file.
#' @export


@@ 15,6 16,7 @@ s3_download_file <- function(bucket, key, output_dir,
                             progress = FALSE,
                             region = "us-east-1",
                             profile = NULL,
                             buffer_size = 16384L,
                             properties_file = NULL) {

  aws_s3_client(


@@ 34,7 36,7 @@ s3_download_file <- function(bucket, key, output_dir,

  s3is <- obj$getObjectContent()

  buf <- raw(4096)
  buf <- raw(buffer_size)
  jbuf <- .jarray(buf)

  read_len <- s3is$read(jbuf)

M R/start-query-execution.R => R/start-query-execution.R +4 -1
@@ 21,6 21,7 @@
#'        you wish to use
#' @param properties_file if not using the default credentials provider chain or
#'        a named profile then provide the path to an Athena credentials property file.
#' @param workgroup name of the Athena workgroup the query is run in (default: `"primary"`)
#' @export
start_query_execution <- function(query, database, output_location,
                                  client_request_token = uuid::UUIDgenerate(),


@@ 28,7 29,8 @@ start_query_execution <- function(query, database, output_location,
                                  kms_key = NULL,
                                  region = "us-east-1",
                                  profile = NULL,
                                  properties_file = NULL) {
                                  properties_file = NULL,
                                  workgroup = "primary") {

  client <- aws_athena_client(region = region, profile = profile, properties_file = properties_file)



@@ 43,6 45,7 @@ start_query_execution <- function(query, database, output_location,
  qx_req <- qx_req$withQueryExecutionContext(ctx)
  qx_req <- qx_req$withResultConfiguration(res_cfg)
  qx_req <- qx_req$withClientRequestToken(client_request_token)
  qx_req <- qx_req$withWorkGroup(workgroup)

  res <- client$startQueryExecution(qx_req)


A R/type-trans.R => R/type-trans.R +65 -0
@@ 0,0 1,65 @@
#' Translate from one type system to another
#'
#' Maps every element of `type` to the matching type name in the other type
#' system. A type name with no known counterpart is returned unchanged.
#'
#' @md
#' @param type type (character)
#' @param to one of `athena` or `r`
#' @return a character vector with one element per element of `type`, named
#'         by the input values.
#' @export
athena_type_trans <- function(type, to = c("r", "athena")) {

  to <- match.arg(tolower(to[1]), c("athena", "r"))

  # The fallback in each switch() is the *single* element being translated.
  # Using the whole `type` vector as the fallback makes one unknown entry
  # expand to the full input and turns the result into a ragged list.
  translate_one <- if (to == "r") {
    function(.x) {
      switch(
        .x,
        boolean = "logical",
        tinyint = "integer",
        smallint = "integer",
        int = "integer",
        integer = "integer",
        bigint = "integer64",
        double = "double",
        float = "double",
        decimal = "double",
        char = "character",
        varchar = "character",
        binary = "raw",
        date = "Date",
        timestamp = "POSIXct",
        array = "character",
        map = "character",
        struct = "character",
        .x
      )
    }
  } else {
    function(.x) {
      switch(
        .x,
        logical = "boolean",
        integer = "integer",
        integer64 = "bigint",
        double = "double",
        character = "varchar",
        raw = "binary",
        Date = "date",
        POSIXct = "timestamp",
        .x
      )
    }
  }

  # vapply() guarantees a character vector, also for empty input.
  vapply(type, translate_one, character(1))

}

#' @rdname athena_type_trans
#' @md
#' @param name column names (character), the same length as `type`. For
#'        `to_cols()` the entries of `type` should be R type names.
#' @export
to_cols <- function(name, type) {

  stopifnot(length(name) == length(type))

  # One readr collector per requested R type. A type with no entry below
  # yields NULL and is handed to readr::cols() as-is.
  col_spec <- lapply(type, function(.x) {
    switch(
      .x,
      logical = readr::col_logical(),
      integer = readr::col_integer(),
      integer64 = readr::col_number(),
      double = readr::col_double(),
      character = readr::col_character(),
      raw = readr::col_character(),
      Date = readr::col_date(),
      POSIXct = readr::col_datetime()
    )
  })

  # Label each collector with its column name so readr::cols() receives
  # `name = collector` pairs. The earlier single-argument call to the
  # naming helper left the names argument missing and failed at run time.
  names(col_spec) <- name

  do.call(readr::cols, col_spec)

}


M R/utils.R => R/utils.R +5 -0
@@ 39,3 39,8 @@
    stop("No such region.", call.=FALSE)
  )
}

# Attach `nm` as the names of `object` and return the result.
# When `object` is not supplied it defaults to `nm` itself, giving a
# vector whose names equal its values.
set_names <- function(object = nm, nm) {
  labelled <- object
  names(labelled) <- nm
  labelled
}

M README.Rmd => README.Rmd +1 -0
@@ 20,6 20,7 @@ The following functions are implemented:
- `collect_async`:	Collect Amazon Athena 'dplyr' query results asynchronously
- `download_query_execution_results`:	Use S3 to download the results of an Athena Query
- `get_query_execution`:	Get Query Execution
- `get_query_results_metadata`: Get Query Execution Results Metadata (Schema)
- `list_query_executions`:	List Query Executions
- `s3_download_file`:	Download a key from a bucket to a local file
- `start_query_execution`:	Start Query Execution

A man/athena_type_trans.Rd => man/athena_type_trans.Rd +21 -0
@@ 0,0 1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/type-trans.R
\name{athena_type_trans}
\alias{athena_type_trans}
\alias{to_cols}
\title{Translate from one type system to another}
\usage{
athena_type_trans(type, to = c("r", "athena"))

to_cols(name, type)
}
\arguments{
\item{type}{type (character)}

\item{to}{one of `athena` or `r`}

\item{name, type}{equal length character vectors; type should be an R type}
}
\description{
Translate from one type system to another
}

M man/awsathena.Rd => man/awsathena.Rd +0 -1
@@ 3,7 3,6 @@
\docType{package}
\name{awsathena}
\alias{awsathena}
\alias{awsathena-package}
\title{rJava Client to AWS Athena SDK}
\description{
Provides R wrapper methods to core 'aws-java-sdk-athena' Java library methods

M man/collect_async.Rd => man/collect_async.Rd +14 -4
@@ 4,10 4,18 @@
\alias{collect_async}
\title{Collect Amazon Athena \code{dplyr} query results asynchronously}
\usage{
collect_async(obj, database, output_location,
collect_async(
  obj,
  database,
  output_location,
  client_request_token = uuid::UUIDgenerate(),
  encryption_option = NULL, kms_key = NULL, region = "us-east-1",
  profile = NULL, properties_file = NULL)
  encryption_option = NULL,
  kms_key = NULL,
  region = "us-east-1",
  profile = NULL,
  workgroup = "primary",
  properties_file = NULL
)
}
\arguments{
\item{obj}{the \code{dplyr} query}


@@ 33,9 41,11 @@ Default is \code{NULL} (no encryption)}
\item{region}{AWS region string}

\item{profile}{if not using the default credentials chain or a dedicated
properties file then provide the named profile from \code{~/.aws/credentials}
properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}

\item{workgroup}{name of the Athena workgroup the query is run in (default: \code{"primary"})}

\item{properties_file}{if not using the default credentials provider chain or
a named profile then provide the path to an Athena credentials property file.}
}

M man/download_query_execution_results.Rd => man/download_query_execution_results.Rd +12 -3
@@ 4,8 4,15 @@
\alias{download_query_execution_results}
\title{Use S3 to download the results of an Athena Query}
\usage{
download_query_execution_results(qxid, output_dir, progress = FALSE,
  region = "us-east-1", profile = NULL, properties_file = NULL)
download_query_execution_results(
  qxid,
  output_dir,
  progress = FALSE,
  region = "us-east-1",
  profile = NULL,
  buffer_size = 16384L,
  properties_file = NULL
)
}
\arguments{
\item{qxid}{query execution id}


@@ 18,9 25,11 @@ the value) to where you want the results to be stored.}
\item{region}{AWS region string}

\item{profile}{if not using the default credentials chain or a dedicated
properties file then provide the named profile from \code{~/.aws/credentials}
properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}

\item{buffer_size}{size (in bytes) of the temporary buffer used when reading from S3; a bigger buffer means a faster download}

\item{properties_file}{if not using the default credentials provider chain or
a named profile then provide the path to an Athena credentials property file.}
}

M man/get_query_execution.Rd => man/get_query_execution.Rd +7 -3
@@ 4,8 4,12 @@
\alias{get_query_execution}
\title{Get Query Execution}
\usage{
get_query_execution(qxid, region = "us-east-1", profile = NULL,
  properties_file = NULL)
get_query_execution(
  qxid,
  region = "us-east-1",
  profile = NULL,
  properties_file = NULL
)
}
\arguments{
\item{qxid}{query execution id}


@@ 13,7 17,7 @@ get_query_execution(qxid, region = "us-east-1", profile = NULL,
\item{region}{AWS region string}

\item{profile}{if not using the default credentials chain or a dedicated
properties file then provide the named profile from \code{~/.aws/credentials}
properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}

\item{properties_file}{if not using the default credentials provider chain or

A man/get_query_results_metadata.Rd => man/get_query_results_metadata.Rd +28 -0
@@ 0,0 1,28 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get-query-results-meta.R
\name{get_query_results_metadata}
\alias{get_query_results_metadata}
\title{Get Query Execution Results Metadata (Schema)}
\usage{
get_query_results_metadata(
  qxid,
  region = "us-east-1",
  profile = NULL,
  properties_file = NULL
)
}
\arguments{
\item{qxid}{query execution id}

\item{region}{AWS region string}

\item{profile}{if not using the default credentials chain or a dedicated
properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}

\item{properties_file}{if not using the default credentials provider chain or
a named profile then provide the path to an Athena credentials property file.}
}
\description{
Get Query Execution Results Metadata (Schema)
}

M man/list_query_executions.Rd => man/list_query_executions.Rd +7 -3
@@ 4,14 4,18 @@
\alias{list_query_executions}
\title{List Query Executions}
\usage{
list_query_executions(region = "us-east-1", profile = NULL,
  properties_file = NULL, max = NULL)
list_query_executions(
  region = "us-east-1",
  profile = NULL,
  properties_file = NULL,
  max = NULL
)
}
\arguments{
\item{region}{AWS region string}

\item{profile}{if not using the default credentials chain or a dedicated
properties file then provide the named profile from \code{~/.aws/credentials}
properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}

\item{properties_file}{if not using the default credentials provider chain or

M man/s3_download_file.Rd => man/s3_download_file.Rd +14 -4
@@ 4,11 4,19 @@
\alias{s3_download_file}
\title{Download a key from a bucket to a local file}
\usage{
s3_download_file(bucket, key, output_dir, progress = FALSE,
  region = "us-east-1", profile = NULL, properties_file = NULL)
s3_download_file(
  bucket,
  key,
  output_dir,
  progress = FALSE,
  region = "us-east-1",
  profile = NULL,
  buffer_size = 16384L,
  properties_file = NULL
)
}
\arguments{
\item{bucket, key}{S3 bucket and key (no \code{s3://} prefix)}
\item{bucket, key}{S3 bucket and key (no \verb{s3://} prefix)}

\item{output_dir}{where to store \code{key}}



@@ 17,9 25,11 @@ s3_download_file(bucket, key, output_dir, progress = FALSE,
\item{region}{AWS region string}

\item{profile}{if not using the default credentials chain or a dedicated
properties file then provide the named profile from \code{~/.aws/credentials}
properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}

\item{buffer_size}{size (in bytes) of the temporary buffer used when reading from S3; a bigger buffer means a faster download}

\item{properties_file}{if not using the default credentials provider chain or
a named profile then provide the path to an Athena credentials property file.}
}

M man/start_query_execution.Rd => man/start_query_execution.Rd +14 -4
@@ 4,10 4,18 @@
\alias{start_query_execution}
\title{Start Query Execution}
\usage{
start_query_execution(query, database, output_location,
start_query_execution(
  query,
  database,
  output_location,
  client_request_token = uuid::UUIDgenerate(),
  encryption_option = NULL, kms_key = NULL, region = "us-east-1",
  profile = NULL, properties_file = NULL)
  encryption_option = NULL,
  kms_key = NULL,
  region = "us-east-1",
  profile = NULL,
  properties_file = NULL,
  workgroup = "primary"
)
}
\arguments{
\item{query}{SQL query statements to be executed}


@@ 33,11 41,13 @@ Default is \code{NULL} (no encryption)}
\item{region}{AWS region string}

\item{profile}{if not using the default credentials chain or a dedicated
properties file then provide the named profile from \code{~/.aws/credentials}
properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}

\item{properties_file}{if not using the default credentials provider chain or
a named profile then provide the path to an Athena credentials property file.}

\item{workgroup}{name of the Athena workgroup the query is run in (default: \code{"primary"})}
}
\description{
Start Query Execution

M man/stop_query_execution.Rd => man/stop_query_execution.Rd +7 -3
@@ 4,8 4,12 @@
\alias{stop_query_execution}
\title{Stop Query Execution}
\usage{
stop_query_execution(qxid, region = "us-east-1", profile = NULL,
  properties_file = NULL)
stop_query_execution(
  qxid,
  region = "us-east-1",
  profile = NULL,
  properties_file = NULL
)
}
\arguments{
\item{qxid}{query execution id}


@@ 13,7 17,7 @@ stop_query_execution(qxid, region = "us-east-1", profile = NULL,
\item{region}{AWS region string}

\item{profile}{if not using the default credentials chain or a dedicated
properties file then provide the named profile from \code{~/.aws/credentials}
properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}

\item{properties_file}{if not using the default credentials provider chain or