M DESCRIPTION => DESCRIPTION +3 -3
@@ 1,8 1,8 @@
Package: awsathena
Type: Package
Title: rJava Interface to AWS Athena SDK
-Version: 0.1.0
-Date: 2019-02-22
+Version: 0.3.0
+Date: 2019-06-14
Authors@R: c(
person("Bob", "Rudis", role = c("aut", "cre"), email = "bob@rud.is")
)
@@ 25,6 25,6 @@ Depends:
Imports:
uuid,
utils
-RoxygenNote: 6.1.1
+RoxygenNote: 7.0.2
Remotes:
hrbrmstr/awsathenajars
M NAMESPACE => NAMESPACE +3 -0
@@ 1,12 1,15 @@
# Generated by roxygen2: do not edit by hand
+export(athena_type_trans)
export(collect_async)
export(download_query_execution_results)
export(get_query_execution)
+export(get_query_results_metadata)
export(list_query_executions)
export(s3_download_file)
export(start_query_execution)
export(stop_query_execution)
+export(to_cols)
import(awsathenajars)
import(rJava)
importFrom(utils,setTxtProgressBar)
M NEWS.md => NEWS.md +4 -0
@@ 1,3 1,7 @@
+0.3.0
+* buffer for download is now a parameter
+* added `get_query_results_metadata()` to enable retrieval of col types
+
0.2.0
* Split into two packages as per CRAN rJava-package suggested practice.
M R/collect-async.R => R/collect-async.R +4 -1
@@ 29,6 29,7 @@
#' you wish to use
#' @param properties_file if not using the default credentials provider chain or
#' a named profile then provide the path to an Athena credentials proeprty file.
+#' @param workgroup name of the Athena workgroup to run the query in
#' @note `dbplyr` must be installed for this to work. It is not listed in
#' the `Imports` as it brings with it many dependencies that may not
#' be necessary for general use of this package.
@@ 71,6 72,7 @@ collect_async <- function(obj,
kms_key = NULL,
region = "us-east-1",
profile = NULL,
+ workgroup = "primary",
properties_file = NULL) {
if (!requireNamespace("dbplyr", quietly = TRUE)) {
@@ 85,7 87,8 @@ collect_async <- function(obj,
kms_key = kms_key,
region = region,
profile = profile,
- properties_file = properties_file
+ properties_file = properties_file,
+ workgroup = workgroup
)
}
M R/download-query-ex-res.R => R/download-query-ex-res.R +3 -0
@@ 11,6 11,7 @@
#' @param profile if not using the default credentials chain or a dedicated
#' properties file then provide the named profile from `~/.aws/credentials`
#' you wish to use
+#' @param buffer_size size (in bytes) of the S3 download read buffer; larger values may speed up downloads
#' @param properties_file if not using the default credentials provider chain or
#' a named profile then provide the path to an Athena credentials proeprty file.
#' @export
@@ 19,6 20,7 @@ download_query_execution_results <- function(qxid,
progress = FALSE,
region = "us-east-1",
profile = NULL,
+ buffer_size = 16384L,
properties_file = NULL) {
if (missing(output_dir)) output_dir <- getwd()
@@ 38,6 40,7 @@ download_query_execution_results <- function(qxid,
progress = progress,
region = region,
profile = profile,
+ buffer_size = buffer_size,
properties_file = properties_file
) -> res
A R/get-query-results-meta.R => R/get-query-results-meta.R +48 -0
@@ 0,0 1,48 @@
+#' Get Query Execution Results Metadata (Schema)
+#'
+#' @md
+#' @param qxid query execution id
+#' @param region AWS region string
+#' @param profile if not using the default credentials chain or a dedicated
+#'        properties file then provide the named profile from `~/.aws/credentials`
+#'        you wish to use
+#' @param properties_file if not using the default credentials provider chain or
+#'        a named profile then provide the path to an Athena credentials property file.
+#' @export
+get_query_results_metadata <- function(qxid, region = "us-east-1", profile = NULL, properties_file = NULL) {
+
+  client <- aws_athena_client(region = region, profile = profile, properties_file = properties_file)
+  # Release the client on every exit path, including errors raised below.
+  on.exit(client$shutdown(), add = TRUE)
+
+  # Request a single result row; only the result set metadata is used.
+  qx_req <- .jnew("com.amazonaws.services.athena.model.GetQueryResultsRequest")
+  qx_req$setQueryExecutionId(qxid)
+  qx_req$setMaxResults(.jnew(class = "java/lang/Integer", "1"))
+
+  res <- client$getQueryResults(qx_req)
+  res_rs <- res$getResultSet()
+  res_md <- res_rs$getResultSetMetadata()
+  res_ci <- res_md$getColumnInfo()
+
+  rows <- lapply(res_ci, function(.x) {
+    data.frame(
+      name = .x$getName(),
+      type = .x$getType(),
+      caseSensitive = .x$getCaseSensitive(),
+      catalogName = .x$getCatalogName(),
+      label = .x$getLabel(),
+      nullable = .x$getNullable(),
+      precision = .x$getPrecision(),
+      scale = .x$getScale(),
+      schemaName = .x$getSchemaName(),
+      tableName = .x$getTableName(),
+      stringsAsFactors = FALSE
+    )
+  })
+
+  # Base R only: magrittr's `%>%` is not among this package's imports.
+  out <- do.call(rbind.data.frame, rows)
+  class(out) <- c("athena_query_metadata", "tbl_df", "tbl", "data.frame")
+  out  # one row per column description returned by getColumnInfo()
+}
M R/s3-download-file.R => R/s3-download-file.R +3 -1
@@ 8,6 8,7 @@
#' @param profile if not using the default credentials chain or a dedicated
#' properties file then provide the named profile from `~/.aws/credentials`
#' you wish to use
+#' @param buffer_size size (in bytes) of the S3 download read buffer; larger values may speed up downloads
#' @param properties_file if not using the default credentials provider chain or
#' a named profile then provide the path to an Athena credentials proeprty file.
#' @export
@@ 15,6 16,7 @@ s3_download_file <- function(bucket, key, output_dir,
progress = FALSE,
region = "us-east-1",
profile = NULL,
+ buffer_size = 16384L,
properties_file = NULL) {
aws_s3_client(
@@ 34,7 36,7 @@ s3_download_file <- function(bucket, key, output_dir,
s3is <- obj$getObjectContent()
- buf <- raw(4096)
+ buf <- raw(buffer_size)
jbuf <- .jarray(buf)
read_len <- s3is$read(jbuf)
M R/start-query-execution.R => R/start-query-execution.R +4 -1
@@ 21,6 21,7 @@
#' you wish to use
#' @param properties_file if not using the default credentials provider chain or
#' a named profile then provide the path to an Athena credentials proeprty file.
+#' @param workgroup name of the Athena workgroup to run the query in
#' @export
start_query_execution <- function(query, database, output_location,
client_request_token = uuid::UUIDgenerate(),
@@ 28,7 29,8 @@ start_query_execution <- function(query, database, output_location,
kms_key = NULL,
region = "us-east-1",
profile = NULL,
- properties_file = NULL) {
+ properties_file = NULL,
+ workgroup = "primary") {
client <- aws_athena_client(region = region, profile = profile, properties_file = properties_file)
@@ 43,6 45,7 @@ start_query_execution <- function(query, database, output_location,
qx_req <- qx_req$withQueryExecutionContext(ctx)
qx_req <- qx_req$withResultConfiguration(res_cfg)
qx_req <- qx_req$withClientRequestToken(client_request_token)
+ qx_req <- qx_req$withWorkGroup(workgroup)
res <- client$startQueryExecution(qx_req)
A R/type-trans.R => R/type-trans.R +65 -0
@@ 0,0 1,65 @@
+#' Translate from one type system to another
+#'
+#' @param type type (character)
+#' @param to one of `athena` or `r`
+#' @export
+athena_type_trans <- function(type, to = c("r", "athena")) {
+  to <- match.arg(tolower(to[1]), c("athena", "r"))
+
+  # Mirror sapply()'s naming: keep existing names, else name by input value.
+  nms <- names(type)
+  type <- as.character(type)
+  if (is.null(nms)) nms <- type
+
+  if (to == "r") {
+    lookup <- c(
+      boolean = "logical",
+      tinyint = "integer", smallint = "integer", int = "integer",
+      integer = "integer", bigint = "integer64",
+      double = "double", float = "double", decimal = "double",
+      char = "character", varchar = "character",
+      binary = "raw",
+      date = "Date", timestamp = "POSIXct",
+      array = "character", map = "character", struct = "character"
+    )
+  } else {
+    lookup <- c(
+      logical = "boolean", integer = "integer", integer64 = "bigint",
+      double = "double", character = "varchar", raw = "binary",
+      Date = "date", POSIXct = "timestamp"
+    )
+  }
+
+  out <- unname(lookup[type])
+  unknown <- is.na(out)
+  # Unknown Athena types pass through unchanged, element by element (the
+  # switch() default used to be the whole input vector); R types with no
+  # Athena equivalent become NA so the result is always a character vector.
+  if (to == "r") out[unknown] <- type[unknown]
+  names(out) <- nms
+  out
+}
+
+#' @rdname athena_type_trans
+#' @param name,type equal length character vectors; type should be an R type
+#' @export
+to_cols <- function(name, type) {
+  # readr is not in Imports, so fail with a clear message when it is absent.
+  if (!requireNamespace("readr", quietly = TRUE)) {
+    stop("`readr` must be installed to use `to_cols()`.", call. = FALSE)
+  }
+  stopifnot(length(name) == length(type))
+  # One collector per column; unrecognised types fall back to col_guess().
+  l <- lapply(as.character(type), function(.x) {
+    switch(.x,
+      logical = readr::col_logical(), integer = readr::col_integer(),
+      integer64 = readr::col_number(), double = readr::col_double(),
+      character = readr::col_character(), raw = readr::col_character(),
+      Date = readr::col_date(), POSIXct = readr::col_datetime(),
+      readr::col_guess()
+    )
+  })
+  # `name` was previously ignored and set_names() called without `nm`.
+  do.call(readr::cols, set_names(l, name))
+}
+
M R/utils.R => R/utils.R +5 -0
@@ 39,3 39,8 @@
stop("No such region.", call.=FALSE)
)
}
+
+set_names <- function(object = nm, nm) {
+  names(object) <- nm  # `object` defaults to `nm` itself when omitted
+  object
+}
M README.Rmd => README.Rmd +1 -0
@@ 20,6 20,7 @@ The following functions are implemented:
- `collect_async`: Collect Amazon Athena 'dplyr' query results asynchronously
- `download_query_execution_results`: Use S3 to download the results of an Athena Query
- `get_query_execution`: Get Query Execution
+- `get_query_results_metadata`: Get Query Execution Results Metadata (Schema)
- `list_query_executions`: List Query Executions
- `s3_download_file`: Download a key from a bucket to a local file
- `start_query_execution`: Start Query Execution
A man/athena_type_trans.Rd => man/athena_type_trans.Rd +21 -0
@@ 0,0 1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/type-trans.R
+\name{athena_type_trans}
+\alias{athena_type_trans}
+\alias{to_cols}
+\title{Translate from one type system to another}
+\usage{
+athena_type_trans(type, to = c("r", "athena"))
+
+to_cols(name, type)
+}
+\arguments{
+\item{type}{type (character)}
+
+\item{to}{one of `athena` or `r`}
+
+\item{name, type}{equal length character vectors; type should be an R type}
+}
+\description{
+Translate from one type system to another
+}
M man/awsathena.Rd => man/awsathena.Rd +0 -1
@@ 3,7 3,6 @@
\docType{package}
\name{awsathena}
\alias{awsathena}
-\alias{awsathena-package}
\title{rJava Client to AWS Athena SDK}
\description{
Provides R wrapper methods to core 'aws-java-sdk-athena' Java library methods
M man/collect_async.Rd => man/collect_async.Rd +14 -4
@@ 4,10 4,18 @@
\alias{collect_async}
\title{Collect Amazon Athena \code{dplyr} query results asynchronously}
\usage{
-collect_async(obj, database, output_location,
+collect_async(
+ obj,
+ database,
+ output_location,
client_request_token = uuid::UUIDgenerate(),
- encryption_option = NULL, kms_key = NULL, region = "us-east-1",
- profile = NULL, properties_file = NULL)
+ encryption_option = NULL,
+ kms_key = NULL,
+ region = "us-east-1",
+ profile = NULL,
+ workgroup = "primary",
+ properties_file = NULL
+)
}
\arguments{
\item{obj}{the \code{dplyr} query}
@@ 33,9 41,11 @@ Default is \code{NULL} (no encryption)}
\item{region}{AWS region string}
\item{profile}{if not using the default credentials chain or a dedicated
-properties file then provide the named profile from \code{~/.aws/credentials}
+properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}
+\item{workgroup}{name of the Athena workgroup to run the query in}
+
\item{properties_file}{if not using the default credentials provider chain or
a named profile then provide the path to an Athena credentials proeprty file.}
}
M man/download_query_execution_results.Rd => man/download_query_execution_results.Rd +12 -3
@@ 4,8 4,15 @@
\alias{download_query_execution_results}
\title{Use S3 to download the results of an Athena Query}
\usage{
-download_query_execution_results(qxid, output_dir, progress = FALSE,
- region = "us-east-1", profile = NULL, properties_file = NULL)
+download_query_execution_results(
+ qxid,
+ output_dir,
+ progress = FALSE,
+ region = "us-east-1",
+ profile = NULL,
+ buffer_size = 16384L,
+ properties_file = NULL
+)
}
\arguments{
\item{qxid}{query execution id}
@@ 18,9 25,11 @@ the value) to where you want the results to be stored.}
\item{region}{AWS region string}
\item{profile}{if not using the default credentials chain or a dedicated
-properties file then provide the named profile from \code{~/.aws/credentials}
+properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}
+\item{buffer_size}{size (in bytes) of the S3 download read buffer; larger values may speed up downloads}
+
\item{properties_file}{if not using the default credentials provider chain or
a named profile then provide the path to an Athena credentials proeprty file.}
}
M man/get_query_execution.Rd => man/get_query_execution.Rd +7 -3
@@ 4,8 4,12 @@
\alias{get_query_execution}
\title{Get Query Execution}
\usage{
-get_query_execution(qxid, region = "us-east-1", profile = NULL,
- properties_file = NULL)
+get_query_execution(
+ qxid,
+ region = "us-east-1",
+ profile = NULL,
+ properties_file = NULL
+)
}
\arguments{
\item{qxid}{query execution id}
@@ 13,7 17,7 @@ get_query_execution(qxid, region = "us-east-1", profile = NULL,
\item{region}{AWS region string}
\item{profile}{if not using the default credentials chain or a dedicated
-properties file then provide the named profile from \code{~/.aws/credentials}
+properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}
\item{properties_file}{if not using the default credentials provider chain or
A man/get_query_results_metadata.Rd => man/get_query_results_metadata.Rd +28 -0
@@ 0,0 1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get-query-results-meta.R
+\name{get_query_results_metadata}
+\alias{get_query_results_metadata}
+\title{Get Query Execution Results Metadata (Schema)}
+\usage{
+get_query_results_metadata(
+ qxid,
+ region = "us-east-1",
+ profile = NULL,
+ properties_file = NULL
+)
+}
+\arguments{
+\item{qxid}{query execution id}
+
+\item{region}{AWS region string}
+
+\item{profile}{if not using the default credentials chain or a dedicated
+properties file then provide the named profile from \verb{~/.aws/credentials}
+you wish to use}
+
+\item{properties_file}{if not using the default credentials provider chain or
+a named profile then provide the path to an Athena credentials property file.}
+}
+\description{
+Get Query Execution Results Metadata (Schema)
+}
M man/list_query_executions.Rd => man/list_query_executions.Rd +7 -3
@@ 4,14 4,18 @@
\alias{list_query_executions}
\title{List Query Executions}
\usage{
-list_query_executions(region = "us-east-1", profile = NULL,
- properties_file = NULL, max = NULL)
+list_query_executions(
+ region = "us-east-1",
+ profile = NULL,
+ properties_file = NULL,
+ max = NULL
+)
}
\arguments{
\item{region}{AWS region string}
\item{profile}{if not using the default credentials chain or a dedicated
-properties file then provide the named profile from \code{~/.aws/credentials}
+properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}
\item{properties_file}{if not using the default credentials provider chain or
M man/s3_download_file.Rd => man/s3_download_file.Rd +14 -4
@@ 4,11 4,19 @@
\alias{s3_download_file}
\title{Download a key from a bucket to a local file}
\usage{
-s3_download_file(bucket, key, output_dir, progress = FALSE,
- region = "us-east-1", profile = NULL, properties_file = NULL)
+s3_download_file(
+ bucket,
+ key,
+ output_dir,
+ progress = FALSE,
+ region = "us-east-1",
+ profile = NULL,
+ buffer_size = 16384L,
+ properties_file = NULL
+)
}
\arguments{
-\item{bucket, key}{S3 bucket and key (no \code{s3://} prefix)}
+\item{bucket, key}{S3 bucket and key (no \verb{s3://} prefix)}
\item{output_dir}{where to store \code{key}}
@@ 17,9 25,11 @@ s3_download_file(bucket, key, output_dir, progress = FALSE,
\item{region}{AWS region string}
\item{profile}{if not using the default credentials chain or a dedicated
-properties file then provide the named profile from \code{~/.aws/credentials}
+properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}
+\item{buffer_size}{size (in bytes) of the S3 download read buffer; larger values may speed up downloads}
+
\item{properties_file}{if not using the default credentials provider chain or
a named profile then provide the path to an Athena credentials proeprty file.}
}
M man/start_query_execution.Rd => man/start_query_execution.Rd +14 -4
@@ 4,10 4,18 @@
\alias{start_query_execution}
\title{Start Query Execution}
\usage{
-start_query_execution(query, database, output_location,
+start_query_execution(
+ query,
+ database,
+ output_location,
client_request_token = uuid::UUIDgenerate(),
- encryption_option = NULL, kms_key = NULL, region = "us-east-1",
- profile = NULL, properties_file = NULL)
+ encryption_option = NULL,
+ kms_key = NULL,
+ region = "us-east-1",
+ profile = NULL,
+ properties_file = NULL,
+ workgroup = "primary"
+)
}
\arguments{
\item{query}{SQL query statements to be executed}
@@ 33,11 41,13 @@ Default is \code{NULL} (no encryption)}
\item{region}{AWS region string}
\item{profile}{if not using the default credentials chain or a dedicated
-properties file then provide the named profile from \code{~/.aws/credentials}
+properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}
\item{properties_file}{if not using the default credentials provider chain or
a named profile then provide the path to an Athena credentials proeprty file.}
+
+\item{workgroup}{name of the Athena workgroup to run the query in}
}
\description{
Start Query Execution
M man/stop_query_execution.Rd => man/stop_query_execution.Rd +7 -3
@@ 4,8 4,12 @@
\alias{stop_query_execution}
\title{Stop Query Execution}
\usage{
-stop_query_execution(qxid, region = "us-east-1", profile = NULL,
- properties_file = NULL)
+stop_query_execution(
+ qxid,
+ region = "us-east-1",
+ profile = NULL,
+ properties_file = NULL
+)
}
\arguments{
\item{qxid}{query execution id}
@@ 13,7 17,7 @@ stop_query_execution(qxid, region = "us-east-1", profile = NULL,
\item{region}{AWS region string}
\item{profile}{if not using the default credentials chain or a dedicated
-properties file then provide the named profile from \code{~/.aws/credentials}
+properties file then provide the named profile from \verb{~/.aws/credentials}
you wish to use}
\item{properties_file}{if not using the default credentials provider chain or