~hrbrmstr/metis-jars

3cb5c769890fd17666219ab08386c91491bc9148 — hrbrmstr 2 years ago dc75fc8
update
M DESCRIPTION => DESCRIPTION +3 -3
@@ 1,7 1,7 @@
Package: metis
Type: Package
Title: Helpers for Accessing and Querying Amazon Athena
Version: 0.3.0
Version: 0.4.0
Date: 2018-03-19
Authors@R: c(
    person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"),


@@ 31,6 31,6 @@ Imports:
    readr,
    aws.signature,
    uuid,
    sys,
    reticulate,
    jsonlite
RoxygenNote: 6.0.1.9000
RoxygenNote: 6.1.1

M NAMESPACE => NAMESPACE +1 -0
@@ 1,5 1,6 @@
# Generated by roxygen2: do not edit by hand

S3method(sql_translate_env,AthenaConnection)
export(Athena)
export(athena_connect)
export(read_credentials)

M NEWS.md => NEWS.md +5 -0
@@ 1,6 1,11 @@
0.4.0
=========

- Added `sql_translate_env.AthenaConnection()`

0.2.0
=========

- Updated authentication provider to be `com.amazonaws.athena.jdbc.shaded.com.amazonaws.auth.DefaultAWSCredentialsProviderChain` (via @dabdine)
- Now supports additional DBI/RJDBC methods including: `dbExistsTable()`,
  `dbListFields()`, `dbListTables()`, `dbReadTable()`

D R/awscli-util.R => R/awscli-util.R +0 -26
@@ 1,26 0,0 @@
# Look up the `aws` command-line executable via `Sys.which()`.
#
# Returns the lookup result with its names attribute stripped, so callers
# receive a bare character value.
.aws_bin <- function() {
  aws_path <- Sys.which("aws")
  unname(aws_path)
}

# Run `aws athena <...>` and hand back whatever it wrote to stdout.
#
# Arguments supplied through `...` are flattened into a character vector and
# appended after the "athena" sub-command. With no arguments the "help"
# sub-command is run instead.
#
# Returns (invisibly) the stdout text as a single string, or NULL when the
# process produced no stdout. When "help" is among the arguments the text is
# also printed with `cat()`.
.athenacli <- function(...) {

  cli_args <- list(...)
  if (length(cli_args) == 0) {
    cli_args <- "help"
  }

  cmd <- c("athena", unlist(cli_args, use.names = FALSE))

  # error = FALSE: a non-zero exit status does not raise an R error
  proc <- sys::exec_internal(.aws_bin(), args = cmd, error = FALSE)

  # nothing on stdout -> nothing to return
  if (length(proc$stdout) == 0) {
    return(invisible(NULL))
  }

  txt <- rawToChar(proc$stdout)

  if ("help" %in% cmd) {
    cat(txt, sep = "")
  }

  invisible(txt)

}

D R/list-query-executions.R => R/list-query-executions.R +0 -13
@@ 1,13 0,0 @@
#' List Athena query executions via the AWS CLI
#'
#' Builds the argument vector for `aws athena list-query-executions`, runs it
#' through `.athenacli()` and parses the JSON text that comes back.
#'
#' @param max_items value for `--max-items`; coerced with `as.integer()`
#' @param starting_token optional value for `--starting-token`; omitted when `NULL`
#' @param page_size optional value for `--page-size`; coerced with
#'   `as.integer()`, omitted when `NULL`
#' @return the result of `jsonlite::fromJSON()` applied to the CLI output
#' @noRd
list_query_executions <- function(max_items=10, starting_token=NULL, page_size=NULL) {

  args <- c("list-query-executions", sprintf("--max-items=%s", as.integer(max_items)))

  if (!is.null(starting_token)) args <- c(args, sprintf("--starting-token=%s", starting_token))
  if (!is.null(page_size)) args <- c(args, sprintf("--page-size=%s", as.integer(page_size)))

  res <- .athenacli(args)

  # .athenacli() yields NULL when the CLI wrote nothing to stdout; there is
  # then no JSON to parse, so fail with an explicit message.
  if (is.null(res)) {
    stop(
      "No output received from 'aws athena list-query-executions'",
      call. = FALSE
    )
  }

  # Parse the CLI's JSON output (the original call passed no argument here).
  jsonlite::fromJSON(res)

}
\ No newline at end of file

A R/sql_translate_env.R => R/sql_translate_env.R +101 -0
@@ 0,0 1,101 @@
#' @rdname Athena
#' @keywords internal
#' @export
sql_translate_env.AthenaConnection <- function(con) {

  # `con` is only used for S3 dispatch; the translation tables below are
  # static and do not read anything from the connection object.

  dbplyr::sql_variant(

    # Scalar translations: extend dbplyr's base scalar set with explicit CASTs
    # and a set of pass-through SQL function names.
    scalar = dbplyr::sql_translator(
      .parent = dbplyr::base_scalar,
      `!=` = dbplyr::sql_infix("<>"),
      # note the leading space before AS: build_sql() concatenates its pieces
      # verbatim, so without it the expression and the keyword run together
      as.integer64 = function(x) dbplyr::build_sql("CAST(", x, " AS BIGINT)"),
      as.numeric = function(x) dbplyr::build_sql("CAST(", x, " AS DOUBLE)"),
      as.character = function(x) dbplyr::build_sql("CAST(", x, " AS CHARACTER)"),
      as.date = function(x) dbplyr::build_sql("CAST(", x, " AS DATE)"),
      as.Date = function(x) dbplyr::build_sql("CAST(", x, " AS DATE)"),
      as.POSIXct = function(x) dbplyr::build_sql("CAST(", x, " AS TIMESTAMP)"),
      as.posixct = function(x) dbplyr::build_sql("CAST(", x, " AS TIMESTAMP)"),
      as.logical = function(x) dbplyr::build_sql("CAST(", x, " AS BOOLEAN)"),
      date_part = function(x, y) dbplyr::build_sql("DATE_PART(", x, ",", y, ")"),
      # argument order is swapped relative to the R call: the second R
      # argument is emitted first
      grepl = function(x, y) dbplyr::build_sql("CONTAINS(", y, ", ", x, ")"),
      # third R argument is emitted first, followed by the first and second
      gsub = function(x, y, z) dbplyr::build_sql("REGEXP_REPLACE(", z, ", ", x, ",", y, ")"),
      trimws = function(x) dbplyr::build_sql("TRIM(both ' ' FROM ", x, ")"),
      cbrt = dbplyr::sql_prefix("CBRT", 1),
      degrees = dbplyr::sql_prefix("DEGREES", 1),
      e = dbplyr::sql_prefix("E", 0),
      row_number = dbplyr::sql_prefix("row_number", 0),
      lshift = dbplyr::sql_prefix("LSHIFT", 2),
      mod = dbplyr::sql_prefix("MOD", 2),
      age = dbplyr::sql_prefix("AGE", 1),
      negative = dbplyr::sql_prefix("NEGATIVE", 1),
      pi = dbplyr::sql_prefix("PI", 0),
      pow = dbplyr::sql_prefix("POW", 2),
      radians = dbplyr::sql_prefix("RADIANS", 1),
      rand = dbplyr::sql_prefix("RAND", 0),
      rshift = dbplyr::sql_prefix("RSHIFT", 2),
      trunc = dbplyr::sql_prefix("TRUNC", 2),
      contains = dbplyr::sql_prefix("CONTAINS", 2),
      convert_to = dbplyr::sql_prefix("CONVERT_TO", 2),
      convert_from = dbplyr::sql_prefix("CONVERT_FROM", 2),
      string_binary = dbplyr::sql_prefix("STRING_BINARY", 1),
      binary_string = dbplyr::sql_prefix("BINARY_STRING", 1),
      to_char = dbplyr::sql_prefix("TO_CHAR", 2),
      to_date = dbplyr::sql_prefix("TO_DATE", 2),
      to_number = dbplyr::sql_prefix("TO_NUMBER", 2),
      # both helpers map to TO_TIMESTAMP; they differ only in arity
      char_to_timestamp = dbplyr::sql_prefix("TO_TIMESTAMP", 2),
      double_to_timestamp = dbplyr::sql_prefix("TO_TIMESTAMP", 1),
      char_length = dbplyr::sql_prefix("CHAR_LENGTH", 1),
      flatten = dbplyr::sql_prefix("FLATTEN", 1),
      kvgen = dbplyr::sql_prefix("KVGEN", 1),
      repeated_count = dbplyr::sql_prefix("REPEATED_COUNT", 1),
      repeated_contains = dbplyr::sql_prefix("REPEATED_CONTAINS", 2),
      ilike = dbplyr::sql_prefix("ILIKE", 2),
      init_cap = dbplyr::sql_prefix("INIT_CAP", 1),
      length = dbplyr::sql_prefix("LENGTH", 1),
      lower = dbplyr::sql_prefix("LOWER", 1),
      tolower = dbplyr::sql_prefix("LOWER", 1),
      ltrim = dbplyr::sql_prefix("LTRIM", 2),
      nullif = dbplyr::sql_prefix("NULLIF", 2),
      position = function(x, y) dbplyr::build_sql("POSITION(", x, " IN ", y, ")"),
      regexp_replace = dbplyr::sql_prefix("REGEXP_REPLACE", 3),
      rtrim = dbplyr::sql_prefix("RTRIM", 2),
      # the *_with variants map to the same SQL function with a third argument
      rpad = dbplyr::sql_prefix("RPAD", 2),
      rpad_with = dbplyr::sql_prefix("RPAD", 3),
      lpad = dbplyr::sql_prefix("LPAD", 2),
      lpad_with = dbplyr::sql_prefix("LPAD", 3),
      strpos = dbplyr::sql_prefix("STRPOS", 2),
      substr = dbplyr::sql_prefix("SUBSTR", 3),
      trim = function(x, y, z) dbplyr::build_sql("TRIM(", x, " ", y, " FROM ", z, ")"),
      upper = dbplyr::sql_prefix("UPPER", 1),
      toupper = dbplyr::sql_prefix("UPPER", 1)
    ),

    # Aggregate translations: extend dbplyr's base aggregates.
    aggregate = dbplyr::sql_translator(
      .parent = dbplyr::base_agg,
      n = function() dbplyr::sql("COUNT(*)"),
      cor = dbplyr::sql_prefix("CORR"),
      cov = dbplyr::sql_prefix("COVAR_SAMP"),
      sd =  dbplyr::sql_prefix("STDDEV_SAMP"),
      var = dbplyr::sql_prefix("VAR_SAMP"),
      n_distinct = function(x) {
        dbplyr::build_sql(dbplyr::sql("COUNT(DISTINCT "), x, dbplyr::sql(")"))
      }
    ),

    # Window translations: extend dbplyr's base window functions.
    window = dbplyr::sql_translator(
      .parent = dbplyr::base_win,
      n = function() { dbplyr::win_over(dbplyr::sql("count(*)"),
                                        partition = dbplyr::win_current_group()) },
      cor = dbplyr::win_recycled("corr"),
      cov = dbplyr::win_recycled("covar_samp"),
      sd =  dbplyr::win_recycled("stddev_samp"),
      var = dbplyr::win_recycled("var_samp"),
      all = dbplyr::win_recycled("bool_and"),
      any = dbplyr::win_recycled("bool_or")
    )

  )

}
\ No newline at end of file

M man/Athena.Rd => man/Athena.Rd +5 -1
@@ 1,11 1,15 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/jdbc.r
% Please edit documentation in R/jdbc.r, R/sql_translate_env.R
\name{Athena}
\alias{Athena}
\alias{sql_translate_env.AthenaConnection}
\title{AthenaJDBC}
\usage{
Athena(identifier.quote = "`")

\method{sql_translate_env}{AthenaConnection}(con)
}
\description{
AthenaJDBC
}
\keyword{internal}

M man/athena_connect.Rd => man/athena_connect.Rd +4 -3
@@ 6,9 6,10 @@
\usage{
athena_connect(default_schema = "default", region = c("us-east-1",
  "us-east-2", "us-west-2"),
  s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"), max_error_retries = 10,
  connection_timeout = 10000, socket_timeout = 10000, log_path = "",
  log_level = c("OFF", "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"))
  s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
  max_error_retries = 10, connection_timeout = 10000,
  socket_timeout = 10000, log_path = "", log_level = c("OFF",
  "FATAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"))
}
\arguments{
\item{default_schema}{default schema (you'll still need to fully qualify non-default schema table names)}

M man/dbConnect-AthenaDriver-method.Rd => man/dbConnect-AthenaDriver-method.Rd +4 -3
@@ 7,10 7,11 @@
\usage{
\S4method{dbConnect}{AthenaDriver}(drv,
  provider = "com.simba.athena.amazonaws.auth.DefaultAWSCredentialsProviderChain",
  region = "us-east-1", s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
  region = "us-east-1",
  s3_staging_dir = Sys.getenv("AWS_S3_STAGING_DIR"),
  schema_name = "default", max_error_retries = 10,
  connection_timeout = 10000, socket_timeout = 10000, log_path, log_level,
  ...)
  connection_timeout = 10000, socket_timeout = 10000, log_path,
  log_level, ...)
}
\arguments{
\item{provider}{JDBC auth provider (ideally leave default)}

M man/dbExistsTable-AthenaConnection-character-method.Rd => man/dbExistsTable-AthenaConnection-character-method.Rd +2 -1
@@ 5,7 5,8 @@
\alias{dbExistsTable,AthenaConnection,character-method}
\title{AthenaJDBC}
\usage{
\S4method{dbExistsTable}{AthenaConnection,character}(conn, name, schema, ...)
\S4method{dbExistsTable}{AthenaConnection,character}(conn, name, schema,
  ...)
}
\arguments{
\item{conn}{Athena connection}

M man/dbListFields-AthenaConnection-character-method.Rd => man/dbListFields-AthenaConnection-character-method.Rd +2 -1
@@ 5,7 5,8 @@
\alias{dbListFields,AthenaConnection,character-method}
\title{AthenaJDBC}
\usage{
\S4method{dbListFields}{AthenaConnection,character}(conn, name, schema, ...)
\S4method{dbListFields}{AthenaConnection,character}(conn, name, schema,
  ...)
}
\arguments{
\item{conn}{Athena connection}

M man/dbListTables-AthenaConnection-method.Rd => man/dbListTables-AthenaConnection-method.Rd +2 -1
@@ 5,7 5,8 @@
\alias{dbListTables,AthenaConnection-method}
\title{AthenaJDBC}
\usage{
\S4method{dbListTables}{AthenaConnection}(conn, pattern = "*", schema, ...)
\S4method{dbListTables}{AthenaConnection}(conn, pattern = "*", schema,
  ...)
}
\arguments{
\item{conn}{Athena connection}