~hrbrmstr/splashr

55adc4feb3986f11baec55511e5db7830c17c075 — hrbrmstr 1 year, 6 months ago 24ff888
updated vignette
M .Rbuildignore => .Rbuildignore +2 -0
@@ 1,3 1,5 @@
^Meta$
^doc$
^LICENSE\.md$
^.*\.Rproj$
^\.Rproj\.user$

M .gitignore => .gitignore +2 -0
@@ 1,3 1,5 @@
Meta
doc
.Rproj.user
.Rhistory
.RData

M NAMESPACE => NAMESPACE +3 -0
@@ 20,10 20,13 @@ export(get_body_size)
export(get_content_size)
export(get_content_type)
export(get_har_entry)
export(get_header_val)
export(get_headers)
export(get_headers_size)
export(get_request_type)
export(get_request_url)
export(get_response_body)
export(get_response_url)
export(har_entries)
export(har_entry_count)
export(install_splash)

M R/docker-splash.r => R/docker-splash.r +8 -6
@@ 120,12 120,14 @@ killall_splash <- function() {
  x <- docker$container$list(all=TRUE)

  for (i in 1:nrow(x)) {
    if (grepl("bin/splash", x$command[i])) {
      message(sprintf("Pruning: %s...", x$id[i]))
      if (x$state[i] == "running") {
        cntnr <- docker$container$get(x$id[i])
        cntnr$stop()
        cntnr$remove()
    if (length(x$command[i])) {
      if (grepl("bin/splash", x$command[i])) {
        message(sprintf("Pruning: %s...", x$id[i]))
        if (x$state[i] == "running") {
          cntnr <- docker$container$get(x$id[i])
          cntnr$stop()
          cntnr$remove()
        }
      }
    }
  }

M R/helpers.r => R/helpers.r +60 -2
@@ 33,7 33,9 @@ get_content_type <- function(har_resp_obj) {
#' @param type content type to compare to (default: "`application/json`")
#' @export
is_content_type <- function(har_resp_obj, type="application/json") {
  # Compare once; the result may be TRUE, FALSE, NA (content type unknown) or
  # zero-length (no content type recorded at all).
  res <- get_content_type(har_resp_obj) == type
  # Anything that is not a single, non-missing logical counts as "no match".
  # Checking the length first keeps `is.na()` from being handed a zero-length
  # value, which would make `if ()` fail.
  if (length(res) != 1L || is.na(res)) return(FALSE)
  res
}

#' @rdname get_content_type


@@ 101,6 103,51 @@ is_xhr <- function(har_resp_obj) {

}

#' Retrieve response headers as a data frame
#'
#' Stacks every entry of the `response$headers` element of a HAR entry into
#' one data frame (one row per header entry) classed as
#' `tbl_df`/`tbl`/`data.frame`.
#'
#' @md
#' @param har_resp_obj HAR response object
#' @return a data frame of the response headers; an invisible `NULL` when the
#'         entry has no response headers
#' @note the `name` column that contains the header key is normalized to lower case
#' @family splash_har_helpers
#' @export
get_headers <- function(har_resp_obj) {
  hdrs <- har_resp_obj$response$headers

  # no headers recorded: nothing to build
  if (!length(hdrs)) return(invisible(NULL))

  # one single-row data frame per header entry, then bind them row-wise
  hdr_rows <- lapply(hdrs, as.data.frame, stringsAsFactors=FALSE)
  hdr_df <- do.call(rbind.data.frame, hdr_rows)

  # header keys are case-insensitive, so store them in lower case
  hdr_df[["name"]] <- tolower(hdr_df[["name"]])

  class(hdr_df) <- c("tbl_df", "tbl", "data.frame")
  hdr_df
}

#' Retrieve the value of a specific response header
#'
#' Looks up `header` (case-insensitively) among the `response$headers` entries
#' of a HAR entry and returns its value.
#'
#' @md
#' @param har_resp_obj HAR response object
#' @param header the header you want the value for
#' @return the value of the first matching header, or `NA_character_` when the
#'         entry has no headers or none of them matches
#' @note the `name` column that contains the header key is normalized to lower case
#'        as is the passed-in requested header. Also, if there is more than one only
#'        the first is returned.
#' @family splash_har_helpers
#' @export
get_header_val <- function(har_resp_obj, header) {
  hdrs <- har_resp_obj$response$headers

  # no headers recorded: there is nothing to look up
  if (!length(hdrs)) return(NA_character_)

  wanted <- tolower(header)

  # one single-row data frame per header entry, then bind them row-wise
  hdr_rows <- lapply(hdrs, as.data.frame, stringsAsFactors=FALSE)
  hdr_df <- do.call(rbind.data.frame, hdr_rows)
  hdr_df[["name"]] <- tolower(hdr_df[["name"]])

  # keep only the values whose (lower-cased) key matches the request
  vals <- unlist(hdr_df[hdr_df$name == wanted, "value"], use.names = FALSE)

  if (length(vals)) vals[1] else NA_character_
}

#' Retrieve request URL
#'
#' @param har_resp_obj HAR response object


@@ 108,7 155,18 @@ is_xhr <- function(har_resp_obj) {
#' @export
get_request_url <- function(har_resp_obj) {
  utype <- har_resp_obj$request$url
  # A bare `if (utype == "")` fails when the URL is absent (NULL) or NA, so
  # screen those out first; an empty string also means "no URL recorded".
  if (length(utype) != 1L || is.na(utype) || utype == "") {
    return(NA_character_)
  }
  utype
}

#' Retrieve response URL
#'
#' Returns the \code{url} element of the \code{response} component of a HAR
#' entry.
#'
#' @param har_resp_obj HAR response object
#' @return the response URL, or \code{NA_character_} when the element is
#'         absent, \code{NA}, or an empty string
#' @family splash_har_helpers
#' @export
get_response_url <- function(har_resp_obj) {
  utype <- har_resp_obj$response$url
  # A bare `if (utype == "")` fails when the URL is absent (NULL) or NA, so
  # screen those out first; an empty string also means "no URL recorded".
  if (length(utype) != 1L || is.na(utype) || utype == "") {
    return(NA_character_)
  }
  utype
}


M cran-comments.md => cran-comments.md +22 -12
@@ 1,23 1,33 @@
## Test environments

* local OS X install, R 3.4.3 on both 10.12 and 10.13.2
* local ubuntu 3.4.2 and r-devel
* local macOS install, R 3.5.2 on macOS 10.14
* local ubuntu 3.5.1
* ubuntu on travis-ci, R oldrel, current and r-devel
* win-builder (devel and release)

## R CMD check results

0 errors | 0 warnings | 1 note
---

* This is a new release.
Per a note from Kurt, splashr now uses the
stevedore package since the docker package is
likely being retired from CRAN.

## Reverse dependencies
The invalid URL in the vignette (as noted in
an email thread) has been fixed.

This is a new release, so there are no reverse dependencies.
Tests require installation of a ~1.2GB docker image
which also means docker needs to be available.
Examples also require a Splash instance (dockerized
or full install) to work. Therefore, as has been the
case since the previous CRAN version, examples
are marked as dontrun and tests do not run on CRAN.
They do run monthly and on every repo push in Travis
https://travis-ci.org/hrbrmstr/splashr/settings.

---
I can modify any of the above behavior to conform
to any CRAN policy I may be violating.

Submitting patch due to CRAN note.
License has been changed to MIT.

Removed clipboard functionality since that was the path of 
least resistance. 
\ No newline at end of file
As always, thanks to the CRAN team for their
herculean efforts to keep the R package universe
healthy!
\ No newline at end of file

M man/as_har.Rd => man/as_har.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/render-har.r
% Please edit documentation in R/render-har.R
\name{as_har}
\alias{as_har}
\title{Turn a generic Splash HAR response into a HAR object}

M man/as_httr_req.Rd => man/as_httr_req.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/as_req.r
% Please edit documentation in R/as_req.R
\name{as_httr_req}
\alias{as_httr_req}
\title{Create an httr verb request function from an HAR request}

M man/as_response.Rd => man/as_response.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/as_request.r
% Please edit documentation in R/as_request.R
\name{as_response}
\alias{as_response}
\title{Return a HAR entry response as an httr::response object}

M man/execute_lua.Rd => man/execute_lua.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/execute.r
% Please edit documentation in R/execute.R
\name{execute_lua}
\alias{execute_lua}
\title{Execute a custom rendering script and return a result.}

M man/get_content_size.Rd => man/get_content_size.Rd +3 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/content.r
% Please edit documentation in R/content.R
\name{get_content_size}
\alias{get_content_size}
\alias{get_body_size}


@@ 21,9 21,11 @@ Retrieve size of content | body | headers
\seealso{
Other splash_har_helpers: \code{\link{get_content_type}},
  \code{\link{get_har_entry}},
  \code{\link{get_header_val}}, \code{\link{get_headers}},
  \code{\link{get_request_type}},
  \code{\link{get_request_url}},
  \code{\link{get_response_body}},
  \code{\link{get_response_url}},
  \code{\link{har_entry_count}}
}
\concept{splash_har_helpers}

M man/get_content_type.Rd => man/get_content_type.Rd +3 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.r
% Please edit documentation in R/helpers.R
\name{get_content_type}
\alias{get_content_type}
\alias{is_content_type}


@@ 56,9 56,11 @@ Retrieve or test content type of a HAR request object
\seealso{
Other splash_har_helpers: \code{\link{get_content_size}},
  \code{\link{get_har_entry}},
  \code{\link{get_header_val}}, \code{\link{get_headers}},
  \code{\link{get_request_type}},
  \code{\link{get_request_url}},
  \code{\link{get_response_body}},
  \code{\link{get_response_url}},
  \code{\link{har_entry_count}}
}
\concept{splash_har_helpers}

M man/get_har_entry.Rd => man/get_har_entry.Rd +3 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.r
% Please edit documentation in R/helpers.R
\name{get_har_entry}
\alias{get_har_entry}
\title{Retrieve an entry by index from a HAR object}


@@ 17,9 17,11 @@ Retrieve an entry by index from a HAR object
\seealso{
Other splash_har_helpers: \code{\link{get_content_size}},
  \code{\link{get_content_type}},
  \code{\link{get_header_val}}, \code{\link{get_headers}},
  \code{\link{get_request_type}},
  \code{\link{get_request_url}},
  \code{\link{get_response_body}},
  \code{\link{get_response_url}},
  \code{\link{har_entry_count}}
}
\concept{splash_har_helpers}

A man/get_header_val.Rd => man/get_header_val.Rd +32 -0
@@ 0,0 1,32 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.R
\name{get_header_val}
\alias{get_header_val}
\title{Retrieve the value of a specific response header}
\usage{
get_header_val(har_resp_obj, header)
}
\arguments{
\item{har_resp_obj}{HAR response object}

\item{header}{the header you want the value for}
}
\description{
Retrieve the value of a specific response header
}
\note{
the \code{name} column that contains the header key is normalized to lower case
as is the passed-in requested header. Also, if there is more than one only
the first is returned.
}
\seealso{
Other splash_har_helpers: \code{\link{get_content_size}},
  \code{\link{get_content_type}},
  \code{\link{get_har_entry}}, \code{\link{get_headers}},
  \code{\link{get_request_type}},
  \code{\link{get_request_url}},
  \code{\link{get_response_body}},
  \code{\link{get_response_url}},
  \code{\link{har_entry_count}}
}
\concept{splash_har_helpers}

A man/get_headers.Rd => man/get_headers.Rd +29 -0
@@ 0,0 1,29 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.R
\name{get_headers}
\alias{get_headers}
\title{Retrieve response headers as a data frame}
\usage{
get_headers(har_resp_obj)
}
\arguments{
\item{har_resp_obj}{HAR response object}
}
\description{
Retrieve response headers as a data frame
}
\note{
the \code{name} column that contains the header key is normalized to lower case
}
\seealso{
Other splash_har_helpers: \code{\link{get_content_size}},
  \code{\link{get_content_type}},
  \code{\link{get_har_entry}},
  \code{\link{get_header_val}},
  \code{\link{get_request_type}},
  \code{\link{get_request_url}},
  \code{\link{get_response_body}},
  \code{\link{get_response_url}},
  \code{\link{har_entry_count}}
}
\concept{splash_har_helpers}

M man/get_request_type.Rd => man/get_request_type.Rd +3 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.r
% Please edit documentation in R/helpers.R
\name{get_request_type}
\alias{get_request_type}
\alias{is_get}


@@ 22,8 22,10 @@ Retrieve or test request type
Other splash_har_helpers: \code{\link{get_content_size}},
  \code{\link{get_content_type}},
  \code{\link{get_har_entry}},
  \code{\link{get_header_val}}, \code{\link{get_headers}},
  \code{\link{get_request_url}},
  \code{\link{get_response_body}},
  \code{\link{get_response_url}},
  \code{\link{har_entry_count}}
}
\concept{splash_har_helpers}

M man/get_request_url.Rd => man/get_request_url.Rd +3 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.r
% Please edit documentation in R/helpers.R
\name{get_request_url}
\alias{get_request_url}
\title{Retrieve request URL}


@@ 16,8 16,10 @@ Retrieve request URL
Other splash_har_helpers: \code{\link{get_content_size}},
  \code{\link{get_content_type}},
  \code{\link{get_har_entry}},
  \code{\link{get_header_val}}, \code{\link{get_headers}},
  \code{\link{get_request_type}},
  \code{\link{get_response_body}},
  \code{\link{get_response_url}},
  \code{\link{har_entry_count}}
}
\concept{splash_har_helpers}

M man/get_response_body.Rd => man/get_response_body.Rd +3 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.r
% Please edit documentation in R/helpers.R
\name{get_response_body}
\alias{get_response_body}
\title{Retrieve the body content of a HAR entry}


@@ 22,8 22,10 @@ Retrieve the body content of a HAR entry
Other splash_har_helpers: \code{\link{get_content_size}},
  \code{\link{get_content_type}},
  \code{\link{get_har_entry}},
  \code{\link{get_header_val}}, \code{\link{get_headers}},
  \code{\link{get_request_type}},
  \code{\link{get_request_url}},
  \code{\link{get_response_url}},
  \code{\link{har_entry_count}}
}
\concept{splash_har_helpers}

A man/get_response_url.Rd => man/get_response_url.Rd +25 -0
@@ 0,0 1,25 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.R
\name{get_response_url}
\alias{get_response_url}
\title{Retrieve response URL}
\usage{
get_response_url(har_resp_obj)
}
\arguments{
\item{har_resp_obj}{HAR response object}
}
\description{
Retrieve response URL
}
\seealso{
Other splash_har_helpers: \code{\link{get_content_size}},
  \code{\link{get_content_type}},
  \code{\link{get_har_entry}},
  \code{\link{get_header_val}}, \code{\link{get_headers}},
  \code{\link{get_request_type}},
  \code{\link{get_request_url}},
  \code{\link{get_response_body}},
  \code{\link{har_entry_count}}
}
\concept{splash_har_helpers}

M man/har_entries.Rd => man/har_entries.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.r
% Please edit documentation in R/helpers.R
\name{har_entries}
\alias{har_entries}
\title{Retrieve just the HAR entries from a splashr request}

M man/har_entry_count.Rd => man/har_entry_count.Rd +4 -2
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.r
% Please edit documentation in R/helpers.R
\name{har_entry_count}
\alias{har_entry_count}
\title{Retrieves number of HAR entries in a response}


@@ 16,8 16,10 @@ Retrieves number of HAR entries in a response
Other splash_har_helpers: \code{\link{get_content_size}},
  \code{\link{get_content_type}},
  \code{\link{get_har_entry}},
  \code{\link{get_header_val}}, \code{\link{get_headers}},
  \code{\link{get_request_type}},
  \code{\link{get_request_url}},
  \code{\link{get_response_body}}
  \code{\link{get_response_body}},
  \code{\link{get_response_url}}
}
\concept{splash_har_helpers}

M man/json_fromb64.Rd => man/json_fromb64.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.r
% Please edit documentation in R/utils.R
\name{json_fromb64}
\alias{json_fromb64}
\title{Convert a Base64 encoded string into an R object}

M man/render_har.Rd => man/render_har.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/render-har.r
% Please edit documentation in R/render-har.R
\name{render_har}
\alias{render_har}
\title{Return information about Splash interaction with a website in HAR format.}

M man/render_html.Rd => man/render_html.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/render-html.r
% Please edit documentation in R/render-html.R
\name{render_html}
\alias{render_html}
\title{Return the HTML of the javascript-rendered page.}

M man/render_jpeg.Rd => man/render_jpeg.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/render-jpg.r
% Please edit documentation in R/render-jpg.R
\name{render_jpeg}
\alias{render_jpeg}
\title{Return an image (in JPEG format) of the javascript-rendered page.}

M man/render_json.Rd => man/render_json.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/render-json.r
% Please edit documentation in R/render-json.R
\name{render_json}
\alias{render_json}
\title{Return a json-encoded dictionary with information about javascript-rendered webpage.}

M man/render_png.Rd => man/render_png.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/render-png.r
% Please edit documentation in R/render-png.R
\name{render_png}
\alias{render_png}
\title{Return an image (in PNG format) of the javascript-rendered page.}

M man/splash_add_lua.Rd => man/splash_add_lua.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.R
\name{splash_add_lua}
\alias{splash_add_lua}
\title{Add raw lua code into DSL call chain}

M man/splash_click.Rd => man/splash_click.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.R
\name{splash_click}
\alias{splash_click}
\title{Trigger mouse click event in web page.}

M man/splash_enable_javascript.Rd => man/splash_enable_javascript.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.R
\name{splash_enable_javascript}
\alias{splash_enable_javascript}
\title{Enable or disable execution of JavaScript code embedded in the page.}

M man/splash_focus.Rd => man/splash_focus.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.R
\name{splash_focus}
\alias{splash_focus}
\title{Focus on a document element provided by a CSS selector}

M man/splash_go.Rd => man/splash_go.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.R
\name{splash_go}
\alias{splash_go}
\title{Go to an URL.}

M man/splash_har.Rd => man/splash_har.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.R
\name{splash_har}
\alias{splash_har}
\title{Return information about Splash interaction with a website in HAR format.}

M man/splash_har_reset.Rd => man/splash_har_reset.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.R
\name{splash_har_reset}
\alias{splash_har_reset}
\title{Drops all internally stored HAR records.}

M man/splash_html.Rd => man/splash_html.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.R
\name{splash_html}
\alias{splash_html}
\title{Return a HTML snapshot of a current page.}

M man/splash_images.Rd => man/splash_images.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.R
\name{splash_images}
\alias{splash_images}
\title{Enable/disable images}

M man/splash_plugins.Rd => man/splash_plugins.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.R
\name{splash_plugins}
\alias{splash_plugins}
\title{Enable or disable browser plugins (e.g. Flash).}

M man/splash_png.Rd => man/splash_png.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.R
\name{splash_png}
\alias{splash_png}
\title{Return a screenshot of a current page in PNG format.}

M man/splash_press.Rd => man/splash_press.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.R
\name{splash_press}
\alias{splash_press}
\title{Trigger mouse press event in web page.}

M man/splash_private_mode.Rd => man/splash_private_mode.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.R
\name{splash_private_mode}
\alias{splash_private_mode}
\title{Enable or disable execution of JavaScript code embedded in the page.}

M man/splash_release.Rd => man/splash_release.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.R
\name{splash_release}
\alias{splash_release}
\title{Trigger mouse release event in web page.}

M man/splash_response_body.Rd => man/splash_response_body.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.R
\name{splash_response_body}
\alias{splash_response_body}
\title{Enable or disable response content tracking.}

M man/splash_send_keys.Rd => man/splash_send_keys.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.R
\name{splash_send_keys}
\alias{splash_send_keys}
\title{Send keyboard events to page context.}

M man/splash_send_text.Rd => man/splash_send_text.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.R
\name{splash_send_text}
\alias{splash_send_text}
\title{Send text as input to page context, literally, character by character.}

M man/splash_user_agent.Rd => man/splash_user_agent.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r, R/user-agents.R
% Please edit documentation in R/dsl.R, R/user-agents.R
\docType{data}
\name{splash_user_agent}
\alias{splash_user_agent}

M man/splash_wait.Rd => man/splash_wait.Rd +1 -1
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dsl.r
% Please edit documentation in R/dsl.R
\name{splash_wait}
\alias{splash_wait}
\title{Wait for a period time}

D vignettes/figures/splashr04.png => vignettes/figures/splashr04.png +0 -0

M vignettes/splashr_helpers.Rmd => vignettes/splashr_helpers.Rmd +219 -136
@@ 22,109 22,106 @@ Let's see what extra goodies `splashr` provides to make our lives easier.
## Handling `splashr` Objects

One of the most powerful functions in `splashr` is `render_har()`. You get every component loaded by a dynamic web page, and some sites have upwards of 100 elements for any given page. How can you get to the bits that you want?

Let's use a different example that's a bit gnarly (i.e. you may need to work through it a couple times).

The U.K. government has an open data portal and one of the sections contains map tiles for various grid quadrants. It's a really nice site, but it's designed for interactive use and we want to be able to get to all the tile files programmatically. For our example, we'll be grabbing data from <http://environment.data.gov.uk/ds/survey/index.jsp#/survey?grid=TQ38>.

<img width="100%" style="max-width:100%" src="figures/splashr04.png"/>

Since we don't know what we need, let's use `render_har()` to get everything back into R:

We'll use `render_har()` to demonstrate how to find resources a site loads and use the data we gather to assess how "safe" these sites are &mdash; i.e. how many third-party javascript components they load and how safely they are loaded. Note that code in this vignette assumes a Splash instance is running locally on your system.

We'll check <https://apple.com/> first since Apple claims to care about our privacy. If that's true, then they'll load little or no third-party content.

```{r eval=FALSE}
(apple <- render_har(url = "https://apple.com/", response_body = TRUE))
## --------HAR VERSION-------- 
## HAR specification version: 1.2 
## --------HAR CREATOR-------- 
## Created by: Splash 
## version: 3.3.1 
## --------HAR BROWSER-------- 
## Browser: QWebKit 
## version: 602.1 
## --------HAR PAGES-------- 
## Page id: 1 , Page title: Apple 
## --------HAR ENTRIES-------- 
## Number of entries: 84 
## REQUESTS: 
## Page: 1 
## Number of entries: 84 
##   -  https://apple.com/ 
##   -  https://www.apple.com/ 
##   -  https://www.apple.com/ac/globalnav/4/en_US/styles/ac-globalnav.built.css 
##   -  https://www.apple.com/ac/localnav/4/styles/ac-localnav.built.css 
##   -  https://www.apple.com/ac/globalfooter/4/en_US/styles/ac-globalfooter.built.css 
##      ........ 
##   -  https://www.apple.com/v/home/ea/images/heroes/iphone-xs/iphone_xs_0afef_mediumtall.jpg 
##   -  https://www.apple.com/v/home/ea/images/heroes/iphone-xr/iphone_xr_5e40f_mediumtall.jpg 
##   -  https://www.apple.com/v/home/ea/images/heroes/iphone-xs/iphone_xs_0afef_mediumtall.jpg 
##   -  https://www.apple.com/v/home/ea/images/heroes/macbook-air/macbook_air_mediumtall.jpg 
##   -  https://www.apple.com/v/home/ea/images/heroes/macbook-air/macbook_air_mediumtall.jpg 
```
library(splashr)
library(httr)
library(tidyverse)

pg_har <- render_har(url = "http://environment.data.gov.uk/ds/survey/index.jsp#/survey?grid=TQ38", response_body = TRUE, wait = 10)

entries <- har_entries(pg_har)

map_chr(entries, get_content_type) %>%
  table()
## .
## application/json        image/gif        image/png         text/css        text/html
##               33                1               24                1                1
##  text/javascript
##                1

map_chr(entries, get_request_url)
##  [1] "http://environment.data.gov.uk/ds/survey/index.jsp#/survey?grid=TQ38"
##  [2] "http://www.geostore.com/environment-agency/survey.full.min.170718.css"
##  [3] "http://www.geostore.com/environment-agency/survey.full.min.170718.js"
##  [4] "http://environment.data.gov.uk/ds/survey/images/busy.gif"
##  [5] "http://environment.data.gov.uk/ds/survey/rest/config/download?_=1503933543160"
##  [6] "http://www.geostore.com/environment-agency/rest/grid/EA_SUPPLIED_OS_10KM/TQ38"
##  [7] "http://www.geostore.com/environment-agency/rest/gazetteer/search/postcode/TQ38"
##  [8] "http://environment.data.gov.uk/ds/survey/images/download.png"
##  [9] "http://www.geostore.com/environment-agency/images/dgu-header-white.png"
## [10] "http://www.geostore.com/environment-agency/images/airbus-footer-logo.png"
## [11] "http://www.geostore.com/environment-agency/images/ogl-symbol-41px-retina-black.png"
## [12] "http://environment.data.gov.uk/ds/survey/fonts/glyphicons-halflings-regular.woff2"
## [13] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=390919.47990708053%2C234551.68794424832%2C469103.375888497%2C312735.5839256648"
## [14] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=469103.37588850036%2C234551.68794424832%2C547287.2718699168%2C312735.5839256648"
## [15] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=390919.47990708053%2C312735.5839256644%2C469103.375888497%2C390919.4799070809"
## [16] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=390919.47990708053%2C156367.7919628322%2C469103.375888497%2C234551.68794424867"
## [17] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=312735.5839256644%2C234551.68794424832%2C390919.4799070809%2C312735.5839256648"
## [18] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=469103.37588850036%2C312735.5839256644%2C547287.2718699168%2C390919.4799070809"
## [19] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=469103.37588850036%2C156367.7919628322%2C547287.2718699168%2C234551.68794424867"
## [20] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=312735.5839256644%2C312735.5839256644%2C390919.4799070809%2C390919.4799070809"
## [21] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=312735.5839256644%2C156367.7919628322%2C390919.4799070809%2C234551.68794424867"
## [22] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=547287.2718699165%2C234551.68794424832%2C625471.1678513329%2C312735.5839256648"
## [23] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=547287.2718699165%2C312735.5839256644%2C625471.1678513329%2C390919.4799070809"
## [24] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=547287.2718699165%2C156367.7919628322%2C625471.1678513329%2C234551.68794424867"
## [25] "http://www.geostore.com/environment-agency/rest/grid/EA_SUPPLIED_OS_10KM/535000/185000"
## [26] "http://www.geostore.com/environment-agency/rest/gazetteer/search/postcode/TQ38 - OS"
## [27] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=527741.2978745624%2C175913.76595818624%2C537514.2848722395%2C185686.7529558633"
## [28] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=527741.2978745624%2C185686.75295586511%2C537514.2848722395%2C195459.7399535422"
## [29] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=537514.2848722376%2C175913.76595818624%2C547287.2718699146%2C185686.7529558633"
## [30] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=537514.2848722376%2C185686.75295586511%2C547287.2718699146%2C195459.7399535422"
## [31] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=517968.31087688357%2C175913.76595818624%2C527741.2978745606%2C185686.7529558633"
## [32] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=517968.31087688357%2C185686.75295586511%2C527741.2978745606%2C195459.7399535422"
## [33] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=547287.2718699165%2C175913.76595818624%2C557060.2588675935%2C185686.7529558633"
## [34] "http://www.geostore.com/environment-agency/WMSExpeditedAdapter?SESSIONID=UEADOWNLOAD&CID=CDEFAULTEAGEOSTORE&UID=UEADOWNLOAD&PASSWORD=A1r5us2015DLD&INTERFACE=EAPUBLICDOWNLOAD&MAP=%2Fvar%2Fmapserver%2Fmapfiles%2FEAPUBLIC.map&SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap&FORMAT=image%2Fpng&TRANSPARENT=true&LAYERS=EA-DLD-OSRASTERS&TILED=false&SRS=EPSG%3A27700&WIDTH=256&HEIGHT=256&CRS=EPSG%3A27700&STYLES=&BBOX=547287.2718699165%2C185686.75295586511%2C557060.2588675935%2C195459.7399535422"
## [35] "http://www.geostore.com/environment-agency/rest/product/EA_SUPPLIED_OS_10KM/TQ38?catalogName=Survey"
## [36] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-2003-EA"
## [37] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-2005-EA"
## [38] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-2009-EA"
## [39] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-2015-EA"
## [40] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-GROUP-ENGLAND-EA"
## [41] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-1999-EA"
## [42] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-2002-EA"
## [43] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-2007-EA"
## [44] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-2011-EA"
## [45] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-2012-EA"
## [46] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-TIMESTAMPED-ENGLAND-2003-EA"
## [47] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-TIMESTAMPED-ENGLAND-2005-EA"
## [48] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-TIMESTAMPED-ENGLAND-2009-EA"
## [49] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-TIMESTAMPED-ENGLAND-2015-EA"
## [50] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-GROUP-ENGLAND-EA"
## [51] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-TIMESTAMPED-ENGLAND-2002-EA"
## [52] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-TIMESTAMPED-ENGLAND-2007-EA"
## [53] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-TIMESTAMPED-ENGLAND-2011-EA"
## [54] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-TIMESTAMPED-ENGLAND-2012-EA"
## [55] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-LAZ-ENGLAND-EA"
## [56] "http://www.geostore.com/environment-agency/rest/product/group/OAP-INCIDENTRESPONSE-ENGLAND-EA"
## [57] "http://www.geostore.com/environment-agency/rest/product/group/VAP-NIGHTTIME-ENGLAND-2012-EA"
## [58] "http://www.geostore.com/environment-agency/rest/product/group/VAP-RGB-ENGLAND-2008-EA"
## [59] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DSM-TIMESTAMPED-ENGLAND-EA"
## [60] "http://www.geostore.com/environment-agency/rest/product/group/LIDAR-DTM-TIMESTAMPED-ENGLAND-EA"
## [61] "http://www.geostore.com/environment-agency/rest/product/group/VAP-NIGHTTIME-ENGLAND-EA"
## [62] "http://www.geostore.com/environment-agency/rest/product/group/VAP-RGB-ENGLAND-EA"

The HAR output shows that when you visit `apple.com` your browser makes at least 84 requests for resources. We can see what types of content are loaded:

```{r eval=FALSE}
har_entries(apple) %>% 
  purrr::map_chr(get_content_type) %>% 
  table(dnn = "content_type") %>% 
  broom::tidy() %>% 
  dplyr::arrange(desc(n))
## # A tibble: 9 x 2
##   content_type                 n
##   <chr>                    <int>
## 1 font/woff2                  27
## 2 application/x-javascript    15
## 3 image/svg+xml               10
## 4 text/css                     9
## 5 image/jpeg                   7
## 6 image/png                    6
## 7 application/font-woff        4
## 8 text/html                    3
## 9 application/json             2
```

Many of those resources are just image tiles for the map you see in the screenshot. Let's try to find data files:
Lots of calls to fonts, 15 javascript files and even 2 JSON files. Let's see what the domains are for these resources:

```{r eval=FALSE}
har_entries(apple) %>% 
  purrr::map_chr(get_response_url) %>% 
  purrr::map_chr(urltools::domain) %>% 
  unique()
## [1] "apple.com"               "www.apple.com"           "securemetrics.apple.com"
```
map_lgl(entries, is_json)
##  [1] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE    NA FALSE
## [14] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE
## [27] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
## [40]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [53]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE

Wow! Only calls to Apple-controlled resources. 

I wonder what's in those JSON files, though:

```{r eval=FALSE}
har_entries(apple) %>% 
  purrr::keep(is_json) %>% 
  purrr::map(get_response_body, "text") %>% 
  purrr::map(jsonlite::fromJSON) %>% 
  str(3)
## List of 2
##  $ :List of 2
##   ..$ locale        :List of 3
##   .. ..$ country      : chr "us"
##   .. ..$ attr         : chr "en-US"
##   .. ..$ textDirection: chr "ltr"
##   ..$ localeswitcher:List of 7
##   .. ..$ name        : chr "localeswitcher"
##   .. ..$ metadata    : Named list()
##   .. ..$ displayIndex: int 1
##   .. ..$ copy        :List of 5
##   .. ..$ continue    :List of 5
##   .. ..$ exit        :List of 5
##   .. ..$ select      :List of 5
##  $ :List of 2
##   ..$ id     : chr "ad6ca319-1ef1-20da-c4e0-5185088996cb"
##   ..$ results:'data.frame': 2 obs. of  2 variables:
##   .. ..$ sectionName   : chr [1:2] "quickLinks" "suggestions"
##   .. ..$ sectionResults:List of 2
```

Now, we're getting somewhere. The `har_entries()` function makes it easy to get to the individual elements and we can use the `is_json()` helper with `purrr` functions to slice and dice at will. Here are all the `is_` functions you can use with HAR objects:
So, locale metadata and something to do with on-page links/suggestions.

As demonstrated, the `har_entries()` function makes it easy to get to the individual elements and we used the `is_json()` helper with `purrr` functions to slice and dice the structure at will. Here are all the `is_` functions you can use with HAR objects:

- `is_binary()`
- `is_content_type()`


@@ 145,60 142,141 @@ You can also use various `get_` helpers to avoid gnarly `$` or `[[]]` constructs
- `get_body_size()` ---	Retrieve size of content | body | headers
- `get_content_size()` ---	Retrieve size of content | body | headers
- `get_content_type()` ---	Retrieve or test content type of a HAR request object
- `get_headers()` --- Retrieve response headers as a data frame
- `get_headers_size()` ---	Retrieve size of content | body | headers
- `get_request_type()` ---	Retrieve or test request type
- `get_request_url()` ---	Retrieve request URL
- `get_response_url()` ---	Retrieve response URL
- `get_response_body()` ---	Retrieve the body content of a HAR entry

We've seen one example of them already, here's another:

```
map_dbl(entries, get_body_size)
##  [1]    1180  132571 1211097     701      -1     466   20342     579    4489
## [10]   13332    1774   18028   59782   48008   55270   48323   42879   36116
## [19]   69560   59602   58135   37443   17266   49840     464   20342   14579
## [28]   14626   16265   14473   14565   13639   15106   12383   41887     186
## [37]     186     186     186     185     186     186     186     186     186
## [46]     186     186     186     186     185     186     186     186     186
## [55]     223     286     170     158     272     272     280     267
```{r eval=FALSE}
har_entries(apple) %>% 
  purrr::map_dbl(get_body_size)
##  [1]      0  54521  95644  98069  43183   8689  19035 794210  66487 133730 311054  13850 199928 161859  90322 343189  19035
## [18] 794210  66487 133730    554    802   1002   1160   1694    264   1082   1661    390    416 108468 108828 100064 109728
## [35] 109412  99196 108856 109360 108048   8868  10648  10380  10476    137 311054  13850   3192   3253   4130   2027   1247
## [52]   1748    582 199928 109628 107832 109068 100632 108928  97812 108312 108716 107028  65220  73628  72188  72600  70400
## [69]  73928  72164  73012  71080   1185 161859  90322 343189      0    491  60166  58509  60166  58509  53281  53281
```

You can bop around the data and you'll find that the one we want is a "catalog" file. We can look for it with these tools:
So, a visit to Apple's page transfers nearly 8MB of content down to your browser.

```
idx <- which(map_lgl(entries, is_json))
California also claims to care about your privacy, but is it _really_ true?

map_chr(entries[idx], get_request_url) %>%
  grepl("catalog", .) %>%
  which()
## [1] 6
```
```{r eval=FALSE}
ca <- render_har(url = "https://www.ca.gov/", response_body = TRUE)

and, then use another helper `as_response()` which makes the HAR entry behave like an `httr` `response` object so we can use familiar idioms to get the data.
har_entries(ca) %>% 
  purrr::map_chr(~.x$response$url %>% urltools::domain()) %>% 
  unique()
##  [1] "www.ca.gov"                      "fonts.googleapis.com"            "california.azureedge.net"       
##  [4] "portal-california.azureedge.net" "az416426.vo.msecnd.net"          "fonts.gstatic.com"              
##  [7] "ssl.google-analytics.com"        "cse.google.com"                  "translate.google.com"           
## [10] "api.stateentityprofile.ca.gov"   "translate.googleapis.com"        "www.google.com"                 
## [13] "clients1.google.com"             "www.gstatic.com"                 "platform.twitter.com"           
## [16] "dc.services.visualstudio.com"   
```

Yikes! It _sure_ doesn't look that way given all the folks they let track you when you visit their main page. Are they executing javascript from those sites?

```{r eval=FALSE}
## # A tibble: 8 x 2
##   dom                      type                    
##   <chr>                    <chr>                   
## 1 california.azureedge.net application/javascript  
## 2 california.azureedge.net application/x-javascript
## 3 az416426.vo.msecnd.net   application/x-javascript
## 4 cse.google.com           text/javascript         
## 5 translate.google.com     text/javascript         
## 6 translate.googleapis.com text/javascript         
## 7 www.google.com           text/javascript         
## 8 platform.twitter.com     application/javascript  
```
as_response(entries[idx][[6]]) %>%
  content(as = "text", encoding = "UTF-8") %>%
  jsonlite::fromJSON(flatten=TRUE) %>%
  tbl_df() %>%
  glimpse()
## Observations: 99
## Variables: 12
## $ id              <int> 170653, 170659, 170560, 170565, 178189, 178307, 201556, 238312, 238307, 2383...
## $ guid            <chr> "54595a8c-b267-11e6-93d3-9457a5578ca0", "63176082-b267-11e6-93d3-9457a5578ca...
## $ pyramid         <chr> "LIDAR-DSM-1M-ENGLAND-2003-EA", "LIDAR-DSM-1M-ENGLAND-2003-EA", "LIDAR-DSM-1...
## $ tileReference   <chr> "TQ38", "TQ38", "TQ38", "TQ38", "TQ38", "TQ38", "TQ38", "TQ38", "TQ38", "TQ3...
## $ fileName        <chr> "LIDAR-DSM-1M-2003-TQ38se.zip", "LIDAR-DSM-1M-2003-TQ38ne.zip", "LIDAR-DSM-1...
## $ coverageLayer   <chr> "LIDAR-DSM-1M-ENGLAND-2003-EA-MD-YY", "LIDAR-DSM-1M-ENGLAND-2003-EA-MD-YY", ...
## $ fileSize        <int> 76177943, 52109669, 59326278, 18048623, 11919071, 13204420, 511124, 11736980...
## $ descriptiveName <chr> "LIDAR Tiles DSM at 1m spatial resolution 2003", "LIDAR Tiles DSM at 1m spat...
## $ description     <chr> "1m", "1m", "1m", "1m", "1m", "1m", "1m", "1m", "1m", "1m", "1m", "DSM at 1m...
## $ groupName       <chr> "LIDAR-DSM-TIMESTAMPED-ENGLAND-2003-EA", "LIDAR-DSM-TIMESTAMPED-ENGLAND-2003...
## $ displayOrder    <int> -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100...
## $ metaDataUrl     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "https://data.gov.uk/dataset/lid...

We can also examine the response headers to check for signs of safety as well (i.e. are there content security policy headers or other types of security-oriented headers):

```{r eval=FALSE}
har_entries(ca) %>% 
  purrr::map_df(get_headers) %>% 
  dplyr::count(name, sort=TRUE) %>% 
  print(n=50)
## # A tibble: 42 x 2
##    name                              n
##    <chr>                         <int>
##  1 date                            149
##  2 server                          148
##  3 content-type                    142
##  4 last-modified                   126
##  5 etag                            104
##  6 content-encoding                 83
##  7 access-control-allow-origin      78
##  8 accept-ranges                    74
##  9 vary                             69
## 10 content-length                   66
## 11 x-ms-ref                         57
## 12 x-ms-ref-originshield            57
## 13 access-control-expose-headers    56
## 14 content-md5                      51
## 15 x-ms-blob-type                   51
## 16 x-ms-lease-status                51
## 17 x-ms-request-id                  51
## 18 x-ms-version                     51
## 19 cache-control                    37
## 20 expires                          34
## 21 alt-svc                          30
## 22 x-xss-protection                 29
## 23 x-content-type-options           27
## 24 age                              22
## 25 transfer-encoding                20
## 26 timing-allow-origin              14
## 27 x-powered-by                     14
## 28 access-control-allow-headers      7
## 29 pragma                            6
## 30 request-context                   5
## 31 x-aspnet-version                  5
## 32 x-frame-options                   4
## 33 content-disposition               3
## 34 access-control-max-age            2
## 35 content-language                  2
## 36 p3p                               2
## 37 x-cache                           2
## 38 access-control-allow-methods      1
## 39 location                          1
## 40 set-cookie                        1
## 41 strict-transport-security         1
## 42 x-ms-session-id                   1
```

Now, we have the data file downloaded and its metadata info.
Unfortunately, they do let Google and Twitter execute javascript.

They seem to use quite a bit of Microsoft tech. Let's look at the HTTP servers they directly and indirectly rely on:

```{r eval=FALSE}
har_entries(ca) %>% 
  purrr::map_chr(get_header_val, "server") %>% 
  table(dnn = "server") %>% 
  broom::tidy() %>% 
  dplyr::arrange(desc(n))
## # A tibble: 14 x 2
##    server                                           n
##    <chr>                                        <int>
##  1 Apache                                          55
##  2 Windows-Azure-Blob/1.0 Microsoft-HTTPAPI/2.0    50
##  3 sffe                                            23
##  4 Microsoft-IIS/10.0                               7
##  5 ESF                                              3
##  6 HTTP server (unknown)                            2
##  7 ECAcc (bsa/EAD2)                                 1
##  8 ECD (sjc/16E0)                                   1
##  9 ECD (sjc/16EA)                                   1
## 10 ECD (sjc/16F4)                                   1
## 11 ECD (sjc/4E95)                                   1
## 12 ECD (sjc/4E9F)                                   1
## 13 ECS (bsa/EB1F)                                   1
## 14 gws                                              1
```

## Impersonating Other Browsers



@@ 216,12 294,17 @@ The various `render_` functions present themselves as modern WebKit Linux browse
- `ua_linux_chrome`
- `ua_linux_firefox`
- `ua_ios_safari`
- `ua_android_samsung`
- `ua_kindle`
- `ua_ps4`
- `ua_apple_tv`
- `ua_chromecast`

NOTE: These can be used with `curl`, `httr`, `rvest` and `RCurl` calls as well.

We can see it in action:

```
```{r eval=FALSE}
URL <- "https://httpbin.org/user-agent"

splash_local %>%


@@ 251,7 334,7 @@ The `install_splash()` will pull the image locally for you. It takes a bit (the 

The best way to use start/stop is to:

```
```{r eval=FALSE}
spi <- start_splash()

# ... scraping tasks ...