~hrbrmstr/urlscan

ref: 4d8ceac4f9e24a72e4f366ae192dc24b617e8eb9 urlscan/R/results.R -rw-r--r-- 1.5 KiB
4d8ceac4boB Rudis v0.2.0 1 year, 8 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#' Retrieve detailed results for a given scan ID
#'
#' Requests the JSON result document for `scan_id` from the urlscan.io
#' `result` API endpoint and, optionally, also requests the DOM snapshot and
#' the screenshot for the same scan.
#'
#' An HTTP error status on the main result request raises an R error (via
#' [httr::stop_for_status()]). The optional DOM request is *not* status
#' checked: the raw response is returned as-is so the caller can inspect it.
#' The optional screenshot is only attached when that request returns HTTP 200;
#' otherwise the `screenshot` element is silently omitted.
#'
#' @md
#' @param scan_id scan id (UUID). Must be a single, non-empty, non-`NA`
#'        character string; anything else raises an error before any request
#'        is made.
#' @param include_dom (logical) include the website DOM? (default: `FALSE`)
#' @param include_shot (logical) include the website screen shot? (default: `FALSE`)
#' @return `list` (with class `urlscan_result`) with `scan_result` task, page,
#'         content lists, fetch data, connection metadata and computed stats.\cr
#'         \cr
#'         The list can also include `dom` if
#'         `include_dom` is `TRUE`. If so, `dom` will be an `httr` `response` object
#'         since the data could be binary. Use `httr` tools to process it.\cr
#'         \cr
#'         The list can also include `screenshot` if `include_shot` is `TRUE` and
#'         a screenshot was available.
#' @export
urlscan_result <- function(scan_id, include_dom=FALSE, include_shot=FALSE) {

  # Fail fast with a message that names the argument, rather than letting a
  # NULL / NA / empty / vector id be pasted into the URL and fail (or hit a
  # bogus endpoint) somewhere inside httr.
  if (!is.character(scan_id) || length(scan_id) != 1L ||
      is.na(scan_id) || !nzchar(scan_id)) {
    stop("`scan_id` must be a single, non-empty character string", call. = FALSE)
  }

  result_res <- httr::GET(
    url = sprintf("https://urlscan.io/api/v1/result/%s", scan_id),
    .URLSCANUA
  )

  httr::stop_for_status(result_res)

  # The endpoint returns JSON, which is UTF-8; stating it avoids httr's
  # "No encoding supplied" guess-and-message path.
  result_txt <- httr::content(result_res, as = "text", encoding = "UTF-8")

  out <- list(scan_result = jsonlite::fromJSON(result_txt))

  if (include_dom) {

    # Deliberately not status-checked or parsed: the documented contract is
    # that `dom` is the raw httr response object.
    out$dom <- httr::GET(
      url = sprintf("https://urlscan.io/dom/%s", scan_id),
      .URLSCANUA
    )

  }

  if (include_shot) {

    shot_res <- httr::GET(
      url = sprintf("https://urlscan.io/screenshots/%s.png", scan_id),
      .URLSCANUA
    )

    # Best effort: only attach the image when the request succeeded with 200.
    if (httr::status_code(shot_res) == 200) {
      out$screenshot <- magick::image_read(shot_res$content)
    }

  }

  class(out) <- c("urlscan_result", "list")

  out

}