~hrbrmstr/urlscan

4d8ceac4f9e24a72e4f366ae192dc24b617e8eb9 — boB Rudis 1 year, 7 months ago af7b473
v0.2.0
M DESCRIPTION => DESCRIPTION +3 -3
@@ 1,8 1,8 @@
Package: urlscan
Type: Package
Title: Analyze Websites and Resources They Request
Version: 0.1.0
Date: 2018-03-23
Version: 0.2.0
Date: 2019-02-03
Authors@R: c(
    person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "cre"), 
           comment = c(ORCID = "0000-0001-5670-2640"))


@@ 26,4 26,4 @@ Imports:
    httr,
    jsonlite,
    magick
RoxygenNote: 6.0.1.9000
RoxygenNote: 6.1.1

M NAMESPACE => NAMESPACE +5 -7
@@ 1,14 1,12 @@
# Generated by roxygen2: do not edit by hand

S3method(print,urlscan)
S3method(print,urlscan_result)
S3method(print,urlscan_submit)
export(urlscan_api_key)
export(urlscan_result)
export(urlscan_search)
export(urlscan_submit)
importFrom(httr,GET)
importFrom(httr,POST)
importFrom(httr,content)
importFrom(httr,status_code)
importFrom(httr,stop_for_status)
importFrom(httr,user_agent)
importFrom(httr,warn_for_status)
import(httr)
importFrom(jsonlite,fromJSON)
importFrom(magick,image_read)

M NEWS.md => NEWS.md +6 -0
@@ 1,2 1,8 @@
0.2.0
* Fixed namespace imports
* Changed user agent generation
* Updated for the new API including support for authentication
* Added object printers

0.1.0 
* Initial release

A R/aaa.R => R/aaa.R +7 -0
@@ 0,0 1,7 @@
# Package-wide User-Agent configuration attached to every urlscan.io request.
# The string is assembled from this package's version and the URL field of
# its DESCRIPTION metadata.
.URLSCANUA <- httr::user_agent(
  sprintf(
    "urlscanR package v%s: (<%s>)",
    utils::packageVersion("urlscan"),
    utils::packageDescription("urlscan")$URL
  )
)

A R/api-key.R => R/api-key.R +36 -0
@@ 0,0 1,36 @@
#' Get or set URLSCAN_API_KEY value
#'
#' The API wrapper functions in this package all rely on a urlscan API
#' key residing in the environment variable `URLSCAN_API_KEY`. The
#' easiest way to accomplish this is to set it in the `.Renviron` file in your
#' home directory.
#'
#' If the variable is unset (or `force` is `TRUE`) and the session is
#' interactive, the key is read from the console and stored in
#' `URLSCAN_API_KEY` for the rest of the session. In a non-interactive
#' session an error is raised instead.
#'
#' @md
#' @param force Force setting a new urlscan API key for the current environment?
#' @return atomic character vector containing the urlscan API key
#' @references <https://urlscan.io/about-api/>
#' @export
urlscan_api_key <- function(force = FALSE) {
  env <- Sys.getenv("URLSCAN_API_KEY")
  if (!identical(env, "") && !force) return(env)

  # There is nobody to prompt in a non-interactive session, so fail loudly.
  if (!interactive()) {
    stop(
      "Please set env var URLSCAN_API_KEY to your urlscan API key",
      call. = FALSE
    )
  }

  message("Couldn't find env var URLSCAN_API_KEY. See ?urlscan_api_key for more details.")
  message("Please enter your urlscan API key and press enter:")
  pat <- readline(": ")

  if (identical(pat, "")) {
    stop("urlscan API key entry failed", call. = FALSE)
  }

  message("Updating URLSCAN_API_KEY env var to PAT")
  # Environment variable names are case-sensitive on most platforms: the name
  # written here must match the one read by Sys.getenv() above exactly,
  # otherwise the key entered by the user is never picked up again.
  Sys.setenv(URLSCAN_API_KEY = pat)

  pat
}

A R/print.R => R/print.R +76 -0
@@ 0,0 1,76 @@
#' Print for urlscan_submit objects
#'
#' Writes the submitted URL, the submission ID, the submission type
#' (visibility) and the submission note, one per line, then returns `x`
#' invisibly.
#'
#' @param x urlscan_submit object
#' @param ... ignored
#' @keywords internal
#' @export
print.urlscan_submit <- function(x, ...) {

  # writeLines() terminates every line (including the last one) with a
  # newline, so the console prompt does not end up glued to the output.
  writeLines(c(
    sprintf("  URL Submitted: %s", x$url),
    sprintf("  Submission ID: %s", x$uuid),
    sprintf("Submission Type: %s", x$visibility),
    sprintf("Submission Note: %s", x$message)
  ))

  invisible(x)

}

#' Print for urlscan objects
#'
#' Prints "No search results" when `x$results` is empty. Otherwise prints the
#' domain and row count of `x$results$page`, followed by the scan ID, IP,
#' AS number/name and country taken from the first result. Returns `x`
#' invisibly.
#'
#' @param x urlscan object
#' @param ... ignored
#' @keywords internal
#' @export
print.urlscan <- function(x, ...) {

  if (length(x$results) == 0) {
    cat("No search results\n")
    return(invisible(x))
  }

  page <- x$results$page

  # writeLines() ends the final line with a newline as well, matching the
  # newline-terminated "No search results" branch above.
  writeLines(c(
    sprintf("Total submissions for %s: %s.", page$domain[1], nrow(page)),
    "Last submission results:",
    sprintf("Scan ID: %s", x$results$`_id`[1]),
    sprintf("     IP: %s", page$ip[1]),
    sprintf("     AS: %s / %s", page$asn[1], page$asnname[1]),
    sprintf("Country: %s", page$country[1])
  ))

  invisible(x)

}

#' Print for urlscan_result objects
#'
#' Prints "No result data" when `x$scan_result` is empty. Otherwise prints a
#' one-line-per-field summary (URL, scan ID, malicious / ad-blocked flags,
#' total links, secure request count and percentage) using the first value
#' of each field. Returns `x` invisibly.
#'
#' @param x urlscan_result object
#' @param ... ignored
#' @keywords internal
#' @export
print.urlscan_result <- function(x, ...) {

  if (length(x$scan_result) == 0) {
    cat("No result data\n")
    return(invisible(x))
  }

  res <- x$scan_result
  stats <- res$stats

  # Every field is reduced to its first element so that each label is
  # printed exactly once; writeLines() also newline-terminates the last line,
  # matching the "No result data" branch above.
  writeLines(c(
    sprintf("            URL: %s", res$page$url[1]),
    sprintf("        Scan ID: %s", res$task$uuid[1]),
    sprintf("      Malicious: %s", stats$malicious[1] == 1),
    sprintf("     Ad Blocked: %s", stats$adBlocked[1] == 1),
    sprintf("    Total Links: %s", stats$totalLinks[1]),
    sprintf("Secure Requests: %s", stats$secureRequests[1]),
    sprintf("   Secure Req %%: %s%%", stats$securePercentage[1])
  ))

  invisible(x)

}

M R/results.R => R/results.R +5 -5
@@ 18,7 18,7 @@ urlscan_result <- function(scan_id, include_dom=FALSE, include_shot=FALSE) {

  httr::GET(
    url = sprintf("https://urlscan.io/api/v1/result/%s", scan_id),
    httr::user_agent("urlscan #rstats package : https://github.com/hrbrmstr/urlscan")
    .URLSCANUA
  ) -> res

  httr::stop_for_status(res)


@@ 27,15 27,13 @@ urlscan_result <- function(scan_id, include_dom=FALSE, include_shot=FALSE) {

  res <- jsonlite::fromJSON(res)

  class(res) <- c("urlscan_result", "list")

  out <- list(scan_result = res)

  if (include_dom) {

    httr::GET(
      url = sprintf("https://urlscan.io/dom/%s", scan_id),
      httr::user_agent("urlscan #rstats package : https://github.com/hrbrmstr/urlscan")
      .URLSCANUA
    ) -> res

    out$dom <- res


@@ 46,13 44,15 @@ urlscan_result <- function(scan_id, include_dom=FALSE, include_shot=FALSE) {

    httr::GET(
      url = sprintf("https://urlscan.io/screenshots/%s.png", scan_id),
      httr::user_agent("urlscan #rstats package : https://github.com/hrbrmstr/urlscan")
      .URLSCANUA
    ) -> res

    if (httr::status_code(res) == 200) out$screenshot <-  magick::image_read(res$content)

  }

  class(out) <- c("urlscan_result", "list")

  out

}
\ No newline at end of file

M R/submit.R => R/submit.R +10 -4
@@ 16,22 16,28 @@
#' @param public Public or private results? `TRUE` (default) = public.
#' @param custom_agent (character) Override User-Agent for this scan
#' @param referer (character) Override HTTP referer for this scan
#' @param api_key your API key. See [urlscan_api_key()]
#' @return The response to the API call will give you the ID and API endpoint for
#'         the scan; you can use it to retrieve the result after waiting for a short while.
#' @references <https://urlscan.io/about-api/#submission>
#' @export
urlscan_submit <- function(url, public=TRUE, custom_agent=NULL, referer=NULL) {
urlscan_submit <- function(url, public=TRUE, custom_agent=NULL, referer=NULL,
                           api_key = urlscan_api_key()) {

   httr::POST(
     url = "https://urlscan.io/api/v1/scan/",
     content_type_json(),
     query = list(
     encode = "json",
     httr::content_type_json(),
     body = list(
       url = url,
       public = if (public[1]) "on" else NULL,
       customagent = custom_agent,
       referer = referer
     ),
     httr::user_agent("urlscan #rstats package : https://github.com/hrbrmstr/urlscan")
     httr::add_headers(
       `API-Key` = api_key
     ),
     .URLSCANUA
   ) -> res

   httr::stop_for_status(res)

M R/urlscan-package.R => R/urlscan-package.R +1 -1
@@ 10,7 10,7 @@
#' @name urlscan
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @importFrom httr GET user_agent content stop_for_status warn_for_status status_code POST
#' @import httr
#' @importFrom jsonlite fromJSON
#' @importFrom magick image_read
NULL

M R/urlscan.R => R/urlscan.R +1 -2
@@ 2,7 2,6 @@
#'
#' urlscan.io uses an Elasticsearch back-end and enables querying by a number
#' of fields, including:
#'
#' - `domain`: Domain (or a subdomain of it) is contacted in one of the requests
#' - `page.domain`: Domain (or a subdomain of it) is the first domain to be contacted
#' - `ip`: The IP or subnet are contacted in one request


@@ 35,7 34,7 @@ urlscan_search <- function(query, size=100, offset=0, sort=NULL) {
       offset = offset,
       sort = sort
     ),
     httr::user_agent("urlscan #rstats package : https://github.com/hrbrmstr/urlscan")
     .URLSCANUA
   ) -> res

   httr::stop_for_status(res)

M README.Rmd => README.Rmd +17 -19
@@ 1,5 1,7 @@
---
output: rmarkdown::github_document
editor_options: 
  chunk_output_type: console
---

# urlscan


@@ 8,8 10,6 @@ Analyze Websites and Resources They Request

## Description

WIP

The <urlscan.io> service provides an 'API' enabling analysis of 
websites and the resources they request. Much like the 'Inspector' of your 
browser, <urlscan.io> will let you take a look at the individual resources 


@@ 27,6 27,10 @@ The following functions are implemented:
## Installation

```{r eval=FALSE}
devtools::install_git("https://git.sr.ht/~hrbrmstr/urlscan")
# or
devtools::install_gitlab("hrbrmstr/urlscan")
# or
devtools::install_github("hrbrmstr/urlscan")
```



@@ 38,33 42,27 @@ options(width=120)

```{r message=FALSE, warning=FALSE, error=FALSE}
library(urlscan)
library(tidyverse) # for demos

# current version
packageVersion("urlscan")
```

```{r}
library(tidyverse)

x <- urlscan_search("domain:r-project.org")

bind_cols(
  select(x$results$task, -options) %>% 
    mutate(user_agent = x$results$task$options$useragent)
  ,x$results$stats, 
  x$results$page
) %>% 
  mutate(id = x$results$`_id`) %>% 
  mutate(result_api_url = x$results$result) %>% 
  tbl_df() -> xdf

xdf

glimpse(xdf)
as_tibble(x$results$task) %>% 
  bind_cols(as_tibble(x$results$page)) %>% 
  mutate(
    time = anytime::anytime(time),
    id = x$results$`_id`
  ) %>%
  arrange(desc(time)) %>% 
  select(url, country, server, ip, id) -> xdf

ures <- urlscan_result(xdf$id[2], TRUE, TRUE)
ures <- urlscan_result(xdf$id[2], include_dom = TRUE, include_shot = TRUE)

str(ures$scan_result, 2)
ures

magick::image_write(ures$screenshot, "img/shot.png")
```

M README.md => README.md +27 -142
@@ 5,26 5,28 @@ Analyze Websites and Resources They Request

## Description

WIP

The \<urlscan.io\> service provides an ‘API’ enabling analysis of
websites and the resources they request. Much like the ‘Inspector’ of
your browser, \<urlscan.io\> will let you take a look at the individual
resources that are requested when a site is loaded. Tools are provided
to search public \<urlscans.io\> scan submissions/results and submit URLs 
for scanning.
to search public \<urlscan.io\> scan submissions/results and submit
URLs for scanning.

## What’s Inside The Tin

The following functions are implemented:

  - `urlscan_search`: Perform a urlscan.io query
  - `urlscan_result`:	Retrieve detailed results for a given scan ID
  - `urlscan_submit`:	Submit a URL for scanning
  - `urlscan_result`: Retrieve detailed results for a given scan ID
  - `urlscan_submit`: Submit a URL for scanning

## Installation

``` r
devtools::install_git("https://git.sr.ht/~hrbrmstr/urlscan")
# or
devtools::install_gitlab("hrbrmstr/urlscan")
# or
devtools::install_github("hrbrmstr/urlscan")
```



@@ 32,155 34,38 @@ devtools::install_github("hrbrmstr/urlscan")

``` r
library(urlscan)
library(tidyverse) # for demos

# current version
packageVersion("urlscan")
```

    ## [1] '0.1.0'

``` r
library(tidyverse)
```

    ## ── Attaching packages ────────────────────────────────────── tidyverse 1.2.1 ──

    ## ✔ ggplot2 2.2.1.9000     ✔ purrr   0.2.4     
    ## ✔ tibble  1.4.2          ✔ dplyr   0.7.4     
    ## ✔ tidyr   0.7.2          ✔ stringr 1.2.0     
    ## ✔ readr   1.1.1          ✔ forcats 0.2.0

    ## ── Conflicts ───────────────────────────────────────── tidyverse_conflicts() ──
    ## ✖ dplyr::filter() masks stats::filter()
    ## ✖ dplyr::lag()    masks stats::lag()
    ## [1] '0.2.0'

``` r
x <- urlscan_search("domain:r-project.org")

bind_cols(
  select(x$results$task, -options) %>% 
    mutate(user_agent = x$results$task$options$useragent)
  ,x$results$stats, 
  x$results$page
) %>% 
  mutate(id = x$results$`_id`) %>% 
  mutate(result_api_url = x$results$result) %>% 
  tbl_df() -> xdf

xdf
```

    ## # A tibble: 12 x 22
    ##    visibility method  time  source url    user_agent   uniqIPs consoleMsgs dataLength encodedDataLeng… requests country
    ##    <chr>      <chr>   <chr> <chr>  <chr>  <chr>          <int>       <int>      <int>            <int>    <int> <chr>  
    ##  1 public     manual  2017… web    https… Mozilla/5.0…       1           0      12758              676        2 AT     
    ##  2 public     manual  2017… web    https… Mozilla/5.0…       1           0      14396              676        2 AT     
    ##  3 public     manual  2017… web    https… Mozilla/5.0…       2           0     286138            97317        6 AT     
    ##  4 public     manual  2017… web    https… <NA>               1           0          0                0        1 AT     
    ##  5 public     manual  2017… web    https… <NA>               1           0          0                0        1 AT     
    ##  6 public     manual  2017… web    https… <NA>               1           0       5284             1813        2 AT     
    ##  7 public     manual  2017… web    https… <NA>               1           0       5284             1813        2 AT     
    ##  8 public     manual  2017… web    https… <NA>               1           0       4297             1640        2 AT     
    ##  9 public     manual  2017… web    https… <NA>               1           0      14722             6288        9 AT     
    ## 10 public     manual  2017… web    https… <NA>               2           0     285893            97695        6 AT     
    ## 11 public     automa… 2017… hacke… https… <NA>               1           0     343270           101327        4 AT     
    ## 12 public     automa… 2017… hacke… https… <NA>               1           0     345452           101840        4 AT     
    ## # ... with 10 more variables: server <chr>, city <chr>, domain <chr>, ip <chr>, asnname <chr>, asn <chr>, url1 <chr>,
    ## #   ptr <chr>, id <chr>, result_api_url <chr>
as_tibble(x$results$task) %>% 
  bind_cols(as_tibble(x$results$page)) %>% 
  mutate(
    time = anytime::anytime(time),
    id = x$results$`_id`
  ) %>%
  arrange(desc(time)) %>% 
  select(url, country, server, ip, id) -> xdf

``` r
glimpse(xdf)
```

    ## Observations: 12
    ## Variables: 22
    ## $ visibility        <chr> "public", "public", "public", "public", "public", "public", "public", "public", "public",...
    ## $ method            <chr> "manual", "manual", "manual", "manual", "manual", "manual", "manual", "manual", "manual",...
    ## $ time              <chr> "2017-12-29T17:23:39.785Z", "2017-12-20T15:52:22.902Z", "2017-11-10T13:40:19.991Z", "2017...
    ## $ source            <chr> "web", "web", "web", "web", "web", "web", "web", "web", "web", "web", "hackernews", "hack...
    ## $ url               <chr> "https://cran.r-project.org/web/packages/randomForest/index.html", "https://cran.r-projec...
    ## $ user_agent        <chr> "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) C...
    ## $ uniqIPs           <int> 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1
    ## $ consoleMsgs       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    ## $ dataLength        <int> 12758, 14396, 286138, 0, 0, 5284, 5284, 4297, 14722, 285893, 343270, 345452
    ## $ encodedDataLength <int> 676, 676, 97317, 0, 0, 1813, 1813, 1640, 6288, 97695, 101327, 101840
    ## $ requests          <int> 2, 2, 6, 1, 1, 2, 2, 2, 9, 6, 4, 4
    ## $ country           <chr> "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT"
    ## $ server            <chr> "Apache/2.4.10 (Debian)", "Apache/2.4.10 (Debian)", "Apache/2.4.10 (Debian)", "Apache/2.4...
    ## $ city              <chr> "Vienna", "Vienna", "Vienna", "Vienna", "Vienna", "Vienna", "Vienna", "Vienna", "Vienna",...
    ## $ domain            <chr> "cran.r-project.org", "cran.r-project.org", "www.r-project.org", "cran.r-project.org", "c...
    ## $ ip                <chr> "137.208.57.37", "137.208.57.37", "137.208.57.37", "137.208.57.37", "137.208.57.37", "137...
    ## $ asnname           <chr> "Welthandelsplatz 1, AT", "Welthandelsplatz 1, AT", "Welthandelsplatz 1, AT", "Welthandel...
    ## $ asn               <chr> "AS1776", "AS1776", "AS1776", "AS1776", "AS1776", "AS1776", "AS1776", "AS1776", "AS1776",...
    ## $ url1              <chr> "https://cran.r-project.org/web/packages/randomForest/index.html", "https://cran.r-projec...
    ## $ ptr               <chr> "cran.wu-wien.ac.at", "cran.wu-wien.ac.at", "cran.wu-wien.ac.at", "cran.wu-wien.ac.at", "...
    ## $ id                <chr> "d134c3b7-f306-4c7b-b2cb-c0f900793083", "075778b6-20f6-45a9-bb76-a80ac9bae1d2", "fbacb280...
    ## $ result_api_url    <chr> "https://urlscan.io/api/v1/result/d134c3b7-f306-4c7b-b2cb-c0f900793083", "https://urlscan...

``` r
ures <- urlscan_result(xdf$id[2], TRUE, TRUE)
ures <- urlscan_result(xdf$id[2], include_dom = TRUE, include_shot = TRUE)

str(ures$scan_result, 2)
ures
```

    ## List of 6
    ##  $ data :List of 6
    ##   ..$ requests:'data.frame': 2 obs. of  3 variables:
    ##   ..$ cookies : list()
    ##   ..$ console : list()
    ##   ..$ links   : list()
    ##   ..$ timing  :List of 6
    ##   ..$ globals :'data.frame': 2 obs. of  2 variables:
    ##  $ stats:List of 14
    ##   ..$ resourceStats   :'data.frame': 2 obs. of  9 variables:
    ##   ..$ protocolStats   :'data.frame': 1 obs. of  7 variables:
    ##   ..$ tlsStats        :'data.frame': 1 obs. of  7 variables:
    ##   ..$ serverStats     :'data.frame': 1 obs. of  6 variables:
    ##   ..$ domainStats     :'data.frame': 1 obs. of  9 variables:
    ##   ..$ regDomainStats  :'data.frame': 1 obs. of  9 variables:
    ##   ..$ secureRequests  : int 2
    ##   ..$ securePercentage: int 100
    ##   ..$ IPv6Percentage  : int 0
    ##   ..$ uniqCountries   : int 1
    ##   ..$ totalLinks      : int 0
    ##   ..$ malicious       : int 0
    ##   ..$ adBlocked       : int 0
    ##   ..$ ipStats         :'data.frame': 1 obs. of  14 variables:
    ##  $ meta :List of 1
    ##   ..$ processors:List of 8
    ##  $ task :List of 11
    ##   ..$ uuid         : chr "075778b6-20f6-45a9-bb76-a80ac9bae1d2"
    ##   ..$ time         : chr "2017-12-20T15:52:22.902Z"
    ##   ..$ url          : chr "https://cran.r-project.org/web/packages/e1071/"
    ##   ..$ visibility   : chr "public"
    ##   ..$ options      :List of 1
    ##   ..$ method       : chr "manual"
    ##   ..$ source       : chr "web"
    ##   ..$ userAgent    : chr "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"
    ##   ..$ reportURL    : chr "https://urlscan.io/result/075778b6-20f6-45a9-bb76-a80ac9bae1d2/"
    ##   ..$ screenshotURL: chr "https://urlscan.io/screenshots/075778b6-20f6-45a9-bb76-a80ac9bae1d2.png"
    ##   ..$ domURL       : chr "https://urlscan.io/dom/075778b6-20f6-45a9-bb76-a80ac9bae1d2/"
    ##  $ page :List of 9
    ##   ..$ url    : chr "https://cran.r-project.org/web/packages/e1071/"
    ##   ..$ domain : chr "cran.r-project.org"
    ##   ..$ country: chr "AT"
    ##   ..$ city   : chr "Vienna"
    ##   ..$ server : chr "Apache/2.4.10 (Debian)"
    ##   ..$ ip     : chr "137.208.57.37"
    ##   ..$ ptr    : chr "cran.wu-wien.ac.at"
    ##   ..$ asn    : chr "AS1776"
    ##   ..$ asnname: chr "Welthandelsplatz 1, AT"
    ##  $ lists:List of 9
    ##   ..$ ips         : chr "137.208.57.37"
    ##   ..$ countries   : chr "AT"
    ##   ..$ asns        : chr "1776"
    ##   ..$ domains     : chr "cran.r-project.org"
    ##   ..$ servers     : chr "Apache/2.4.10 (Debian)"
    ##   ..$ urls        : chr [1:2] "https://cran.r-project.org/web/packages/e1071/" "https://cran.r-project.org/web/CRAN_web.css"
    ##   ..$ linkDomains : list()
    ##   ..$ certificates:'data.frame': 1 obs. of  5 variables:
    ##   ..$ hashes      : chr [1:2] "48f7615c35fe15989530b1df31256a02340bed62069275c534a4222791eb23b2" "6a738f3da9f1203b5d765088a4ff4e4ac36c59fad008f450b808354d9625bc51"
    ##  - attr(*, "class")= chr [1:2] "urlscan_result" "list"
    ##             URL: https://cran.r-project.org/
    ##         Scan ID: cdc2b957-548c-447a-a1b2-bebd6a734aec
    ##       Malicious: FALSE
    ##      Ad Blocked: FALSE
    ##     Total Links: 0
    ## Secure Requests: 9
    ##    Secure Req %: 100%

``` r
magick::image_write(ures$screenshot, "img/shot.png")

M img/shot.png => img/shot.png +0 -0

A man/print.urlscan.Rd => man/print.urlscan.Rd +17 -0
@@ 0,0 1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/print.R
\name{print.urlscan}
\alias{print.urlscan}
\title{Print for urlscan objects}
\usage{
\method{print}{urlscan}(x, ...)
}
\arguments{
\item{x}{urlscan object}

\item{...}{ignored}
}
\description{
Print for urlscan objects
}
\keyword{internal}

A man/print.urlscan_result.Rd => man/print.urlscan_result.Rd +17 -0
@@ 0,0 1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/print.R
\name{print.urlscan_result}
\alias{print.urlscan_result}
\title{Print for urlscan_result objects}
\usage{
\method{print}{urlscan_result}(x, ...)
}
\arguments{
\item{x}{urlscan_result object}

\item{...}{ignored}
}
\description{
Print for urlscan_result objects
}
\keyword{internal}

A man/print.urlscan_submit.Rd => man/print.urlscan_submit.Rd +17 -0
@@ 0,0 1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/print.R
\name{print.urlscan_submit}
\alias{print.urlscan_submit}
\title{Print for urlscan_submit objects}
\usage{
\method{print}{urlscan_submit}(x, ...)
}
\arguments{
\item{x}{urlscan_submit object}

\item{...}{ignored}
}
\description{
Print for urlscan_submit objects
}
\keyword{internal}

A man/urlscan_api_key.Rd => man/urlscan_api_key.Rd +23 -0
@@ 0,0 1,23 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/api-key.R
\name{urlscan_api_key}
\alias{urlscan_api_key}
\title{Get or set URLSCAN_API_KEY value}
\usage{
urlscan_api_key(force = FALSE)
}
\arguments{
\item{force}{Force setting a new urlscan API key for the current environment?}
}
\value{
atomic character vector containing the urlscan API key
}
\description{
The API wrapper functions in this package all rely on a urlscan API
key residing in the environment variable \code{URLSCAN_API_KEY}. The
easiest way to accomplish this is to set it in the \code{.Renviron} file in your
home directory.
}
\references{
\url{https://urlscan.io/about-api/}
}

M man/urlscan_search.Rd => man/urlscan_search.Rd +2 -3
@@ 18,8 18,6 @@ urlscan_search(query, size = 100, offset = 0, sort = NULL)
\description{
urlscan.io uses an Elasticsearch back-end and enables querying by a number
of fields, including:
}
\details{
\itemize{
\item \code{domain}: Domain (or a subdomain of it) is contacted in one of the requests
\item \code{page.domain}: Domain (or a subdomain of it) is the first domain to be contacted


@@ 31,7 29,8 @@ of fields, including:
\item \code{server}: The page contact a host running this web server
\item \code{task.method}: one of "\code{manual}" or "\code{api}"; show manual (user) or API submissions
}

}
\details{
The fields \code{ip}, \code{domain}, \code{url}, \code{asn}, \code{asnname}, \code{country} and \code{server} can also be prefixed with \code{page.}
to only match the value for the first request/response (e.g. \code{page.server:nginx AND page.domain:de}).
Furthermore, you can concatenate search-terms with \code{AND}, \code{OR}, etc.

M man/urlscan_submit.Rd => man/urlscan_submit.Rd +4 -1
@@ 4,7 4,8 @@
\alias{urlscan_submit}
\title{Submit a URL for scanning}
\usage{
urlscan_submit(url, public = TRUE, custom_agent = NULL, referer = NULL)
urlscan_submit(url, public = TRUE, custom_agent = NULL,
  referer = NULL, api_key = urlscan_api_key())
}
\arguments{
\item{url}{URL to submit}


@@ 14,6 15,8 @@ urlscan_submit(url, public = TRUE, custom_agent = NULL, referer = NULL)
\item{custom_agent}{(character) Override User-Agent for this scan}

\item{referer}{(character) Override HTTP referer for this scan}

\item{api_key}{your API key. See \code{\link[=urlscan_api_key]{urlscan_api_key()}}}
}
\value{
The response to the API call will give you the ID and API endpoint for t