~hrbrmstr/urlscan

af7b473a760184beb3065541beec2c18408a1101 — boB Rudis 2 years ago eadb13b
added submit function
8 files changed, 94 insertions(+), 6 deletions(-)

M DESCRIPTION
M NAMESPACE
A R/submit.R
M R/urlscan-package.R
M README.Rmd
M README.md
M man/urlscan.Rd
A man/urlscan_submit.Rd
M DESCRIPTION => DESCRIPTION +1 -1
@@ 12,7 12,7 @@ Description: The <urlscan.io> service provides an 'API' enabling analysis of
    websites and the resources they request. Much like the 'Inspector' of your 
    browser, <urlscan.io> will let you take a look at the individual resources 
    that are requested when a site is loaded. Tools are provided to search
    public <urlscans.io> scan submissions.
    public <urlscan.io> scan submissions/results and submit URLs for scanning.
URL: https://github.com/hrbrmstr/urlscan
BugReports: https://github.com/hrbrmstr/urlscan/issues
Encoding: UTF-8

M NAMESPACE => NAMESPACE +2 -0
@@ 2,7 2,9 @@

export(urlscan_result)
export(urlscan_search)
export(urlscan_submit)
importFrom(httr,GET)
importFrom(httr,POST)
importFrom(httr,content)
importFrom(httr,status_code)
importFrom(httr,stop_for_status)

A R/submit.R => R/submit.R +47 -0
@@ 0,0 1,47 @@
#' Submit a URL for scanning
#'
#' urlscan offers API access to submit URLs for researchers and other institutions.
#' Using the API requires an API key. If you'd like to start submitting URLs via the
#' API, contact them at <mailto:info@@urlscan.io> with a short description on how
#' you're planning to use the API. Submissions via the API can be public or private,
#' though they prefer public submissions so other users can benefit from the results
#' of your scans.\cr
#' \cr
#' The API key should be in the environment variable `URLSCAN_API_KEY` and the
#' easiest way to do that is via the `~/.Renviron` file. You can also manually pass
#' it in via the `api_key` parameter.
#'
#' @md
#' @param url (character) A single URL to submit
#' @param public Public or private results? `TRUE` (default) = public.
#' @param custom_agent (character) Override User-Agent for this scan
#' @param referer (character) Override HTTP referer for this scan
#' @param api_key (character) urlscan.io API key. Defaults to the value of the
#'        `URLSCAN_API_KEY` environment variable.
#' @return A list (with class `urlscan_submit`) holding the parsed JSON response
#'         to the API call. The response will give you the ID and API endpoint for
#'         the scan; you can use it to retrieve the result after waiting for a
#'         short while. An error is raised if the arguments are invalid or the
#'         API responds with an HTTP error status.
#' @references <https://urlscan.io/about-api/#submission>
#' @export
urlscan_submit <- function(url, public = TRUE, custom_agent = NULL,
                           referer = NULL,
                           api_key = Sys.getenv("URLSCAN_API_KEY")) {

  # Validate before touching the network so misuse fails fast and clearly.
  if (!is.character(url) || length(url) != 1L || is.na(url) || !nzchar(url)) {
    stop("`url` must be a single, non-empty character string.", call. = FALSE)
  }

  if (!is.character(api_key) || length(api_key) != 1L ||
      is.na(api_key) || !nzchar(api_key)) {
    stop(
      "An API key is required: set the URLSCAN_API_KEY environment variable ",
      "or pass `api_key`.",
      call. = FALSE
    )
  }

  # Only the first element of `public` is consulted; anything that is not
  # TRUE (including NA) results in the `public` field being omitted.
  payload <- list(
    url = url,
    public = if (isTRUE(as.logical(public[1]))) "on" else NULL,
    customagent = custom_agent,
    referer = referer
  )

  # Drop unset (NULL) fields so they are not serialized into the JSON body.
  payload <- Filter(Negate(is.null), payload)

  # `encode = "json"` serializes `payload` as the request body and sets the
  # JSON content type; the key is sent in the `API-Key` request header.
  res <- httr::POST(
    url = "https://urlscan.io/api/v1/scan/",
    httr::add_headers(`API-Key` = api_key),
    httr::user_agent("urlscan #rstats package : https://github.com/hrbrmstr/urlscan"),
    body = payload,
    encode = "json"
  )

  httr::stop_for_status(res)

  # Explicit encoding avoids httr's "no encoding supplied" message.
  res <- httr::content(res, as = "text", encoding = "UTF-8")

  res <- jsonlite::fromJSON(res)

  class(res) <- c("urlscan_submit", "list")

  res

}
\ No newline at end of file

M R/urlscan-package.R => R/urlscan-package.R +2 -2
@@ 4,13 4,13 @@
#' websites and the resources they request. Much like the 'Inspector' of your
#' browser, <urlscan.io> will let you take a look at the individual resources
#' that are requested when a site is loaded. Tools are provided to search
#' public <urlscans.io> scan submissions.
#' public <urlscan.io> scan submissions/results and submit URLs for scanning.
#'
#' @md
#' @name urlscan
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @importFrom httr GET user_agent content stop_for_status warn_for_status status_code
#' @importFrom httr GET user_agent content stop_for_status warn_for_status status_code POST
#' @importFrom jsonlite fromJSON
#' @importFrom magick image_read
NULL

M README.Rmd => README.Rmd +2 -1
@@ 14,7 14,7 @@ The <urlscan.io> service provides an 'API' enabling analysis of
websites and the resources they request. Much like the 'Inspector' of your 
browser, <urlscan.io> will let you take a look at the individual resources 
that are requested when a site is loaded. Tools are provided to search
public <urlscans.io> scan submissions.
public <urlscan.io> scan submissions/results and submit URLs for scanning.

## What's Inside The Tin



@@ 22,6 22,7 @@ The following functions are implemented:

- `urlscan_search`: Perform a urlscan.io query
- `urlscan_result`:	Retrieve detailed results for a given scan ID
- `urlscan_submit`:	Submit a URL for scanning

## Installation


M README.md => README.md +3 -1
@@ 11,7 11,8 @@ The \<urlscan.io\> service provides an ‘API’ enabling analysis of
websites and the resources they request. Much like the ‘Inspector’ of
your browser, \<urlscan.io\> will let you take a look at the individual
resources that are requested when a site is loaded. Tools are provided
to search public \<urlscans.io\> scan submissions.
to search public \<urlscan.io\> scan submissions/results and submit URLs
for scanning.

## What’s Inside The Tin



@@ 19,6 20,7 @@ The following functions are implemented:

  - `urlscan_search`: Perform a urlscan.io query
  - `urlscan_result`:	Retrieve detailed results for a given scan ID
  - `urlscan_submit`:	Submit a URL for scanning

## Installation


M man/urlscan.Rd => man/urlscan.Rd +1 -1
@@ 10,7 10,7 @@ The <urlscan.io> service provides an 'API' enabling analysis of
websites and the resources they request. Much like the 'Inspector' of your
browser, <urlscan.io> will let you take a look at the individual resources
that are requested when a site is loaded. Tools are provided to search
public <urlscans.io> scan submissions.
public <urlscan.io> scan submissions/results and submit URLs for scanning.
}
\author{
Bob Rudis (bob@rud.is)

A man/urlscan_submit.Rd => man/urlscan_submit.Rd +36 -0
@@ 0,0 1,36 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/submit.R
\name{urlscan_submit}
\alias{urlscan_submit}
\title{Submit a URL for scanning}
\usage{
urlscan_submit(url, public = TRUE, custom_agent = NULL, referer = NULL)
}
\arguments{
\item{url}{URL to submit}

\item{public}{Public or private results? \code{TRUE} (default) = public.}

\item{custom_agent}{(character) Override User-Agent for this scan}

\item{referer}{(character) Override HTTP referer for this scan}
}
\value{
The response to the API call will give you the ID and API endpoint for
the scan; you can use it to retrieve the result after waiting for a short while.
}
\description{
urlscan offers API access to submit URLs for researchers and other institutions.
Using the API requires an API key. If you'd like to start submitting URLs via the
API, contact them at \url{mailto:info@urlscan.io} with a short description on how
you're planning to use the API. Submissions via the API can be public or private,
though they prefer public submissions so other users can benefit from the results
of your scans.\cr
\cr
The API key should be in the environment variable \code{URLSCAN_API_KEY} and the
easiest way to do that is via the \code{~/.Renviron} file. You can also manually pass
it in as a parameter.
}
\references{
\url{https://urlscan.io/about-api/#submission}
}