~hrbrmstr/spiderbar

9ac5e43b50a3f631f1455b60a27b57e1a6ea9875 — boB Rudis 4 years ago d84438c
fixed long example lines NOTE
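
This commit addresses the R CMD check NOTE that flags \examples lines wider than 100 characters: each over-long paste0(readLines(system.file(...))) call in the examples is split onto a continuation line. A minimal before/after sketch of the pattern applied in every hunk below (shown with the spiderbar package name; a few files still reference the older "rep" name):

    # Before: the single example line runs past 100 characters and triggers the NOTE
    imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="spiderbar")), collapse="\n")

    # After: the same call wrapped across two lines; behaviour is unchanged
    imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt",
                   package="spiderbar")), collapse="\n")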
M R/RcppExports.R => R/RcppExports.R +2 -1
@@ 24,7 24,8 @@ rep_crawl_delays <- function(xp) {
#' @return character vector of all sitemaps found in the parsed `robots.txt` file
#' @export
#' @examples
-#' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\n")
+#' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt",
+#'                package="rep")), collapse="\n")
#' rt <- robxp(imdb)
#' sitemaps(rt)
sitemaps <- function(xp) {

M R/can-fetch.r => R/can-fetch.r +2 -1
@@ 10,7 10,8 @@
#' @param user_agent user agent to test
#' @export
#' @examples
-#' gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="spiderbar")), collapse="\n")
+#' gh <- paste0(readLines(system.file("extdata", "github-robots.txt",
+#'              package="spiderbar")), collapse="\n")
#' gh_rt <- robxp(gh)
#'
#' can_fetch(gh_rt, "/humans.txt", "*") # TRUE

M R/crawl-delay.r => R/crawl-delay.r +4 -2
@@ 6,11 6,13 @@
#' @note `-1` will be returned for any listed agent _without_ a crawl delay setting
#' @export
#' @examples
-#' gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="spiderbar")), collapse="\n")
+#' gh <- paste0(readLines(system.file("extdata", "github-robots.txt",
+#'              package="spiderbar")), collapse="\n")
#' gh_rt <- robxp(gh)
#' crawl_delays(gh_rt)
#'
-#' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="spiderbar")), collapse="\n")
+#' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt",
+#'                package="spiderbar")), collapse="\n")
#' imdb_rt <- robxp(imdb)
#' crawl_delays(imdb_rt)
crawl_delays <- function(obj) {

M R/robxp.r => R/robxp.r +2 -1
@@ 9,7 9,8 @@
#'        will be concatenated into a single string and parsed and the connection will be closed.
#' @export
#' @examples
-#' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="spiderbar")), collapse="\n")
+#' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt",
+#'                package="spiderbar")), collapse="\n")
#' rt <- robxp(imdb)
robxp <- function(x) {


M cran-comments.md => cran-comments.md +5 -3
@@ 17,9 17,11 @@ This is a new release, so there are no reverse dependencies.

---

-* Code coverage is provided via codecov.io: https://codecov.io/gh/hrbrmstr/rep
-* Travis-CI build/test results are at https://travis-ci.org/hrbrmstr/rep
-* Appveyor build/test results are at https://ci.appveyor.com/project/hrbrmstr/rep
+* Package name, Title & Description were changed/fixed
+  as requested by CRAN (Swetlana Herbrandt)
+* Code coverage is provided via codecov.io: https://codecov.io/gh/hrbrmstr/spiderbar
+* Travis-CI build/test results are at https://travis-ci.org/hrbrmstr/spiderbar
+* Appveyor build/test results are at https://ci.appveyor.com/project/hrbrmstr/spiderbar
* No external network calls are made for the robots.txt tests or examples; the four
  files bundled in the inst/extdata folder are used instead (see the sketch after this list).
* The README.md generation does exercise the external URL tests.
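
A minimal sketch of that offline setup (only github-robots.txt and imdb-robots.txt are named in this commit; the exact set of bundled files is whatever ships in inst/extdata):

    # Fixtures resolve from the installed package, so no network access is required
    list.files(system.file("extdata", package = "spiderbar"))

    gh <- paste0(readLines(system.file("extdata", "github-robots.txt",
                 package = "spiderbar")), collapse = "\n")
    gh_rt <- robxp(gh)
    can_fetch(gh_rt, "/humans.txt", "*")  # TRUE, per the example in R/can-fetch.r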

M man/can_fetch.Rd => man/can_fetch.Rd +2 -1
@@ 19,7 19,8 @@ return a logical vector indicating whether you have permission to fetch the cont
at the respective path.
}
\examples{
-gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="spiderbar")), collapse="\\n")
+gh <- paste0(readLines(system.file("extdata", "github-robots.txt",
+             package="spiderbar")), collapse="\\n")
gh_rt <- robxp(gh)

can_fetch(gh_rt, "/humans.txt", "*") # TRUE

M man/crawl_delays.Rd => man/crawl_delays.Rd +4 -2
@@ 19,11 19,13 @@ Retrieve all agent crawl delay values in a \code{robxp} \code{robots.txt} object
\code{-1} will be returned for any listed agent \emph{without} a crawl delay setting
}
\examples{
-gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="spiderbar")), collapse="\\n")
+gh <- paste0(readLines(system.file("extdata", "github-robots.txt",
+             package="spiderbar")), collapse="\\n")
gh_rt <- robxp(gh)
crawl_delays(gh_rt)

-imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="spiderbar")), collapse="\\n")
+imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt",
+               package="spiderbar")), collapse="\\n")
imdb_rt <- robxp(imdb)
crawl_delays(imdb_rt)
}

M man/robxp.Rd => man/robxp.Rd +2 -1
@@ 17,6 17,7 @@ This function takes in a single element character vector and parses it into
a `robxp` object.
}
\examples{
-imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="spiderbar")), collapse="\\n")
+imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt",
+               package="spiderbar")), collapse="\\n")
rt <- robxp(imdb)
}

M man/sitemaps.Rd => man/sitemaps.Rd +2 -1
@@ 16,7 16,8 @@ character vector of all sitemaps found in the parsed \code{robots.txt} file
Retrieve a character vector of sitemaps from a parsed robots.txt object
}
\examples{
-imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\\n")
+imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt",
+               package="rep")), collapse="\\n")
rt <- robxp(imdb)
sitemaps(rt)
}

M src/repmain.cpp => src/repmain.cpp +2 -1
@@ 47,7 47,8 @@ DataFrame rep_crawl_delays(SEXP xp) {
//' @return character vector of all sitemaps found in the parsed `robots.txt` file
//' @export
//' @examples
-//' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\n")
+//' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt",
+//'                package="rep")), collapse="\n")
//' rt <- robxp(imdb)
//' sitemaps(rt)
// [[Rcpp::export]]