~hrbrmstr/spiderbar

4277d704f7aae7ae01f3f9fd2bd27b1749d48085 — boB Rudis 3 years ago 4848ff5
Getting closer to CRAN (ref #1)
M DESCRIPTION => DESCRIPTION +2 -2
@@ 1,8 1,8 @@
Package: rep
Type: Package
Title: Tools to Parse and Test Robots Exclusion Protocol Files and Rules
Version: 0.1.0
Date: 2017-08-14
Version: 0.2.0
Date: 2017-09-23
Author: Bob Rudis (bob@rud.is) [aut, cre], SEOmoz, Inc [aut]
Maintainer: Bob Rudis <bob@rud.is>
Description: The 'Robots Exclusion Protocol' <http://www.robotstxt.org/orig.html> documents

M NEWS.md => NEWS.md +4 -0
@@ 1,2 1,6 @@
0.2.0
* Added crawl delay extraction
* Made all examples local so CRAN doesn't have to hit actual websites

0.1.0 
* Initial release

A R/can-fetch.r => R/can-fetch.r +22 -0
@@ 0,0 1,22 @@
#' Check whether a path may be fetched under a parsed robots.txt
#'
#' Hands `path` and `user_agent` to the package's path-matching routine when
#' `obj` is a `robxp` object. Any other kind of input is not an error; it
#' simply yields `NULL`.
#'
#' @md
#' @param obj `robxp` object
#' @param path path to test
#' @param user_agent user agent to test
#' @return the result of the path check when `obj` inherits from `robxp`
#'   (`TRUE`/`FALSE` in the examples below); `NULL` otherwise
#' @export
#' @examples
#' gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="rep")), collapse="\n")
#' gh_rt <- robxp(gh)
#' can_fetch(gh_rt, "/humans.txt", "*") # TRUE
#' can_fetch(gh_rt, "/login", "*") # FALSE
#' can_fetch(gh_rt, "/oembed", "CCBot") # FALSE
can_fetch <- function(obj, path="/", user_agent="*") {

  # Guard clause: only parsed robots.txt objects can be queried.
  if (!inherits(obj, "robxp")) {
    return(NULL)
  }

  rep_path_allowed(obj, path, user_agent)

}
\ No newline at end of file

M R/cd.r => R/cd.r +10 -1
@@ 1,9 1,18 @@
#' Get all crawl_delay
#' Get all agent crawl delay values
#'
#' @md
#' @param obj `robxp` object
#' @return data frame of agents and their crawl delays
#' @note `-1` will be returned for any listed agent without a crawl delay setting
#' @export
#' @examples
#' gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="rep")), collapse="\n")
#' gh_rt <- robxp(gh)
#' crawl_delays(gh_rt)
#'
#' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\n")
#' imdb_rt <- robxp(imdb)
#' crawl_delays(imdb_rt)
crawl_delays <- function(obj) {

  if (inherits(obj, "robxp")) {

M R/rep.r => R/rep.r +2 -23
@@ 3,9 3,8 @@
#' @param x atomic character vector containing a complete robots.txt file
#' @export
#' @examples
#' library(robotstxt)
#' can_fetch(rt, "/asthma/asthma_stats/default.htm", "*") # TRUE
#' can_fetch(rt, "/_borders", "*") # FALSE
#' imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\n")
#' rt <- robxp(imdb)
robxp <- function(x) {

  robxp <- rep_parse(x)


@@ 15,26 14,6 @@ robxp <- function(x) {

}

#' Test URL path against robots.txt
#'
#' @md
#' @param obj `robxp` object
#' @param path path to test
#' @param user_agent user agent to test
#' @export
#' @examples
#' library(robotstxt)
#' can_fetch(rt, "/asthma/asthma_stats/default.htm", "*") # TRUE
#' can_fetch(rt, "/_borders", "*") # FALSE
can_fetch <- function(obj, path="/", user_agent="*") {

  # Only objects carrying the "robxp" class are forwarded to the path
  # matcher; every other input falls through to NULL.
  is_parsed <- inherits(obj, "robxp")

  if (is_parsed) rep_path_allowed(obj, path, user_agent) else NULL

}

#' Custom printer for 'robxp' objects
#'

M README.Rmd => README.Rmd +27 -9
@@ 2,23 2,31 @@
output: rmarkdown::github_document
---

[![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/rep.svg?branch=master)](https://travis-ci.org/hrbrmstr/rep) 
[![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/hrbrmstr/rep?branch=master&svg=true)](https://ci.appveyor.com/project/hrbrmstr/rep) 
[![Coverage Status](https://img.shields.io/codecov/c/github/hrbrmstr/rep/master.svg)](https://codecov.io/github/hrbrmstr/rep?branch=master)
[Travis-CI Build Status](https://travis-ci.org/hrbrmstr/rep.svg?branch=master) | 
[AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/hrbrmstr/rep?branch=master&svg=true) | 
[Coverage Status](https://img.shields.io/codecov/c/github/hrbrmstr/rep/master.svg)

`rep` : Tools to Parse and Test Robots Exclusion Protocol Files and Rules
# rep

Tools to Parse and Test Robots Exclusion Protocol Files and Rules

## Description

The 'Robots Exclusion Protocol' <http://www.robotstxt.org/orig.html> documents a set of standards for allowing or excluding robot/spider crawling of different areas of site content. Tools are provided which wrap the 'rep-cpp' <https://github.com/seomoz/rep-cpp> C++ library for processing these 'robots.txt' files.

- [`rep-cpp`](https://github.com/seomoz/rep-cpp)
- [`url-cpp`](https://github.com/seomoz/url-cpp)

## Tools

The following functions are implemented:

- `robxp`:	Create a robots.txt object
- `can_fetch`:	Test URL path against robots.txt
- `crawl_delays`:	Get all agent crawl delay values
- `print.robxp`:	Custom printer for 'robxp' objects
- `robxp`:	Create a robots.txt object

### Installation
## Installation

```{r eval=FALSE}
devtools::install_github("hrbrmstr/rep")


@@ 28,7 36,7 @@ devtools::install_github("hrbrmstr/rep")
options(width=120)
```

### Usage
## Usage

```{r message=FALSE, warning=FALSE, error=FALSE}
library(rep)


@@ 44,9 52,19 @@ print(rt)
can_fetch(rt, "/asthma/asthma_stats/default.htm", "*")

can_fetch(rt, "/_borders", "*")

gh_rt <- robxp(robotstxt::get_robotstxt("github.com"))
can_fetch(gh_rt, "/humans.txt", "*") # TRUE
can_fetch(gh_rt, "/login", "*") # FALSE
can_fetch(gh_rt, "/oembed", "CCBot") # FALSE

crawl_delays(gh_rt)

imdb_rt <- robxp(robotstxt::get_robotstxt("imdb.com"))
crawl_delays(imdb_rt)
```

### Test Results
## Test Results

```{r message=FALSE, warning=FALSE, error=FALSE}
library(rep)


@@ 57,6 75,6 @@ date()
test_dir("tests/")
```

### Code of Conduct
## Code of Conduct

Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms.
\ No newline at end of file

M README.md => README.md +85 -10
@@ 1,25 1,38 @@

[![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/rep.svg?branch=master)](https://travis-ci.org/hrbrmstr/rep) [![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/hrbrmstr/rep?branch=master&svg=true)](https://ci.appveyor.com/project/hrbrmstr/rep) [![Coverage Status](https://img.shields.io/codecov/c/github/hrbrmstr/rep/master.svg)](https://codecov.io/github/hrbrmstr/rep?branch=master)
[Travis-CI Build Status](https://travis-ci.org/hrbrmstr/rep.svg?branch=master) | [AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/hrbrmstr/rep?branch=master&svg=true) | [Coverage Status](https://img.shields.io/codecov/c/github/hrbrmstr/rep/master.svg)

`rep` : Tools to Parse and Test Robots Exclusion Protocol Files and Rules
rep
===

Tools to Parse and Test Robots Exclusion Protocol Files and Rules

Description
-----------

The 'Robots Exclusion Protocol' <http://www.robotstxt.org/orig.html> documents a set of standards for allowing or excluding robot/spider crawling of different areas of site content. Tools are provided which wrap the 'rep-cpp' <https://github.com/seomoz/rep-cpp> C++ library for processing these 'robots.txt' files.

-   [`rep-cpp`](https://github.com/seomoz/rep-cpp)
-   [`url-cpp`](https://github.com/seomoz/url-cpp)

Tools
-----

The following functions are implemented:

-   `robxp`: Create a robots.txt object
-   `can_fetch`: Test URL path against robots.txt
-   `crawl_delays`: Get all agent crawl delay values
-   `print.robxp`: Custom printer for 'robxp' objects
-   `robxp`: Create a robots.txt object

### Installation
Installation
------------

``` r
devtools::install_github("hrbrmstr/rep")
```

### Usage
Usage
-----

``` r
library(rep)


@@ 29,7 42,7 @@ library(robotstxt)
packageVersion("rep")
```

    ## [1] '0.1.0'
    ## [1] '0.2.0'

``` r
rt <- robxp(get_robotstxt("https://cdc.gov"))


@@ 51,7 64,68 @@ can_fetch(rt, "/_borders", "*")

    ## [1] FALSE

### Test Results
``` r
gh_rt <- robxp(robotstxt::get_robotstxt("github.com"))
can_fetch(gh_rt, "/humans.txt", "*") # TRUE
```

    ## [1] TRUE

``` r
can_fetch(gh_rt, "/login", "*") # FALSE
```

    ## [1] FALSE

``` r
can_fetch(gh_rt, "/oembed", "CCBot") # FALSE
```

    ## [1] FALSE

``` r
crawl_delays(gh_rt)
```

    ##                agent crawl_delay
    ## 1             yandex          -1
    ## 2         twitterbot          -1
    ## 3              ccbot          -1
    ## 4        mail.ru_bot          -1
    ## 5         telefonica          -1
    ## 6              slurp          -1
    ## 7          seznambot          -1
    ## 8         sanddollar          -1
    ## 9             coccoc          -1
    ## 10       ia_archiver          -1
    ## 11          swiftbot          -1
    ## 12 red-app-gsa-p-one          -1
    ## 13          naverbot          -1
    ## 14            msnbot          -1
    ## 15             teoma          -1
    ## 16                 *          -1
    ## 17  intuitgsacrawler          -1
    ## 18           bingbot          -1
    ## 19            daumoa          -1
    ## 20         googlebot          -1
    ## 21           httrack          -1
    ## 22       duckduckbot          -1
    ## 23        etaospider          -1
    ## 24          rogerbot          -1
    ## 25            dotbot          -1

``` r
imdb_rt <- robxp(robotstxt::get_robotstxt("imdb.com"))
crawl_delays(imdb_rt)
```

    ##      agent crawl_delay
    ## 1    slurp         0.1
    ## 2 scoutjet         3.0
    ## 3        *        -1.0

Test Results
------------

``` r
library(rep)


@@ 60,17 134,18 @@ library(testthat)
date()
```

    ## [1] "Mon Aug 14 16:35:08 2017"
    ## [1] "Sat Sep 23 09:14:02 2017"

``` r
test_dir("tests/")
```

    ## testthat results ========================================================================================================
    ## OK: 3 SKIPPED: 0 FAILED: 0
    ## OK: 5 SKIPPED: 0 FAILED: 0
    ## 
    ## DONE ===================================================================================================================

### Code of Conduct
Code of Conduct
---------------

Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms.

A inst/extdata/cdc-robots.txt => inst/extdata/cdc-robots.txt +46 -0
@@ 0,0 1,46 @@
# Ignore FrontPage files
User-agent: *
Disallow: /_borders
Disallow: /_derived
Disallow: /_fpclass
Disallow: /_overlay
Disallow: /_private
Disallow: /_themes
Disallow: /_vti_bin
Disallow: /_vti_cnf
Disallow: /_vti_log
Disallow: /_vti_map
Disallow: /_vti_pvt
Disallow: /_vti_txt

# Do not index the following URLs
Disallow: /travel/
Disallow: /flu/espanol/
Disallow: /migration/
Disallow: /Features/SpinaBifidaProgram/
Disallow: /concussion/HeadsUp/training/

# Don't spider search pages
Disallow: /search.do

# Don't spider email-this-page pages
Disallow: /email.do
 
# Don't spider printer-friendly versions of pages
Disallow: /print.do

# Rover is a bad dog
User-agent: Roverbot
Disallow: /

# EmailSiphon is a hunter/gatherer which extracts email addresses for spam-mailers to use
User-agent: EmailSiphon
Disallow: /

# Exclude MindSpider since it appears to be ill-behaved
User-agent: MindSpider
Disallow: /

# Sitemap link per CR14586
Sitemap: http://www.cdc.gov/niosh/sitemaps/sitemapsNIOSH.xml


A inst/extdata/github-robots.txt => inst/extdata/github-robots.txt +1375 -0
@@ 0,0 1,1375 @@
# If you would like to crawl GitHub contact us at support@github.com.
# We also provide an extensive API: https://developer.github.com/

User-agent: CCBot
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: coccoc
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: Daumoa
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: dotbot
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: duckduckbot
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: EtaoSpider
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: Googlebot
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: HTTrack
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: ia_archiver
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: IntuitGSACrawler
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: Mail.RU_Bot
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: msnbot
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: Bingbot
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: naverbot
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: red-app-gsa-p-one
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: rogerbot
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: SandDollar
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: seznambot
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: Slurp
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: Swiftbot
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: Telefonica
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: teoma
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: Twitterbot
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login

User-agent: Yandex
Allow: /*/*/tree/master
Allow: /*/*/blob/master
Disallow: /ekansa/Open-Context-Data
Disallow: /ekansa/opencontext-*
Disallow: /*/*/pulse
Disallow: /*/*/tree/*
Disallow: /*/*/blob/*
Disallow: /*/*/wiki/*/*
Disallow: /gist/*/*/*
Disallow: /oembed
Disallow: /*/forks
Disallow: /*/stars
Disallow: /*/download
Disallow: /*/revisions
Disallow: /*/*/issues/new
Disallow: /*/*/issues/search
Disallow: /*/*/commits/*/*
Disallow: /*/*/commits/*?author
Disallow: /*/*/commits/*?path
Disallow: /*/*/branches
Disallow: /*/*/tags
Disallow: /*/*/contributors
Disallow: /*/*/comments
Disallow: /*/*/stargazers
Disallow: /*/*/search
Disallow: /*/tarball/
Disallow: /*/zipball/
Disallow: /*/*/archive/
Disallow: /raw/*
Disallow: /*/followers
Disallow: /*/following
Disallow: /stars/*
Disallow: /*/blame/
Disallow: /*/watchers
Disallow: /*/network
Disallow: /*/graphs
Disallow: /*/raw/
Disallow: /*/compare/
Disallow: /*/cache/
Disallow: /*/*/blame/
Disallow: /*/*/watchers
Disallow: /*/*/network
Disallow: /*/*/graphs
Disallow: /*/*/raw/
Disallow: /*/*/compare/
Disallow: /*/*/cache/
Disallow: /.git/
Disallow: /*/.git/
Disallow: /*.git$
Disallow: /*/sitemap.xml
Disallow: /search/advanced
Disallow: /search
Disallow: /*q=
Disallow: /*.atom
Disallow: /login


User-agent: *
Allow: /humans.txt
Disallow: /

A inst/extdata/imdb-robots.txt => inst/extdata/imdb-robots.txt +263 -0
@@ 0,0 1,263 @@
# robots.txt for IMDb properties
#
#
# Limit ScoutJet's crawl rate
#
User-agent: ScoutJet
Crawl-delay: 3
#
#
# Yahoo!
User-agent: Slurp
Crawl-delay: .1
Disallow: /tvschedule
Disallow: /ActorSearch
Disallow: /ActressSearch
Disallow: /AddRecommendation
Disallow: /ads/
Disallow: /AlternateVersions
Disallow: /AName
Disallow: /Awards
Disallow: /BAgent
Disallow: /Ballot/
Disallow: /BornInYear
Disallow: /BornWhere
Disallow: /BPublicity
Disallow: /BQuotes
Disallow: /BTrivia
Disallow: /BusinessThisDay
Disallow: /BWorks
Disallow: /careers
Disallow: /help/show_leaf?careeratimdb
Disallow: /CommentsAuthor
Disallow: /CommentsEnter
Disallow: /CommentsIndex
Disallow: /Companies
Disallow: /CrazyCredits
Disallow: /Credits
Disallow: /DiedInYear
Disallow: /DiedWhere
Disallow: /DVD
Disallow: /ExciteTitle
Disallow: /Find
Disallow: /FName
Disallow: /GName
Disallow: /Guests
Disallow: /harvest_me
Disallow: /HelpPage
Disallow: /Icons/
Disallow: /JointVentures
Disallow: /Laserdisc
Disallow: /List
Disallow: /Literature
Disallow: /Locations
Disallow: /LocationTree
Disallow: /Lookup
Disallow: /M/
Disallow: /Maltin
Disallow: /MarriedInYear
Disallow: /MetaSearch
Disallow: /Mlinks
Disallow: /More
Disallow: /Movies
Disallow: /Movies/
Disallow: /MyMovies
Disallow: /mymovies/
Disallow: /name_pick_n_mix
Disallow: /Nsearch
Disallow: /NUrls
Disallow: /OnThisDay
Disallow: /Ontv
Disallow: /OnTV
Disallow: /Overlap
Disallow: /Pawards
Disallow: /pick_n_mix
Disallow: /PName
Disallow: /Posters
Disallow: /prepare_data
Disallow: /Psales
Disallow: /Quiz
Disallow: /r/
Disallow: /ra/
Disallow: /Ratings
Disallow: /rd/
Disallow: /Recommendations
Disallow: /register
Disallow: /ReleaseDates
Disallow: /ReleasedInYear
Disallow: /Reviews
Disallow: /rg/
Disallow: /ri/
Disallow: /RName
Disallow: /Sales
Disallow: /SearchAwards
Disallow: /SearchBios
Disallow: /SearchBusiness
Disallow: /SearchCrazy
Disallow: /SearchDVD
Disallow: /SearchGoofs
Disallow: /SearchLaserdisc
Disallow: /SearchLiterature
Disallow: /SearchPlots
Disallow: /SearchPlotWriters
Disallow: /SearchQuotes
Disallow: /SearchRatios
Disallow: /SearchSongs
Disallow: /SearchTaglines
Disallow: /SearchTechnical
Disallow: /SearchTrivia
Disallow: /SearchVersions
Disallow: /ShowAll
Disallow: /Showing
Disallow: /SName
Disallow: /Soundtracks
Disallow: /Taglines
Disallow: /Tawards
Disallow: /Technical
Disallow: /tiger_redirect
Disallow: /Title/ASIN
Disallow: /TitleBrowse
Disallow: /Trailers
Disallow: /Tsearch
Disallow: /TUrls
Disallow: /VName
Disallow: /Vote
Disallow: /WorkedWith
Disallow: /updates
Disallow: /board
Disallow: /boards
Disallow: /name/*/board
Disallow: /title/*/board
Disallow: /character/*/select-*
Disallow: /character/*/update
#
# Everyone else
#
User-agent: *
Disallow: /tvschedule
Disallow: /ActorSearch
Disallow: /ActressSearch
Disallow: /AddRecommendation
Disallow: /ads/
Disallow: /AlternateVersions
Disallow: /AName
Disallow: /Awards
Disallow: /BAgent
Disallow: /Ballot/
Disallow: /BornInYear
Disallow: /BornWhere
Disallow: /BPublicity
Disallow: /BQuotes
Disallow: /BTrivia
Disallow: /BusinessThisDay
Disallow: /BWorks
Disallow: /careers
Disallow: /help/show_leaf?careeratimdb
Disallow: /CommentsAuthor
Disallow: /CommentsEnter
Disallow: /CommentsIndex
Disallow: /Companies
Disallow: /CrazyCredits
Disallow: /Credits
Disallow: /DiedInYear
Disallow: /DiedWhere
Disallow: /DVD
Disallow: /ExciteTitle
Disallow: /Find
Disallow: /FName
Disallow: /GName
Disallow: /Guests
Disallow: /harvest_me
Disallow: /HelpPage
Disallow: /Icons/
Disallow: /JointVentures
Disallow: /Laserdisc
Disallow: /List
Disallow: /Literature
Disallow: /Locations
Disallow: /LocationTree
Disallow: /Lookup
Disallow: /M/
Disallow: /Maltin
Disallow: /MarriedInYear
Disallow: /MetaSearch
Disallow: /Mlinks
Disallow: /More
Disallow: /Movies
Disallow: /Movies/
Disallow: /MyMovies
Disallow: /mymovies/
Disallow: /name_pick_n_mix
Disallow: /Nsearch
Disallow: /NUrls
Disallow: /OnThisDay
Disallow: /Ontv
Disallow: /OnTV
Disallow: /Overlap
Disallow: /Pawards
Disallow: /pick_n_mix
Disallow: /PName
Disallow: /Posters
Disallow: /prepare_data
Disallow: /Psales
Disallow: /Quiz
Disallow: /r/
Disallow: /ra/
Disallow: /Ratings
Disallow: /rd/
Disallow: /Recommendations
Disallow: /register
Disallow: /ReleaseDates
Disallow: /ReleasedInYear
Disallow: /Reviews
Disallow: /rg/
Disallow: /ri/
Disallow: /RName
Disallow: /Sales
Disallow: /SearchAwards
Disallow: /SearchBios
Disallow: /SearchBusiness
Disallow: /SearchCrazy
Disallow: /SearchDVD
Disallow: /SearchGoofs
Disallow: /SearchLaserdisc
Disallow: /SearchLiterature
Disallow: /SearchPlots
Disallow: /SearchPlotWriters
Disallow: /SearchQuotes
Disallow: /SearchRatios
Disallow: /SearchSongs
Disallow: /SearchTaglines
Disallow: /SearchTechnical
Disallow: /SearchTrivia
Disallow: /SearchVersions
Disallow: /ShowAll
Disallow: /Showing
Disallow: /SName
Disallow: /Soundtracks
Disallow: /Taglines
Disallow: /Tawards
Disallow: /Technical
Disallow: /tiger_redirect
Disallow: /Title/ASIN
Disallow: /TitleBrowse
Disallow: /Trailers
Disallow: /Tsearch
Disallow: /TUrls
Disallow: /VName
Disallow: /Vote
Disallow: /WorkedWith
Disallow: /updates
Disallow: /board
Disallow: /boards
Disallow: /name/*/board
Disallow: /title/*/board
Disallow: /user/*/boards
Disallow: /user/*/boards/
Disallow: /lists/tt*
Disallow: /lists/nm*
Disallow: /character/*/select-*
Disallow: /character/*/update
#
#
Sitemap: http://www.imdb.com/sitemap_US_index.xml.gz

A inst/extdata/wikipedia-robots.txt => inst/extdata/wikipedia-robots.txt +703 -0
@@ 0,0 1,703 @@
# robots.txt for http://www.wikipedia.org/ and friends
#
# Please note: There are a lot of pages on this site, and there are
# some misbehaved spiders out there that go _way_ too fast. If you're
# irresponsible, your access to the site may be blocked.
#

# advertising-related bots:
User-agent: Mediapartners-Google*
Disallow: /

# Wikipedia work bots:
User-agent: IsraBot
Disallow:

User-agent: Orthogaffe
Disallow:

# Crawlers that are kind enough to obey, but which we'd rather not have
# unless they're feeding search engines.
User-agent: UbiCrawler
Disallow: /

User-agent: DOC
Disallow: /

User-agent: Zao
Disallow: /

# Some bots are known to be trouble, particularly those designed to copy
# entire sites. Please obey robots.txt.
User-agent: sitecheck.internetseer.com
Disallow: /

User-agent: Zealbot
Disallow: /

User-agent: MSIECrawler
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: WebStripper
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: Fetch
Disallow: /

User-agent: Offline Explorer
Disallow: /

User-agent: Teleport
Disallow: /

User-agent: TeleportPro
Disallow: /

User-agent: WebZIP
Disallow: /

User-agent: linko
Disallow: /

User-agent: HTTrack
Disallow: /

User-agent: Microsoft.URL.Control
Disallow: /

User-agent: Xenu
Disallow: /

User-agent: larbin
Disallow: /

User-agent: libwww
Disallow: /

User-agent: ZyBORG
Disallow: /

User-agent: Download Ninja
Disallow: /

# Misbehaving: requests much too fast:
User-agent: fast
Disallow: /

#
# Sorry, wget in its recursive mode is a frequent problem.
# Please read the man page and use it properly; there is a
# --wait option you can use to set the delay between hits,
# for instance.
#
User-agent: wget
Disallow: /

#
# The 'grub' distributed client has been *very* poorly behaved.
#
User-agent: grub-client
Disallow: /

#
# Doesn't follow robots.txt anyway, but...
#
User-agent: k2spider
Disallow: /

#
# Hits many times per second, not acceptable
# http://www.nameprotect.com/botinfo.html
User-agent: NPBot
Disallow: /

# A capture bot, downloads gazillions of pages with no public benefit
# http://www.webreaper.net/
User-agent: WebReaper
Disallow: /

# Wayback Machine: defaults and whether to index user-pages
# FIXME: Complete the removal of this block, per T7582.
# User-agent: archive.org_bot
# Allow: /


#
# Friendly, low-speed bots are welcome viewing article pages, but not
# dynamically-generated pages please.
#
# Inktomi's "Slurp" can read a minimum delay between hits; if your
# bot supports such a thing using the 'Crawl-delay' or another
# instruction, please let us know.
#
# There is a special exception for API mobileview to allow dynamic
# mobile web & app views to load section content.
# These views aren't HTTP-cached but use parser cache aggressively
# and don't expose special: pages etc.
#
# Another exception is for REST API documentation, located at
# /api/rest_v1/?doc.
#
User-agent: *
Allow: /w/api.php?action=mobileview&
Allow: /w/load.php?
Allow: /api/rest_v1/?doc
Disallow: /w/
Disallow: /api/
Disallow: /trap/
#
# ar:
Disallow: /wiki/%D8%AE%D8%A7%D8%B5:Search
Disallow: /wiki/%D8%AE%D8%A7%D8%B5%3ASearch
#
# dewiki:
# T6937
# sensible deletion and meta user discussion pages:
Disallow: /wiki/Wikipedia:L%C3%B6schkandidaten/
Disallow: /wiki/Wikipedia:Löschkandidaten/
Disallow: /wiki/Wikipedia:Vandalensperrung/
Disallow: /wiki/Wikipedia:Benutzersperrung/
Disallow: /wiki/Wikipedia:Vermittlungsausschuss/
Disallow: /wiki/Wikipedia:Administratoren/Probleme/
Disallow: /wiki/Wikipedia:Adminkandidaturen/
Disallow: /wiki/Wikipedia:Qualitätssicherung/
Disallow: /wiki/Wikipedia:Qualit%C3%A4tssicherung/
# Search- and random-page
Disallow: /wiki/Spezial:Suche
Disallow: /wiki/Special:Suche
Disallow: /wiki/Spezial:Zufällige_Seite
Disallow: /wiki/Spezial:Zuf%C3%A4llige_Seite
Disallow: /wiki/Special:Zufällige_Seite
Disallow: /wiki/Special:Zuf%C3%A4llige_Seite
# 4937#5
Disallow: /wiki/Wikipedia:Vandalismusmeldung/
Disallow: /wiki/Wikipedia:Gesperrte_Lemmata/
Disallow: /wiki/Wikipedia:Löschprüfung/
Disallow: /wiki/Wikipedia:L%C3%B6schprüfung/
Disallow: /wiki/Wikipedia:Administratoren/Notizen/
Disallow: /wiki/Wikipedia:Schiedsgericht/Anfragen/
Disallow: /wiki/Wikipedia:L%C3%B6schpr%C3%BCfung/
# T14111
Disallow: /wiki/Wikipedia:Checkuser/
Disallow: /wiki/Wikipedia_Diskussion:Checkuser/
Disallow: /wiki/Wikipedia_Diskussion:Adminkandidaturen/
# T15961
Disallow: /wiki/Wikipedia:Spam-Blacklist-Log
Disallow: /wiki/Wikipedia%3ASpam-Blacklist-Log
Disallow: /wiki/Wikipedia_Diskussion:Spam-Blacklist-Log
Disallow: /wiki/Wikipedia_Diskussion%3ASpam-Blacklist-Log
#
# enwiki:
# Folks get annoyed when VfD discussions end up the number 1 google hit for
# their name. See T6776
Disallow: /wiki/Wikipedia:Articles_for_deletion/
Disallow: /wiki/Wikipedia%3AArticles_for_deletion/
Disallow: /wiki/Wikipedia:Votes_for_deletion/
Disallow: /wiki/Wikipedia%3AVotes_for_deletion/
Disallow: /wiki/Wikipedia:Pages_for_deletion/
Disallow: /wiki/Wikipedia%3APages_for_deletion/
Disallow: /wiki/Wikipedia:Miscellany_for_deletion/
Disallow: /wiki/Wikipedia%3AMiscellany_for_deletion/
Disallow: /wiki/Wikipedia:Miscellaneous_deletion/
Disallow: /wiki/Wikipedia%3AMiscellaneous_deletion/
Disallow: /wiki/Wikipedia:Copyright_problems
Disallow: /wiki/Wikipedia%3ACopyright_problems
Disallow: /wiki/Wikipedia:Protected_titles/
Disallow: /wiki/Wikipedia%3AProtected_titles/
# T15398
Disallow: /wiki/Wikipedia:WikiProject_Spam/
Disallow: /wiki/Wikipedia%3AWikiProject_Spam/
# T16075
Disallow: /wiki/MediaWiki:Spam-blacklist
Disallow: /wiki/MediaWiki%3ASpam-blacklist
Disallow: /wiki/MediaWiki_talk:Spam-blacklist
Disallow: /wiki/MediaWiki_talk%3ASpam-blacklist
# T13261
Disallow: /wiki/Wikipedia:Requests_for_arbitration/
Disallow: /wiki/Wikipedia%3ARequests_for_arbitration/
Disallow: /wiki/Wikipedia:Requests_for_comment/
Disallow: /wiki/Wikipedia%3ARequests_for_comment/
Disallow: /wiki/Wikipedia:Requests_for_adminship/
Disallow: /wiki/Wikipedia%3ARequests_for_adminship/
# T12288
Disallow: /wiki/Wikipedia_talk:Articles_for_deletion/
Disallow: /wiki/Wikipedia_talk%3AArticles_for_deletion/
Disallow: /wiki/Wikipedia_talk:Votes_for_deletion/
Disallow: /wiki/Wikipedia_talk%3AVotes_for_deletion/
Disallow: /wiki/Wikipedia_talk:Pages_for_deletion/
Disallow: /wiki/Wikipedia_talk%3APages_for_deletion/
Disallow: /wiki/Wikipedia_talk:Miscellany_for_deletion/
Disallow: /wiki/Wikipedia_talk%3AMiscellany_for_deletion/
Disallow: /wiki/Wikipedia_talk:Miscellaneous_deletion/
Disallow: /wiki/Wikipedia_talk%3AMiscellaneous_deletion/
# T16793
Disallow: /wiki/Wikipedia:Changing_username
Disallow: /wiki/Wikipedia%3AChanging_username
Disallow: /wiki/Wikipedia:Changing_username/
Disallow: /wiki/Wikipedia%3AChanging_username/
Disallow: /wiki/Wikipedia_talk:Changing_username
Disallow: /wiki/Wikipedia_talk%3AChanging_username
Disallow: /wiki/Wikipedia_talk:Changing_username/
Disallow: /wiki/Wikipedia_talk%3AChanging_username/
#
# eswiki:
# T8746
Disallow: /wiki/Wikipedia:Consultas_de_borrado/
Disallow: /wiki/Wikipedia%3AConsultas_de_borrado/
#
# fiwiki:
# T10695
Disallow: /wiki/Wikipedia:Poistettavat_sivut
Disallow: /wiki/K%C3%A4ytt%C3%A4j%C3%A4:
Disallow: /wiki/Käyttäjä:
Disallow: /wiki/Keskustelu_k%C3%A4ytt%C3%A4j%C3%A4st%C3%A4:
Disallow: /wiki/Keskustelu_käyttäjästä:
Disallow: /wiki/Wikipedia:Yll%C3%A4pit%C3%A4j%C3%A4t/
Disallow: /wiki/Wikipedia:Ylläpitäjät/
#
# frwiki:
Disallow: /wiki/Wikip%C3%A9dia:Pages_%C3%A0_supprimer/
Disallow: /wiki/Wikip%C3%A9dia:Pages_soup%C3%A7onn%C3%A9es_de_violation_de_copyright/
#
# hewiki:
Disallow: /wiki/%D7%9E%D7%99%D7%95%D7%97%D7%93:Search
Disallow: /wiki/%D7%9E%D7%99%D7%95%D7%97%D7%93%3ASearch
# T11517
Disallow: /wiki/ויקיפדיה:רשימת_מועמדים_למחיקה/
Disallow: /wiki/ויקיפדיה%3Aרשימת_מועמדים_למחיקה/
Disallow: /wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94:%D7%A8%D7%A9%D7%99%D7%9E%D7%AA_%D7%9E%D7%95%D7%A2%D7%9E%D7%93%D7%99%D7%9D_%D7%9C%D7%9E%D7%97%D7%99%D7%A7%D7%94/
Disallow: /wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94%3A%D7%A8%D7%A9%D7%99%D7%9E%D7%AA_%D7%9E%D7%95%D7%A2%D7%9E%D7%93%D7%99%D7%9D_%D7%9C%D7%9E%D7%97%D7%99%D7%A7%D7%94/
Disallow: /wiki/ויקיפדיה:ערכים_לא_קיימים_ומוגנים
Disallow: /wiki/ויקיפדיה%3Aערכים_לא_קיימים_ומוגנים
Disallow: /wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94:%D7%A2%D7%A8%D7%9B%D7%99%D7%9D_%D7%9C%D7%90_%D7%A7%D7%99%D7%99%D7%9E%D7%99%D7%9D_%D7%95%D7%9E%D7%95%D7%92%D7%A0%D7%99%D7%9D
Disallow: /wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94%3A%D7%A2%D7%A8%D7%9B%D7%99%D7%9D_%D7%9C%D7%90_%D7%A7%D7%99%D7%99%D7%9E%D7%99%D7%9D_%D7%95%D7%9E%D7%95%D7%92%D7%A0%D7%99%D7%9D
Disallow: /wiki/ויקיפדיה:דפים_לא_קיימים_ומוגנים
Disallow: /wiki/ויקיפדיה%3Aדפים_לא_קיימים_ומוגנים
Disallow: /wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94:%D7%93%D7%A4%D7%99%D7%9D_%D7%9C%D7%90_%D7%A7%D7%99%D7%99%D7%9E%D7%99%D7%9D_%D7%95%D7%9E%D7%95%D7%92%D7%A0%D7%99%D7%9D
Disallow: /wiki/%D7%95%D7%99%D7%A7%D7%99%D7%A4%D7%93%D7%99%D7%94%3A%D7%93%D7%A4%D7%99%D7%9D_%D7%9C%D7%90_%D7%A7%D7%99%D7%99%D7%9E%D7%99%D7%9D_%D7%95%D7%9E%D7%95%D7%92%D7%A0%D7%99%D7%9D
#
# huwiki:
Disallow: /wiki/Speci%C3%A1lis:Search
Disallow: /wiki/Speci%C3%A1lis%3ASearch
#
# itwiki:
# T7545
Disallow: /wiki/Wikipedia:Pagine_da_cancellare
Disallow: /wiki/Wikipedia%3APagine_da_cancellare
Disallow: /wiki/Wikipedia:Utenti_problematici
Disallow: /wiki/Wikipedia%3AUtenti_problematici
Disallow: /wiki/Wikipedia:Vandalismi_in_corso
Disallow: /wiki/Wikipedia%3AVandalismi_in_corso
Disallow: /wiki/Wikipedia:Amministratori
Disallow: /wiki/Wikipedia%3AAmministratori
Disallow: /wiki/Wikipedia:Proposte_di_cancellazione_semplificata
Disallow: /wiki/Wikipedia%3AProposte_di_cancellazione_semplificata
Disallow: /wiki/Categoria:Da_cancellare_subito
Disallow: /wiki/Categoria%3ADa_cancellare_subito
Disallow: /wiki/Wikipedia:Sospette_violazioni_di_copyright
Disallow: /wiki/Wikipedia%3ASospette_violazioni_di_copyright
Disallow: /wiki/Categoria:Da_controllare_per_copyright
Disallow: /wiki/Categoria%3ADa_controllare_per_copyright
Disallow: /wiki/Progetto:Rimozione_contributi_sospetti
Disallow: /wiki/Progetto%3ARimozione_contributi_sospetti
Disallow: /wiki/Categoria:Da_cancellare_subito_per_violazione_integrale_copyright
Disallow: /wiki/Categoria%3ADa_cancellare_subito_per_violazione_integrale_copyright
Disallow: /wiki/Progetto:Cococo
Disallow: /wiki/Progetto%3ACococo
Disallow: /wiki/Discussioni_progetto:Cococo
Disallow: /wiki/Discussioni_progetto%3ACococo
#
# jawiki:
Disallow: /wiki/%E7%89%B9%E5%88%A5:Search
Disallow: /wiki/%E7%89%B9%E5%88%A5%3ASearch
# T7239
Disallow: /wiki/Wikipedia:%E5%89%8A%E9%99%A4%E4%BE%9D%E9%A0%BC/
Disallow: /wiki/Wikipedia%3A%E5%89%8A%E9%99%A4%E4%BE%9D%E9%A0%BC/
Disallow: /wiki/Wikipedia:%E5%88%A9%E7%94%A8%E8%80%85%E3%83%9A%E3%83%BC%E3%82%B8%E3%81%AE%E5%89%8A%E9%99%A4%E4%BE%9D%E9%A0%BC
Disallow: /wiki/Wikipedia%3A%E5%88%A9%E7%94%A8%E8%80%85%E3%83%9A%E3%83%BC%E3%82%B8%E3%81%AE%E5%89%8A%E9%99%A4%E4%BE%9D%E9%A0%BC
# nowiki:
# T13432
Disallow: /wiki/Bruker:
Disallow: /wiki/Bruker%3A
Disallow: /wiki/Brukerdiskusjon
Disallow: /wiki/Wikipedia:Administratorer
Disallow: /wiki/Wikipedia%3AAdministratorer
Disallow: /wiki/Wikipedia-diskusjon:Administratorer
Disallow: /wiki/Wikipedia-diskusjon%3AAdministratorer
Disallow: /wiki/Wikipedia:Sletting
Disallow: /wiki/Wikipedia%3ASletting
Disallow: /wiki/Wikipedia-diskusjon:Sletting
Disallow: /wiki/Wikipedia-diskusjon%3ASletting
Disallow: /wiki/Spesial:
Disallow: /wiki/Spesial%3A
#
# plwiki:
# T10067
Disallow: /wiki/Wikipedia:Strony_do_usuni%C4%99cia
Disallow: /wiki/Wikipedia%3AStrony_do_usuni%C4%99cia
Disallow: /wiki/Wikipedia:Do_usuni%C4%99cia
Disallow: /wiki/Wikipedia%3ADo_usuni%C4%99cia
Disallow: /wiki/Wikipedia:SDU/
Disallow: /wiki/Wikipedia%3ASDU/
Disallow: /wiki/Wikipedia:Strony_podejrzane_o_naruszenie_praw_autorskich
Disallow: /wiki/Wikipedia%3AStrony_podejrzane_o_naruszenie_praw_autorskich
#
# ptwiki:
# T7394
Disallow: /wiki/Wikipedia:Páginas_para_eliminar/
Disallow: /wiki/Wikipedia:P%C3%A1ginas_para_eliminar/
Disallow: /wiki/Wikipedia%3AP%C3%A1ginas_para_eliminar/
Disallow: /wiki/Wikipedia_Discussão:Páginas_para_eliminar/
Disallow: /wiki/Wikipedia_Discuss%C3%A3o:P%C3%A1ginas_para_eliminar/
Disallow: /wiki/Wikipedia_Discuss%C3%A3o%3AP%C3%A1ginas_para_eliminar/
#
# rowiki:
# T14546
Disallow: /wiki/Wikipedia:Pagini_de_%C5%9Fters
Disallow: /wiki/Wikipedia%3APagini_de_%C5%9Fters
Disallow: /wiki/Discu%C5%A3ie_Wikipedia:Pagini_de_%C5%9Fters
Disallow: /wiki/Discu%C5%A3ie_Wikipedia%3APagini_de_%C5%9Fters
#
# ruwiki:
Disallow: /wiki/%D0%A1%D0%BF%D0%B5%D1%86%D0%B8%D0%B0%D0%BB%D1%8C%D0%BD%D1%8B%D0%B5:Search
Disallow: /wiki/%D0%A1%D0%BF%D0%B5%D1%86%D0%B8%D0%B0%D0%BB%D1%8C%D0%BD%D1%8B%D0%B5%3ASearch
#
# svwiki:
# T12229
Disallow: /wiki/Wikipedia%3ASidor_f%C3%B6reslagna_f%C3%B6r_radering
Disallow: /wiki/Wikipedia:Sidor_f%C3%B6reslagna_f%C3%B6r_radering
Disallow: /wiki/Wikipedia:Sidor_föreslagna_för_radering
Disallow: /wiki/Användare
Disallow: /wiki/Anv%C3%A4ndare
Disallow: /wiki/Användardiskussion
Disallow: /wiki/Anv%C3%A4ndardiskussion
Disallow: /wiki/Wikipedia:Skyddade_sidnamn
Disallow: /wiki/Wikipedia%3ASkyddade_sidnamn
# T13291
Disallow: /wiki/Wikipedia:Sidor_som_bör_raderas
Disallow: /wiki/Wikipedia:Sidor_som_b%C3%B6r_raderas
Disallow: /wiki/Wikipedia%3ASidor_som_b%C3%B6r_raderas
#
# zhwiki:
# T7104
Disallow: /wiki/Wikipedia:删除投票/侵权
Disallow: /wiki/Wikipedia:%E5%88%A0%E9%99%A4%E6%8A%95%E7%A5%A8/%E4%BE%B5%E6%9D%83
Disallow: /wiki/Wikipedia:删除投票和请求
Disallow: /wiki/Wikipedia:%E5%88%A0%E9%99%A4%E6%8A%95%E7%A5%A8%E5%92%8C%E8%AF%B7%E6%B1%82
Disallow: /wiki/Category:快速删除候选
Disallow: /wiki/Category:%E5%BF%AB%E9%80%9F%E5%88%A0%E9%99%A4%E5%80%99%E9%80%89
Disallow: /wiki/Category:维基百科需要翻译的文章
Disallow: /wiki/Category:%E7%BB%B4%E5%9F%BA%E7%99%BE%E7%A7%91%E9%9C%80%E8%A6%81%E7%BF%BB%E8%AF%91%E7%9A%84%E6%96%87%E7%AB%A0
#
# sister projects
#
# enwikinews:
# T7340
Disallow: /wiki/Portal:Prepared_stories/
Disallow: /wiki/Portal%3APrepared_stories/
#
# itwikinews:
# T11138
Disallow: /wiki/Wikinotizie:Richieste_di_cancellazione
Disallow: /wiki/Wikinotizie:Sospette_violazioni_di_copyright
Disallow: /wiki/Categoria:Da_cancellare_subito
Disallow: /wiki/Categoria:Da_cancellare_subito_per_violazione_integrale_copyright
Disallow: /wiki/Wikinotizie:Storie_in_preparazione
#
# enwikiquote:
# T17095
Disallow: /wiki/Wikiquote:Votes_for_deletion/
Disallow: /wiki/Wikiquote%3AVotes_for_deletion/
Disallow: /wiki/Wikiquote_talk:Votes_for_deletion/
Disallow: /wiki/Wikiquote_talk%3AVotes_for_deletion/
Disallow: /wiki/Wikiquote:Votes_for_deletion_archive/
Disallow: /wiki/Wikiquote%3AVotes_for_deletion_archive/
Disallow: /wiki/Wikiquote_talk:Votes_for_deletion_archive/
Disallow: /wiki/Wikiquote_talk%3AVotes_for_deletion_archive/
#
# enwikibooks:
Disallow: /wiki/Wikibooks:Votes_for_deletion
#
# working...
Disallow: /wiki/Fundraising_2007/comments
#
Disallow: /wiki/Special:Maintenance
# Do not show banner content or record hits
Disallow: /wiki/Special:BannerLoader
Disallow: /wiki/Special:RecordImpression
#
#
#----------------------------------------------------------#
#
#
#
 # <!-- Please do not remove the space at the start of this line, it breaks the rendering.  http://www.robotstxt.org/orig.html says spaces before comments are OK. --><pre>
#
# Localisable part of robots.txt for en.wikipedia.org
#
# Edit at https://en.wikipedia.org/w/index.php?title=MediaWiki:Robots.txt&action=edit
# Don't add newlines here. All rules set here are active for every user-agent.
#
# Please check any changes using a syntax validator such as http://tool.motoricerca.info/robots-checker.phtml
# Enter https://en.wikipedia.org/robots.txt as the URL to check.
#
# https://bugzilla.wikimedia.org/show_bug.cgi?id=14075
Disallow: /wiki/MediaWiki:Spam-blacklist
Disallow: /wiki/MediaWiki%3ASpam-blacklist
Disallow: /wiki/MediaWiki_talk:Spam-blacklist
Disallow: /wiki/MediaWiki_talk%3ASpam-blacklist
Disallow: /wiki/Wikipedia:WikiProject_Spam
Disallow: /wiki/Wikipedia_talk:WikiProject_Spam
#
# Folks get annoyed when XfD discussions end up as the number 1 Google hit for
# their name.
# https://phabricator.wikimedia.org/T16075
Disallow: /wiki/Wikipedia:Articles_for_deletion
Disallow: /wiki/Wikipedia%3AArticles_for_deletion
Disallow: /wiki/Wikipedia:Votes_for_deletion
Disallow: /wiki/Wikipedia%3AVotes_for_deletion
Disallow: /wiki/Wikipedia:Pages_for_deletion
Disallow: /wiki/Wikipedia%3APages_for_deletion
Disallow: /wiki/Wikipedia:Miscellany_for_deletion
Disallow: /wiki/Wikipedia%3AMiscellany_for_deletion
Disallow: /wiki/Wikipedia:Miscellaneous_deletion
Disallow: /wiki/Wikipedia%3AMiscellaneous_deletion
Disallow: /wiki/Wikipedia:Categories_for_discussion
Disallow: /wiki/Wikipedia%3ACategories_for_discussion
Disallow: /wiki/Wikipedia:Templates_for_deletion
Disallow: /wiki/Wikipedia%3ATemplates_for_deletion
Disallow: /wiki/Wikipedia:Redirects_for_discussion
Disallow: /wiki/Wikipedia%3ARedirects_for_discussion
Disallow: /wiki/Wikipedia:Deletion_review
Disallow: /wiki/Wikipedia%3ADeletion_review
Disallow: /wiki/Wikipedia:WikiProject_Deletion_sorting
Disallow: /wiki/Wikipedia%3AWikiProject_Deletion_sorting
Disallow: /wiki/Wikipedia:Files_for_deletion
Disallow: /wiki/Wikipedia%3AFiles_for_deletion
Disallow: /wiki/Wikipedia:Files_for_discussion
Disallow: /wiki/Wikipedia%3AFiles_for_discussion
Disallow: /wiki/Wikipedia:Possibly_unfree_files
Disallow: /wiki/Wikipedia%3APossibly_unfree_files
#
# https://phabricator.wikimedia.org/T12288
Disallow: /wiki/Wikipedia_talk:Articles_for_deletion
Disallow: /wiki/Wikipedia_talk%3AArticles_for_deletion
Disallow: /wiki/Wikipedia_talk:Votes_for_deletion
Disallow: /wiki/Wikipedia_talk%3AVotes_for_deletion
Disallow: /wiki/Wikipedia_talk:Pages_for_deletion
Disallow: /wiki/Wikipedia_talk%3APages_for_deletion
Disallow: /wiki/Wikipedia_talk:Miscellany_for_deletion
Disallow: /wiki/Wikipedia_talk%3AMiscellany_for_deletion
Disallow: /wiki/Wikipedia_talk:Miscellaneous_deletion
Disallow: /wiki/Wikipedia_talk%3AMiscellaneous_deletion
Disallow: /wiki/Wikipedia_talk:Templates_for_deletion
Disallow: /wiki/Wikipedia_talk%3ATemplates_for_deletion
Disallow: /wiki/Wikipedia_talk:Categories_for_discussion
Disallow: /wiki/Wikipedia_talk%3ACategories_for_discussion
Disallow: /wiki/Wikipedia_talk:Deletion_review
Disallow: /wiki/Wikipedia_talk%3ADeletion_review
Disallow: /wiki/Wikipedia_talk:WikiProject_Deletion_sorting
Disallow: /wiki/Wikipedia_talk%3AWikiProject_Deletion_sorting
Disallow: /wiki/Wikipedia_talk:Files_for_deletion
Disallow: /wiki/Wikipedia_talk%3AFiles_for_deletion
Disallow: /wiki/Wikipedia_talk:Files_for_discussion
Disallow: /wiki/Wikipedia_talk%3AFiles_for_discussion
Disallow: /wiki/Wikipedia_talk:Possibly_unfree_files
Disallow: /wiki/Wikipedia_talk%3APossibly_unfree_files
#
Disallow: /wiki/Wikipedia:Copyright_problems
Disallow: /wiki/Wikipedia%3ACopyright_problems
Disallow: /wiki/Wikipedia_talk:Copyright_problems
Disallow: /wiki/Wikipedia_talk%3ACopyright_problems
Disallow: /wiki/Wikipedia:Suspected_copyright_violations
Disallow: /wiki/Wikipedia%3ASuspected_copyright_violations
Disallow: /wiki/Wikipedia_talk:Suspected_copyright_violations
Disallow: /wiki/Wikipedia_talk%3ASuspected_copyright_violations
Disallow: /wiki/Wikipedia:Contributor_copyright_investigations
Disallow: /wiki/Wikipedia%3AContributor_copyright_investigations
Disallow: /wiki/Wikipedia:Contributor_copyright_investigations
Disallow: /wiki/Wikipedia%3AContributor_copyright_investigations
Disallow: /wiki/Wikipedia_talk:Contributor_copyright_investigations
Disallow: /wiki/Wikipedia_talk%3AContributor_copyright_investigations
Disallow: /wiki/Wikipedia_talk:Contributor_copyright_investigations
Disallow: /wiki/Wikipedia_talk%3AContributor_copyright_investigations
Disallow: /wiki/Wikipedia:Protected_titles
Disallow: /wiki/Wikipedia%3AProtected_titles
Disallow: /wiki/Wikipedia_talk:Protected_titles
Disallow: /wiki/Wikipedia_talk%3AProtected_titles
Disallow: /wiki/Wikipedia:Articles_for_creation
Disallow: /wiki/Wikipedia%3AArticles_for_creation
Disallow: /wiki/Wikipedia_talk:Articles_for_creation
Disallow: /wiki/Wikipedia_talk%3AArticles_for_creation
Disallow: /wiki/Wikipedia_talk:Article_wizard
Disallow: /wiki/Wikipedia_talk%3AArticle_wizard
#
# https://phabricator.wikimedia.org/T13261
Disallow: /wiki/Wikipedia:Requests_for_arbitration
Disallow: /wiki/Wikipedia%3ARequests_for_arbitration
Disallow: /wiki/Wikipedia_talk:Requests_for_arbitration
Disallow: /wiki/Wikipedia_talk%3ARequests_for_arbitration
Disallow: /wiki/Wikipedia:Requests_for_comment
Disallow: /wiki/Wikipedia%3ARequests_for_comment
Disallow: /wiki/Wikipedia_talk:Requests_for_comment
Disallow: /wiki/Wikipedia_talk%3ARequests_for_comment
Disallow: /wiki/Wikipedia:Requests_for_adminship
Disallow: /wiki/Wikipedia%3ARequests_for_adminship
Disallow: /wiki/Wikipedia_talk:Requests_for_adminship
Disallow: /wiki/Wikipedia_talk%3ARequests_for_adminship
#
# https://phabricator.wikimedia.org/T14111
Disallow: /wiki/Wikipedia:Requests_for_checkuser
Disallow: /wiki/Wikipedia%3ARequests_for_checkuser
Disallow: /wiki/Wikipedia_talk:Requests_for_checkuser
Disallow: /wiki/Wikipedia_talk%3ARequests_for_checkuser
#
# https://phabricator.wikimedia.org/T15398
Disallow: /wiki/Wikipedia:WikiProject_Spam
Disallow: /wiki/Wikipedia%3AWikiProject_Spam
#
# https://phabricator.wikimedia.org/T16793
Disallow: /wiki/Wikipedia:Changing_username
Disallow: /wiki/Wikipedia%3AChanging_username
Disallow: /wiki/Wikipedia:Changing_username
Disallow: /wiki/Wikipedia%3AChanging_username
Disallow: /wiki/Wikipedia_talk:Changing_username
Disallow: /wiki/Wikipedia_talk%3AChanging_username
Disallow: /wiki/Wikipedia_talk:Changing_username
Disallow: /wiki/Wikipedia_talk%3AChanging_username
#
Disallow: /wiki/Wikipedia:Administrators%27_noticeboard
Disallow: /wiki/Wikipedia%3AAdministrators%27_noticeboard
Disallow: /wiki/Wikipedia_talk:Administrators%27_noticeboard
Disallow: /wiki/Wikipedia_talk%3AAdministrators%27_noticeboard
Disallow: /wiki/Wikipedia:Community_sanction_noticeboard
Disallow: /wiki/Wikipedia%3ACommunity_sanction_noticeboard
Disallow: /wiki/Wikipedia_talk:Community_sanction_noticeboard
Disallow: /wiki/Wikipedia_talk%3ACommunity_sanction_noticeboard
Disallow: /wiki/Wikipedia:Bureaucrats%27_noticeboard
Disallow: /wiki/Wikipedia%3ABureaucrats%27_noticeboard
Disallow: /wiki/Wikipedia_talk:Bureaucrats%27_noticeboard
Disallow: /wiki/Wikipedia_talk%3ABureaucrats%27_noticeboard
#
Disallow: /wiki/Wikipedia:Sockpuppet_investigations
Disallow: /wiki/Wikipedia%3ASockpuppet_investigations
Disallow: /wiki/Wikipedia_talk:Sockpuppet_investigations
Disallow: /wiki/Wikipedia_talk%3ASockpuppet_investigations
#
Disallow: /wiki/Wikipedia:Neutral_point_of_view/Noticeboard
Disallow: /wiki/Wikipedia%3ANeutral_point_of_view/Noticeboard
Disallow: /wiki/Wikipedia_talk:Neutral_point_of_view/Noticeboard
Disallow: /wiki/Wikipedia_talk%3ANeutral_point_of_view/Noticeboard
#
Disallow: /wiki/Wikipedia:No_original_research/noticeboard
Disallow: /wiki/Wikipedia%3ANo_original_research/noticeboard
Disallow: /wiki/Wikipedia_talk:No_original_research/noticeboard
Disallow: /wiki/Wikipedia_talk%3ANo_original_research/noticeboard
#
Disallow: /wiki/Wikipedia:Fringe_theories/Noticeboard
Disallow: /wiki/Wikipedia%3AFringe_theories/Noticeboard
Disallow: /wiki/Wikipedia_talk:Fringe_theories/Noticeboard
Disallow: /wiki/Wikipedia_talk%3AFringe_theories/Noticeboard
#
Disallow: /wiki/Wikipedia:Conflict_of_interest/Noticeboard
Disallow: /wiki/Wikipedia%3AConflict_of_interest/Noticeboard
Disallow: /wiki/Wikipedia_talk:Conflict_of_interest/Noticeboard
Disallow: /wiki/Wikipedia_talk%3AConflict_of_interest/Noticeboard
#
Disallow: /wiki/Wikipedia:Long-term_abuse
Disallow: /wiki/Wikipedia%3ALong-term_abuse
Disallow: /wiki/Wikipedia_talk:Long-term_abuse
Disallow: /wiki/Wikipedia_talk%3ALong-term_abuse
Disallow: /wiki/Wikipedia:Long_term_abuse
Disallow: /wiki/Wikipedia%3ALong_term_abuse
Disallow: /wiki/Wikipedia_talk:Long_term_abuse
Disallow: /wiki/Wikipedia_talk%3ALong_term_abuse
#
Disallow: /wiki/Wikipedia:Wikiquette_assistance
Disallow: /wiki/Wikipedia%3AWikiquette_assistance
#
Disallow: /wiki/Wikipedia:Abuse_reports
Disallow: /wiki/Wikipedia%3AAbuse_reports
Disallow: /wiki/Wikipedia_talk:Abuse_reports
Disallow: /wiki/Wikipedia_talk%3AAbuse_reports
Disallow: /wiki/Wikipedia:Abuse_response
Disallow: /wiki/Wikipedia%3AAbuse_response
Disallow: /wiki/Wikipedia_talk:Abuse_response
Disallow: /wiki/Wikipedia_talk%3AAbuse_response
#
Disallow: /wiki/Wikipedia:Reliable_sources/Noticeboard
Disallow: /wiki/Wikipedia%3AReliable_sources/Noticeboard
Disallow: /wiki/Wikipedia_talk:Reliable_sources/Noticeboard
Disallow: /wiki/Wikipedia_talk%3AReliable_sources/Noticeboard
#
Disallow: /wiki/Wikipedia:Suspected_sock_puppets
Disallow: /wiki/Wikipedia%3ASuspected_sock_puppets
Disallow: /wiki/Wikipedia_talk:Suspected_sock_puppets
Disallow: /wiki/Wikipedia_talk%3ASuspected_sock_puppets
#
Disallow: /wiki/Wikipedia:Biographies_of_living_persons/Noticeboard
Disallow: /wiki/Wikipedia%3ABiographies_of_living_persons/Noticeboard
Disallow: /wiki/Wikipedia_talk:Biographies_of_living_persons/Noticeboard
Disallow: /wiki/Wikipedia_talk%3ABiographies_of_living_persons/Noticeboard
#
Disallow: /wiki/Wikipedia:Content_noticeboard
Disallow: /wiki/Wikipedia%3AContent_noticeboard
Disallow: /wiki/Wikipedia_talk:Content_noticeboard
Disallow: /wiki/Wikipedia_talk%3AContent_noticeboard
#
Disallow: /wiki/Template:Editnotices
Disallow: /wiki/Template%3AEditnotices
#
Disallow: /wiki/Wikipedia:Arbitration
Disallow: /wiki/Wikipedia%3AArbitration
Disallow: /wiki/Wikipedia_talk:Arbitration
Disallow: /wiki/Wikipedia_talk%3AArbitration
#
Disallow: /wiki/Wikipedia:Arbitration_Committee
Disallow: /wiki/Wikipedia%3AArbitration_Committee
Disallow: /wiki/Wikipedia_talk:Arbitration_Committee
Disallow: /wiki/Wikipedia_talk%3AArbitration_Committee
#
Disallow: /wiki/Wikipedia:Arbitration_Committee_Elections
Disallow: /wiki/Wikipedia%3AArbitration_Committee_Elections
Disallow: /wiki/Wikipedia_talk:Arbitration_Committee_Elections
Disallow: /wiki/Wikipedia_talk%3AArbitration_Committee_Elections
#
Disallow: /wiki/Wikipedia:Mediation_Committee
Disallow: /wiki/Wikipedia%3AMediation_Committee
Disallow: /wiki/Wikipedia_talk:Mediation_Committee
Disallow: /wiki/Wikipedia_talk%3AMediation_Committee
#
Disallow: /wiki/Wikipedia:Mediation_Cabal/Cases
Disallow: /wiki/Wikipedia%3AMediation_Cabal/Cases
#
Disallow: /wiki/Wikipedia:Requests_for_bureaucratship
Disallow: /wiki/Wikipedia%3ARequests_for_bureaucratship
Disallow: /wiki/Wikipedia_talk:Requests_for_bureaucratship
Disallow: /wiki/Wikipedia_talk%3ARequests_for_bureaucratship
#
Disallow: /wiki/Wikipedia:Administrator_review
Disallow: /wiki/Wikipedia%3AAdministrator_review
Disallow: /wiki/Wikipedia_talk:Administrator_review
Disallow: /wiki/Wikipedia_talk%3AAdministrator_review
#
Disallow: /wiki/Wikipedia:Editor_review
Disallow: /wiki/Wikipedia%3AEditor_review
Disallow: /wiki/Wikipedia_talk:Editor_review
Disallow: /wiki/Wikipedia_talk%3AEditor_review
#
Disallow: /wiki/Wikipedia:Article_Incubator
Disallow: /wiki/Wikipedia%3AArticle_Incubator
Disallow: /wiki/Wikipedia_talk:Article_Incubator
Disallow: /wiki/Wikipedia_talk%3AArticle_Incubator
#
Disallow: /wiki/Category:Noindexed_pages
Disallow: /wiki/Category%3ANoindexed_pages
#
# </pre>

M man/can_fetch.Rd => man/can_fetch.Rd +6 -4
@@ 1,5 1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rep.r
% Please edit documentation in R/can-fetch.r
\name{can_fetch}
\alias{can_fetch}
\title{Test URL path against robots.txt}


@@ 17,7 17,9 @@ can_fetch(obj, path = "/", user_agent = "*")
Test URL path against robots.txt
}
\examples{
library(robotstxt)
can_fetch(rt, "/asthma/asthma_stats/default.htm", "*") # TRUE
can_fetch(rt, "/_borders", "*") # FALSE
gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="rep")), collapse="\\n")
gh_rt <- robxp(gh)
can_fetch(gh_rt, "/humans.txt", "*") # TRUE
can_fetch(gh_rt, "/login", "*") # FALSE
can_fetch(gh_rt, "/oembed", "CCBot") # FALSE
}

M man/crawl_delays.Rd => man/crawl_delays.Rd +17 -2
@@ 2,13 2,28 @@
% Please edit documentation in R/cd.r
\name{crawl_delays}
\alias{crawl_delays}
\title{Get all crawl_delay}
\title{Get all agent crawl delay values}
\usage{
crawl_delays(obj)
}
\arguments{
\item{obj}{\code{robxp} object}
}
\value{
data frame of agents and their crawl delays
}
\description{
Get all crawl_delay
Get all agent crawl delay values
}
\note{
\code{-1} will be returned for any listed agent without a crawl delay setting
}
\examples{
gh <- paste0(readLines(system.file("extdata", "github-robots.txt", package="rep")), collapse="\\n")
gh_rt <- robxp(gh)
crawl_delays(gh_rt)

imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\\n")
imdb_rt <- robxp(imdb)
crawl_delays(imdb_rt)
}

M man/robxp.Rd => man/robxp.Rd +2 -3
@@ 13,7 13,6 @@ robxp(x)
Create a robots.txt object
}
\examples{
library(robotstxt)
can_fetch(rt, "/asthma/asthma_stats/default.htm", "*") # TRUE
can_fetch(rt, "/_borders", "*") # FALSE
imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\\n")
rt <- robxp(imdb)
}

M src/RcppExports.cpp => src/RcppExports.cpp +1 -1
@@ 17,7 17,7 @@ BEGIN_RCPP
END_RCPP
}
// rep_crawl_delays
std::vector<float> rep_crawl_delays(SEXP xp);
DataFrame rep_crawl_delays(SEXP xp);
RcppExport SEXP _rep_rep_crawl_delays(SEXP xpSEXP) {
BEGIN_RCPP
    Rcpp::RObject rcpp_result_gen;

M src/repmain.cpp => src/repmain.cpp +6 -2
@@ 21,18 21,22 @@ SEXP rep_parse(std::string content) {
//' @noRd
//'
// [[Rcpp::export]]
std::vector<float> rep_crawl_delays(SEXP xp) {
DataFrame rep_crawl_delays(SEXP xp) {

  Rcpp::XPtr<Rep::Robots> ptr(xp);

  std::vector<std::string> agents;
  std::vector<float> vals;

  agents.reserve(ptr->agents_.size());
  vals.reserve(ptr->agents_.size());

  for(auto kv : ptr->agents_) {
    agents.push_back(kv.first);
    vals.push_back(kv.second.delay());
  }

  return(vals);
  return(DataFrame::create(_["agent"] = agents, _["crawl_delay"] = vals));

}


M tests/testthat/test-rep.R => tests/testthat/test-rep.R +9 -1
@@ 1,11 1,19 @@
context("basic functionality")
test_that("parsing and testing works", {

  rt <- robxp(robotstxt::get_robotstxt("https://cdc.gov"))
  cdc <- paste0(readLines(system.file("extdata", "cdc-robots.txt", package="rep")), collapse="\n")
  rt <- robxp(cdc)

  expect_that(rt, is_a("robxp"))

  expect_that(can_fetch(rt, "/asthma/asthma_stats/default.htm", "*"), equals(TRUE))
  expect_that(can_fetch(rt, "/_borders", "*"), equals(FALSE))

  imdb <- paste0(readLines(system.file("extdata", "imdb-robots.txt", package="rep")), collapse="\n")
  rt <- robxp(imdb)
  cd <- crawl_delays(rt)

  expect_that(cd, is_a("data.frame"))
  expect_equal(cd$crawl_delay, c(0.1, 3.0, -1.0))

})