~hrbrmstr/hgr

f54a464ee418c5762875ff0ff40daa9c284f826b — boB Rudis 6 years ago 5c188d7
CRAN prep; tests updated; docs updated
M .Rbuildignore => .Rbuildignore +3 -0
@@ 8,3 8,6 @@
^\.codecov\.yml$
^README_files$
^doc$
^appveyor\.yml$
^CONDUCT\.md$
^codecov\.yml$

M .travis.yml => .travis.yml +34 -23
@@ 1,31 1,42 @@
language: r

warnings_are_errors: true

sudo: required

cache: packages

r:
 - oldrel
 - release
 - devel

apt_packages:
  - libv8-dev
  - xclip

env:
 global:
   - CRAN: http://cran.rstudio.com
sudo: false
r_check_revdep: false

matrix:
  include:
    - r: oldrel
      env: TRAVIS_CLIP=xsel DISPLAY=:99.0
      addons: {apt: {packages: [xsel]}}
    - r: release
      env: TRAVIS_CLIP=xsel DISPLAY=:99.0
      addons: {apt: {packages: [xsel]}}
    - r: devel
      env: TRAVIS_CLIP=xsel DISPLAY=:99.0
      addons: {apt: {packages: [xsel]}}
    - r: oldrel
      env: TRAVIS_CLIP=xclip DISPLAY=:99.0
      addons: {apt: {packages: [xclip]}}
    - r: release
      env: TRAVIS_CLIP=xclip DISPLAY=:99.0
      addons: {apt: {packages: [xclip]}}
    - r: devel
      env: TRAVIS_CLIP=xclip DISPLAY=:99.0
      addons: {apt: {packages: [xclip]}}
    - r: release
      env: TRAVIS_CLIP=none DISPLAY=:99.0
    - r: release
      env: TRAVIS_CLIP=nodisplay
      addons: {apt: {packages: [xclip]}}

# Ensure xclip can still run headlessly
before_script:
  - sh -e /etc/init.d/xvfb start
  - sleep 3
- if [ "$TRAVIS_CLIP" == "xclip" ]; then uptime | xclip -i -sel p -f | xclip -i -sel c; xclip -o -sel clipboard; fi

after_success:
  - Rscript -e 'covr::codecov()'

notifications:
  email:
    - bob@rud.is
  irc:
    channels:
      - "104.236.112.222#builds"
    nick: travisci

A CONDUCT.md => CONDUCT.md +25 -0
@@ 0,0 1,25 @@
# Contributor Code of Conduct

As contributors and maintainers of this project, we pledge to respect all people who 
contribute through reporting issues, posting feature requests, updating documentation,
submitting pull requests or patches, and other activities.

We are committed to making participation in this project a harassment-free experience for
everyone, regardless of level of experience, gender, gender identity and expression,
sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.

Examples of unacceptable behavior by participants include the use of sexual language or
imagery, derogatory comments or personal attacks, trolling, public or private harassment,
insults, or other unprofessional conduct.

Project maintainers have the right and responsibility to remove, edit, or reject comments,
commits, code, wiki edits, issues, and other contributions that are not aligned to this 
Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed 
from the project team.

Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 
opening an issue or contacting one or more of the project maintainers.

This Code of Conduct is adapted from the Contributor Covenant 
(http://contributor-covenant.org), version 1.0.0, available at
http://contributor-covenant.org/version/1/0/0/

M DESCRIPTION => DESCRIPTION +9 -3
@@ 6,8 6,12 @@ Date: 2017-06-22
Author: Bob Rudis (bob@rud.is)
Maintainer: Bob Rudis <bob@rud.is>
Description: The 'Postlight' 'Mercury' 'API' <https://mercury.postlight.com> takes any web
    article and returns only the relevant content - headline, author, body text, relevant 
    images and more - free from any clutter.
    article and returns only the relevant content - headline, author, body text,  
    images and more - free from any clutter and including only minimal markup. Tools
    are provided to access the 'API' and also further clean up retrieved text through
    the application of 'XSLT' style sheets. An 'RStudio' 'Addin' is also provided
    which makes it possible to preview the cleaned content from a 'URL' on
    the clipboard.
URL: https://github.com/hrbrmstr/hgr
BugReports: https://github.com/hrbrmstr/hgr/issues
License: AGPL


@@ 26,5 30,7 @@ Imports:
    clipr,
    htmltools,
    jsonlite,
    rstudioapi
    rstudioapi,
    shiny (>= 0.13),
    miniUI (>= 0.1.1)
RoxygenNote: 6.0.1

M NAMESPACE => NAMESPACE +3 -0
@@ 4,11 4,14 @@ S3method(print,hgr)
export(clean_text)
export(jtf_addin)
export(just_the_facts)
export(mini_browser)
import(clipr)
import(htmltools)
import(httr)
import(miniUI)
import(purrr)
import(rstudioapi)
import(shiny)
import(xslt)
importFrom(jsonlite,fromJSON)
importFrom(jsonlite,toJSON)

M R/clean.r => R/clean.r +2 -0
@@ 12,6 12,8 @@
#'     try the XSLT and test for an empty return. If that condition exists, then
#'     it will revert to a plain text conversion with just straight `rvest::html_text()`.
#' @export
#' @examples
#' clean_text(system.file("extdata", "raw.html", package="hgr"))
clean_text <- function(doc) {

  if (!inherits(doc, "html_document")) doc <- xml2::read_html(doc)

M R/hgr-package.R => R/hgr-package.R +6 -0
@@ 4,6 4,11 @@
#' relevant content --- headline, author, body text, relevant images and more --- free
#' from any clutter.
#'
#' Tools are provided to access the 'API' and also further clean up retrieved text
#' through the application of 'XSLT' style sheets. An 'RStudio' 'Addin' is also
#' provided which makes it possible to preview the cleaned content from a 'URL' on
#' the clipboard.
#'
#' @md
#' @name hgr
#' @docType package


@@ 14,4 19,5 @@
#' @import xslt rstudioapi
#' @importFrom xml2 read_html
#' @importFrom rvest html_text
#' @import shiny miniUI
NULL

M R/mercury.r => R/mercury.r +4 -0
@@ 10,6 10,10 @@
#'     but you can specify it manually as well. Get your key [here](https://mercury.postlight.com).
#' @return `data.frame`
#' @export
#' @examples \dontrun{
#' URL <- "https://www.techworld.com/careers/what-is-r-programming-language-3664613/"
#' just_the_facts(URL)
#' }
just_the_facts <- function(url, mercury_api_key=Sys.getenv("MERCURY_API_KEY")) {

   res <- httr::GET("https://mercury.postlight.com/parser",

A R/mini-browser.r => R/mini-browser.r +57 -0
@@ 0,0 1,57 @@
#' Use hgr as a mini-browser (RStudio Addin)
#'
#' Launches a Shiny gadget in a modal dialog titled "BrowseR". The gadget shows
#' a "Location:" text box and a "Go!" button. Pressing "Go!" passes the entered
#' URL to [just_the_facts()] and renders the `content` element of the result
#' as HTML below the location bar. Pressing "Done" closes the gadget.
#'
#' @md
#' @return The value returned by `shiny::runGadget()`. The gadget is stopped
#'     with `stopApp()` (no explicit return value) when "Done" is pressed.
#' @export
#' @examples \dontrun{
#' mini_browser()
#' }
mini_browser <- function() {

  # Generate UI for the gadget.
  ui <- miniPage(
    gadgetTitleBar("BrowseR"),
    miniContentPanel(
      shiny::div(
        style="width:100%",
        textInput("URL", "Location:", width="70%"),
        # An action button is used rather than `submitButton()`: a submit
        # button makes *every* input in the app wait for the submit click,
        # which includes the title bar's own buttons.
        actionButton("go", "Go!")
      ),
      shiny::br(),
      htmlOutput("output")
    )
  )

  # Server code for the gadget.
  server <- function(input, output, session) {

    # Only (re)fetch when "Go!" is pressed, not on every keystroke.
    page_content <- eventReactive(input$go, {
      goto_url <- trimws(input$URL %||% "")
      if (nzchar(goto_url)) {
        hgr::just_the_facts(goto_url)$content
      } else {
        ""
      }
    })

    # `renderText()` feeding `htmlOutput()` lets the returned markup be
    # displayed as HTML rather than as escaped text.
    output$output <- renderText(page_content())

    # Listen for 'done'.
    observeEvent(input$done, {
      invisible(stopApp())
    })
  }

  # Use a modal dialog as a viewer.
  viewer <- dialogViewer("BrowseR", width = 800, height = 600)
  runGadget(ui, server, viewer = viewer)

}
\ No newline at end of file

M README.Rmd => README.Rmd +19 -14
@@ 4,16 4,29 @@ editor_options:
  chunk_output_type: console
---

`hgr` : Tools to Work with the 'Postlight' 'Mercury' 'API'
[![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/hgr.svg?branch=master)](https://travis-ci.org/hrbrmstr/hgr)
 * Turn on travis for your repo at https://travis-ci.org/hrbrmstr/hgr
[![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/hrbrmstr/hgr?branch=master&svg=true)](https://ci.appveyor.com/project/hrbrmstr/hgr)
[![Coverage Status](https://img.shields.io/codecov/c/github/hrbrmstr/hgr/master.svg)](https://codecov.io/github/hrbrmstr/hgr?branch=master)

Mercury takes any web article and returns only the relevant content — headline, author, body text, relevant images and more — free from any clutter. You need an API key which you can get from [here](https://mercury.postlight.com).
# hgr

Tools to Work with the 'Postlight' 'Mercury' 'API'

## Description

The 'Postlight' 'Mercury' 'API' <https://mercury.postlight.com> takes any web article and returns only the relevant content - headline, author, body text, images and more - free from any clutter and including only minimal markup. Tools are provided to access the 'API' and also further clean up retrieved text through the application of 'XSLT' style sheets. An 'RStudio' 'Addin' is also provided which makes it possible to preview the cleaned content from a 'URL' on the clipboard.

You need an API key which you can get from [here](https://mercury.postlight.com).

## What's inside the tin?

The following functions are implemented:

- `just_the_facts`:	Retrieve parsed content of a URL processed by the Postlight Mercury API
- `clean_text`:	Remove all HTML/XML tags from an HTML document/atomic character vector

### Installation
## Installation

```{r eval=FALSE}
devtools::install_github("hrbrmstr/hgr")


@@ 23,7 36,7 @@ devtools::install_github("hrbrmstr/hgr")
options(width=120)
```

### Usage
## Usage

```{r message=FALSE, warning=FALSE, error=FALSE}
library(hgr)


@@ 44,14 57,6 @@ plain <- clean_text(doc$content)
substr(plain, 1, 100)
```

### Test Results

```{r message=FALSE, warning=FALSE, error=FALSE}
library(hgr)
library(testthat)

date()

test_dir("tests/")
```
## Code of Conduct

Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms.
\ No newline at end of file

A appveyor.yml => appveyor.yml +45 -0
@@ 0,0 1,45 @@
# AppVeyor (Windows CI) configuration for building and checking this package.
# DO NOT CHANGE the "init" and "install" sections below

# Download script file from GitHub
init:
  ps: |
        $ErrorActionPreference = "Stop"
        Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1"
        Import-Module '..\appveyor-tool.ps1'

# Run the Bootstrap command (presumably provided by the module imported above).
install:
  ps: Bootstrap

# Keep the R library directory between builds.
cache:
  - C:\RLibrary

# Adapt as necessary starting from here

# Build step: invoke the helper script's `install_deps` command.
build_script:
  - travis-tool.sh install_deps

# Test step: invoke the helper script's `run_tests` command.
test_script:
  - travis-tool.sh run_tests

# On failure, zip the contents of any *.Rcheck directories and upload the archive.
on_failure:
  - 7z a failure.zip *.Rcheck\*
  - appveyor PushArtifact failure.zip

# Files published as build artifacts: check logs ("Logs") and
# package archives ("Bits").
artifacts:
  - path: '*.Rcheck\**\*.log'
    name: Logs

  - path: '*.Rcheck\**\*.out'
    name: Logs

  - path: '*.Rcheck\**\*.fail'
    name: Logs

  - path: '*.Rcheck\**\*.Rout'
    name: Logs

  - path: '\*_*.tar.gz'
    name: Bits

  - path: '\*_*.zip'
    name: Bits

A codecov.yml => codecov.yml +1 -0
@@ 0,0 1,1 @@
comment: false

A inst/extdata/raw.html => inst/extdata/raw.html +38 -0
@@ 0,0 1,38 @@
<section id="articleBody" class="articleBody">
<p>R is an open source programming language and software environment, commonly used for statistical computing within data heavy roles such as data mining and statistics.</p>
<p><strong>Are you a programmer looking for a new job? <a href="http://www.techworld.com/jobs/channel-developerstw/">Browse our jobs board here</a>.</strong></p>
<header class="articleHeader">
<figure>
<div>
<meta>
<meta>
<img src="https://cdn1.techworld.com/cmsdata/features/3664613/female_developer_istock_cecilie_arcurs_thumb800.jpg" alt="female developer istock cecilie arcurs">
</div>
<meta>
</figure>
</header>
<p>R has had a resurgence in recent years with a growing number of programmers using its data generation and analysis capabilities within machine learning and other emerging data-dependant technologies.</p>
<p>We discuss why you should learn and use R and how to get skilled up.</p>
<h2>Why should I learn R programming language?</h2>
<p>While R can seem overly complex at the start, for those looking for a programming language with a lot of meat on the bones, R is worth your consideration.</p>
<p>In fact, a number of well-known organisations are taking advantage of R&apos;s impressive statistical features.</p>
<section class="inArticleRelatedList" id="inArticleRelated-6491BBC4-9DE6-470F-A409672B0331C9A6"> </section> <p>Some Facebook employees are using R to analyse user behaviour, while over 500 Google employees are using R to make its advertising more effective, says <a href="http://blog.revolutionanalytics.com/2013/05/companies-using-open-source-r-in-2013.html">Revolution Analytics</a>.</p>
<p>R uses command-line scripting, which is ideal for storing numerous series of complex data-analysis and recycling that analysis&apos; on similar sets of data.</p>
<p>R is totally free and open source, so unlike its rivals such as SAS or Matlab, R can be customised, cloned and even redistributed.</p>
<p>One of the biggest benefits to open source software is that upgrades to the software are much more regular.</p>
<p>This is extremely advantageous for statistical programming languages and environments.</p>
<p>R is available on Windows, Linux and Mac OS X and able to import data from a whole host of programmes including Microsoft Excel, MySQL and Oracle.</p>
<section class="inArticleRelatedList" id="inArticleRelated-128F754A-0C56-4947-ABB06D81B7A432DD"> </section> <p>R really is a data analyst or statistician&apos;s dream - it packs a punch. R is able to handle an incredible amount of data and its two million users can vouch for that.</p>
<p>In fact, one of R&apos;s selling points is that exact community. R&apos;s large and active online community supply a myriad of documentation, tutorials and online query forums.</p>
<p><strong><a href="http://www.techworld.com/jobs/channel-developerstw/">Find your next job in development here</a>.</strong></p>
<h2>How do I learn R programming language?</h2>
<p>If you&apos;re not 100 percent sure that R is for you, you might want to take an introductory&#xA0;course. Online video courses are very popular and won&apos;t break the bank.&#xA0;</p>
<p><a href="https://www.udemy.com/courses/search/?q=R&amp;src=ukw">Udemy</a> offers a range of online classes for R and statistical programming languages as a whole. These can start at around &#xA3;10, so why not give it a shot.&#xA0;</p>
<p>Another popular route for those with a background in programming is to just get stuck in.&#xA0;</p>
<p>Just visit<span>&#xA0;</span><strong><a href="http://www.r-project.org/">r-project.org</a></strong>&#xA0;to install and get started.</p>
<p>While you don&apos;t need any additional downloads to begin working with R, it is a good idea to install&#xA0;<a href="http://www.rstudio.com/ide/">RStudio</a>, the&#xA0;<span>free R integrated development environment (IDE).</span></p>
<p><span>This studio includes useful features to make the learning process a little less daunting from&#xA0;syntax highlighting and code auto-completion.&#xA0;</span></p>
<p><span>You&apos;ll be able to take advantage of lots of online tutorials and documentation, including coding shortcuts,<strong> <a href="http://www.rstudio.com/ide/docs/">here</a></strong>.</span></p>
<p><strong>For a full guide on getting started with R, <a href="https://www.computerworld.com/article/2497143/business-intelligence/business-intelligence-beginner-s-guide-to-r-introduction.html?page=2">see here</a>.</strong></p>
<p>Find your next job with <a href="https://www.techworld.com/jobs/"> techworld jobs</a></p>
</section>

M inst/rstudio/addins.dcf => inst/rstudio/addins.dcf +5 -0
@@ 3,3 3,8 @@ Description: Takes a URL on the clipboard and inserts/executes a call to `just_t
    that includes "printing" the object which launches a browser with the API call result
Binding: jtf_addin
Interactive: false

Name: Minimal Browser
Description: Presents a "browser" interface
Binding: mini_browser
Interactive: true

M man/clean_text.Rd => man/clean_text.Rd +3 -0
@@ 23,3 23,6 @@ the XSLT can be a bit aggressive for some URLs and this function will first
try the XSLT and test for an empty return. If that condition exists, then
it will revert to a plain text conversion with just straight \code{rvest::html_text()}.
}
\examples{
clean_text(system.file("extdata", "raw.html", package="hgr"))
}

M man/hgr.Rd => man/hgr.Rd +6 -0
@@ 10,6 10,12 @@
relevant content --- headline, author, body text, relevant images and more --- free
from any clutter.
}
\details{
Tools are provided to access the 'API' and also further clean up retrieved text
through the application of 'XSLT' style sheets. An 'RStudio' 'Addin' is also
provided which makes it possible to preview the cleaned content from a 'URL' on
the clipboard.
}
\author{
Bob Rudis (bob@rud.is)
}

M man/just_the_facts.Rd => man/just_the_facts.Rd +6 -0
@@ 27,3 27,9 @@ but you can specify it manually as well. Get your key \href{https://mercury.post
relevant content --- headline, author, body text, relevant images and more --- free
from any clutter.
}
\examples{
\dontrun{
URL <- "https://www.techworld.com/careers/what-is-r-programming-language-3664613/"
just_the_facts(URL)
}
}

A man/mini_browser.Rd => man/mini_browser.Rd +11 -0
@@ 0,0 1,11 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mini-browser.r
\name{mini_browser}
\alias{mini_browser}
\title{Use hgr as a mini-browser (RStudio Addin)}
\usage{
mini_browser()
}
\description{
Use hgr as a mini-browser (RStudio Addin)
}