~hrbrmstr/curlparse

1abd213df6fc4ac003546a4733e029e87675e594 — hrbrmstr 4 years ago 746da27 master
add repos
M NAMESPACE => NAMESPACE +1 -1
@@ 18,4 18,4 @@ importFrom(Rcpp,sourceCpp)
importFrom(magrittr,"%>%")
importFrom(stringi,stri_detect_regex)
importFrom(stringi,stri_opts_regex)
useDynLib(curlparse)
useDynLib(curlparse, .registration = TRUE)

M R/RcppExports.R => R/RcppExports.R +11 -11
@@ 8,7 8,7 @@
#' @return data frame (tibble)
#' @export
parse_curl <- function(urls) {
    .Call('_curlparse_parse_curl', PACKAGE = 'curlparse', urls)
    .Call(`_curlparse_parse_curl`, urls)
}

#' Parse a character vector of URLs into component parts (`urltools` compatibility function)


@@ 18,7 18,7 @@ parse_curl <- function(urls) {
#' @return data frame (not a tibble)
#' @export
url_parse <- function(urls) {
    .Call('_curlparse_url_parse', PACKAGE = 'curlparse', urls)
    .Call(`_curlparse_url_parse`, urls)
}

#' Extract member components from a URL string


@@ 28,54 28,54 @@ url_parse <- function(urls) {
#' @return character vector of the extracted URL component
#' @export
scheme <- function(urls) {
    .Call('_curlparse_scheme', PACKAGE = 'curlparse', urls)
    .Call(`_curlparse_scheme`, urls)
}

#' @rdname scheme
#' @export
user <- function(urls) {
    .Call('_curlparse_user', PACKAGE = 'curlparse', urls)
    .Call(`_curlparse_user`, urls)
}

#' @rdname scheme
#' @export
password <- function(urls) {
    .Call('_curlparse_password', PACKAGE = 'curlparse', urls)
    .Call(`_curlparse_password`, urls)
}

#' @rdname scheme
#' @export
host <- function(urls) {
    .Call('_curlparse_host', PACKAGE = 'curlparse', urls)
    .Call(`_curlparse_host`, urls)
}

#' @rdname scheme
#' @export
port <- function(urls) {
    .Call('_curlparse_port', PACKAGE = 'curlparse', urls)
    .Call(`_curlparse_port`, urls)
}

#' @rdname scheme
#' @export
path <- function(urls) {
    .Call('_curlparse_path', PACKAGE = 'curlparse', urls)
    .Call(`_curlparse_path`, urls)
}

#' @rdname scheme
#' @export
url_options <- function(urls) {
    .Call('_curlparse_url_options', PACKAGE = 'curlparse', urls)
    .Call(`_curlparse_url_options`, urls)
}

#' @rdname scheme
#' @export
query <- function(urls) {
    .Call('_curlparse_query', PACKAGE = 'curlparse', urls)
    .Call(`_curlparse_query`, urls)
}

#' @rdname scheme
#' @export
fragment <- function(urls) {
    .Call('_curlparse_fragment', PACKAGE = 'curlparse', urls)
    .Call(`_curlparse_fragment`, urls)
}


M R/curlparse-package.R => R/curlparse-package.R +1 -1
@@ 9,5 9,5 @@
#' @author Bob Rudis (bob@@rud.is)
#' @importFrom stringi stri_detect_regex stri_opts_regex
#' @importFrom Rcpp sourceCpp
#' @useDynLib curlparse
#' @useDynLib curlparse, .registration = TRUE
"_PACKAGE"

M README.Rmd => README.Rmd +2 -0
@@ 1,5 1,7 @@
---
output: rmarkdown::github_document
editor_options: 
  chunk_output_type: console
---
```{r pkg-knitr-opts, include=FALSE}
hrbrpkghelpr::global_opts()

M README.md => README.md +12 -8
@@ 5,7 5,7 @@ developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.re
[![Signed
by](https://img.shields.io/badge/Keybase-Verified-brightgreen.svg)](https://keybase.io/hrbrmstr)
![Signed commit
%](https://img.shields.io/badge/Signed_Commits-80.0%25-lightgrey.svg)
%](https://img.shields.io/badge/Signed_Commits-83.3%25-lightgrey.svg)
[![Linux build
Status](https://travis-ci.org/hrbrmstr/curlparse.svg?branch=master)](https://travis-ci.org/hrbrmstr/curlparse)
[![Windows build


@@ 14,7 14,7 @@ status](https://ci.appveyor.com/api/projects/status/github/hrbrmstr/curlparse?sv
Status](https://codecov.io/gh/hrbrmstr/curlparse/branch/master/graph/badge.svg)](https://codecov.io/gh/hrbrmstr/curlparse)
![Minimal R
Version](https://img.shields.io/badge/R%3E%3D-3.2.0-blue.svg)
![License](https://img.shields.io/badge/License-MIT-blue.svg)
![License](https://img.shields.io/badge/License-Z-blue.svg)

# curlparse



@@ 60,10 60,14 @@ The following functions are implemented:
## Installation

``` r
remotes::install_git("https://git.rud.is/hrbrmstr/curlparse.git")
# or
remotes::install_git("https://git.sr.ht/~hrbrmstr/curlparse")
# or
remotes::install_gitlab("hrbrmstr/curlparse")
# or
remotes::install_bitbucket("hrbrmstr/curlparse")
# or
remotes::install_github("hrbrmstr/curlparse")
```



@@ 182,9 186,9 @@ microbenchmark(

mb
## Unit: microseconds
##       expr     min       lq     mean  median       uq      max neval
##  curlparse 650.357 685.5585 942.4718 774.454 935.5125 7926.342   500
##   urltools 510.676 542.7305 770.9576 591.866 746.7820 4595.759   500
##       expr     min       lq     mean   median       uq      max neval cld
##  curlparse 794.686 836.9005 893.4829 878.3295 919.0675 4492.718   500   b
##   urltools 670.631 705.1925 764.8708 738.3700 782.3535 4834.614   500  a

autoplot(mb)
```


@@ 364,9 368,9 @@ all(

| Lang         | \# Files | (%) | LoC |  (%) | Blank lines |  (%) | \# Lines |  (%) |
| :----------- | -------: | --: | --: | ---: | ----------: | ---: | -------: | ---: |
| C++          |        2 | 0.2 | 286 | 0.68 |          65 | 0.48 |       58 | 0.30 |
| Rmd          |        1 | 0.1 |  85 | 0.20 |          56 | 0.41 |       68 | 0.35 |
| R            |        6 | 0.6 |  46 | 0.11 |          14 | 0.10 |       67 | 0.35 |
| C++          |        2 | 0.2 | 280 | 0.68 |          65 | 0.48 |       58 | 0.30 |
| Rmd          |        1 | 0.1 |  85 | 0.21 |          56 | 0.41 |       70 | 0.36 |
| R            |        6 | 0.6 |  46 | 0.11 |          14 | 0.10 |       67 | 0.34 |
| Bourne Shell |        1 | 0.1 |   2 | 0.00 |           0 | 0.00 |        0 | 0.00 |

## Code of Conduct

M man/figures/README-unnamed-chunk-4-1.png => man/figures/README-unnamed-chunk-4-1.png +0 -0
M man/figures/README-unnamed-chunk-5-1.png => man/figures/README-unnamed-chunk-5-1.png +0 -0
M src/RcppExports.cpp => src/RcppExports.cpp +22 -22
@@ 6,122 6,122 @@
using namespace Rcpp;

// parse_curl
DataFrame parse_curl(CharacterVector urls);
DataFrame parse_curl(StringVector urls);
RcppExport SEXP _curlparse_parse_curl(SEXP urlsSEXP) {
BEGIN_RCPP
    Rcpp::RObject rcpp_result_gen;
    Rcpp::RNGScope rcpp_rngScope_gen;
    Rcpp::traits::input_parameter< CharacterVector >::type urls(urlsSEXP);
    Rcpp::traits::input_parameter< StringVector >::type urls(urlsSEXP);
    rcpp_result_gen = Rcpp::wrap(parse_curl(urls));
    return rcpp_result_gen;
END_RCPP
}
// url_parse
DataFrame url_parse(CharacterVector urls);
DataFrame url_parse(StringVector urls);
RcppExport SEXP _curlparse_url_parse(SEXP urlsSEXP) {
BEGIN_RCPP
    Rcpp::RObject rcpp_result_gen;
    Rcpp::RNGScope rcpp_rngScope_gen;
    Rcpp::traits::input_parameter< CharacterVector >::type urls(urlsSEXP);
    Rcpp::traits::input_parameter< StringVector >::type urls(urlsSEXP);
    rcpp_result_gen = Rcpp::wrap(url_parse(urls));
    return rcpp_result_gen;
END_RCPP
}
// scheme
CharacterVector scheme(CharacterVector urls);
StringVector scheme(StringVector urls);
RcppExport SEXP _curlparse_scheme(SEXP urlsSEXP) {
BEGIN_RCPP
    Rcpp::RObject rcpp_result_gen;
    Rcpp::RNGScope rcpp_rngScope_gen;
    Rcpp::traits::input_parameter< CharacterVector >::type urls(urlsSEXP);
    Rcpp::traits::input_parameter< StringVector >::type urls(urlsSEXP);
    rcpp_result_gen = Rcpp::wrap(scheme(urls));
    return rcpp_result_gen;
END_RCPP
}
// user
CharacterVector user(CharacterVector urls);
StringVector user(StringVector urls);
RcppExport SEXP _curlparse_user(SEXP urlsSEXP) {
BEGIN_RCPP
    Rcpp::RObject rcpp_result_gen;
    Rcpp::RNGScope rcpp_rngScope_gen;
    Rcpp::traits::input_parameter< CharacterVector >::type urls(urlsSEXP);
    Rcpp::traits::input_parameter< StringVector >::type urls(urlsSEXP);
    rcpp_result_gen = Rcpp::wrap(user(urls));
    return rcpp_result_gen;
END_RCPP
}
// password
CharacterVector password(CharacterVector urls);
StringVector password(StringVector urls);
RcppExport SEXP _curlparse_password(SEXP urlsSEXP) {
BEGIN_RCPP
    Rcpp::RObject rcpp_result_gen;
    Rcpp::RNGScope rcpp_rngScope_gen;
    Rcpp::traits::input_parameter< CharacterVector >::type urls(urlsSEXP);
    Rcpp::traits::input_parameter< StringVector >::type urls(urlsSEXP);
    rcpp_result_gen = Rcpp::wrap(password(urls));
    return rcpp_result_gen;
END_RCPP
}
// host
CharacterVector host(CharacterVector urls);
StringVector host(StringVector urls);
RcppExport SEXP _curlparse_host(SEXP urlsSEXP) {
BEGIN_RCPP
    Rcpp::RObject rcpp_result_gen;
    Rcpp::RNGScope rcpp_rngScope_gen;
    Rcpp::traits::input_parameter< CharacterVector >::type urls(urlsSEXP);
    Rcpp::traits::input_parameter< StringVector >::type urls(urlsSEXP);
    rcpp_result_gen = Rcpp::wrap(host(urls));
    return rcpp_result_gen;
END_RCPP
}
// port
CharacterVector port(CharacterVector urls);
StringVector port(StringVector urls);
RcppExport SEXP _curlparse_port(SEXP urlsSEXP) {
BEGIN_RCPP
    Rcpp::RObject rcpp_result_gen;
    Rcpp::RNGScope rcpp_rngScope_gen;
    Rcpp::traits::input_parameter< CharacterVector >::type urls(urlsSEXP);
    Rcpp::traits::input_parameter< StringVector >::type urls(urlsSEXP);
    rcpp_result_gen = Rcpp::wrap(port(urls));
    return rcpp_result_gen;
END_RCPP
}
// path
CharacterVector path(CharacterVector urls);
StringVector path(StringVector urls);
RcppExport SEXP _curlparse_path(SEXP urlsSEXP) {
BEGIN_RCPP
    Rcpp::RObject rcpp_result_gen;
    Rcpp::RNGScope rcpp_rngScope_gen;
    Rcpp::traits::input_parameter< CharacterVector >::type urls(urlsSEXP);
    Rcpp::traits::input_parameter< StringVector >::type urls(urlsSEXP);
    rcpp_result_gen = Rcpp::wrap(path(urls));
    return rcpp_result_gen;
END_RCPP
}
// url_options
CharacterVector url_options(CharacterVector urls);
StringVector url_options(StringVector urls);
RcppExport SEXP _curlparse_url_options(SEXP urlsSEXP) {
BEGIN_RCPP
    Rcpp::RObject rcpp_result_gen;
    Rcpp::RNGScope rcpp_rngScope_gen;
    Rcpp::traits::input_parameter< CharacterVector >::type urls(urlsSEXP);
    Rcpp::traits::input_parameter< StringVector >::type urls(urlsSEXP);
    rcpp_result_gen = Rcpp::wrap(url_options(urls));
    return rcpp_result_gen;
END_RCPP
}
// query
CharacterVector query(CharacterVector urls);
StringVector query(StringVector urls);
RcppExport SEXP _curlparse_query(SEXP urlsSEXP) {
BEGIN_RCPP
    Rcpp::RObject rcpp_result_gen;
    Rcpp::RNGScope rcpp_rngScope_gen;
    Rcpp::traits::input_parameter< CharacterVector >::type urls(urlsSEXP);
    Rcpp::traits::input_parameter< StringVector >::type urls(urlsSEXP);
    rcpp_result_gen = Rcpp::wrap(query(urls));
    return rcpp_result_gen;
END_RCPP
}
// fragment
CharacterVector fragment(CharacterVector urls);
StringVector fragment(StringVector urls);
RcppExport SEXP _curlparse_fragment(SEXP urlsSEXP) {
BEGIN_RCPP
    Rcpp::RObject rcpp_result_gen;
    Rcpp::RNGScope rcpp_rngScope_gen;
    Rcpp::traits::input_parameter< CharacterVector >::type urls(urlsSEXP);
    Rcpp::traits::input_parameter< StringVector >::type urls(urlsSEXP);
    rcpp_result_gen = Rcpp::wrap(fragment(urls));
    return rcpp_result_gen;
END_RCPP

M src/curlparse-main.cpp => src/curlparse-main.cpp +39 -45
@@ 5,7 5,7 @@

using namespace Rcpp;

String lc_url_get(CURLU *url, CURLUPart what, unsigned int flags = 0) {
static inline String lc_url_get(CURLU *url, CURLUPart what, unsigned int flags = 0) {

  char *thing;
  CURLUcode rc = curl_url_get(url, what, &thing, flags);


@@ 27,29 27,27 @@ String lc_url_get(CURLU *url, CURLUPart what, unsigned int flags = 0) {
//' @return data frame (tibble)
//' @export
// [[Rcpp::export]]
DataFrame parse_curl(CharacterVector urls) {
DataFrame parse_curl(StringVector urls) {

  unsigned int input_size = urls.size();
  R_xlen_t input_size = urls.size();

  CharacterVector scheme_vec(input_size);
  CharacterVector user_vec(input_size);
  CharacterVector password_vec(input_size);
  CharacterVector host_vec(input_size);
  CharacterVector port_vec(input_size);
  CharacterVector path_vec(input_size);
  CharacterVector options_vec(input_size);
  CharacterVector query_vec(input_size);
  CharacterVector fragment_vec(input_size);
  StringVector scheme_vec(input_size);
  StringVector user_vec(input_size);
  StringVector password_vec(input_size);
  StringVector host_vec(input_size);
  StringVector port_vec(input_size);
  StringVector path_vec(input_size);
  StringVector options_vec(input_size);
  StringVector query_vec(input_size);
  StringVector fragment_vec(input_size);

  CURLUcode rc;
  CURLU *url;

  for (unsigned int i = 0; i < input_size; i++) {
  for (R_xlen_t i = 0; i < input_size; i++) {

    url = curl_url();
    rc = curl_url_set(
      url, CURLUPART_URL, Rcpp::as<std::string>(urls[i]).c_str(), 0
    );
    rc = curl_url_set(url, CURLUPART_URL, urls[i], 0);

    if (!rc) {



@@ 94,7 92,7 @@ DataFrame parse_curl(CharacterVector urls) {
    _["stringsAsFactors"] = false
  );

  out.attr("class") = CharacterVector::create("tbl_df", "tbl", "data.frame");
  out.attr("class") = StringVector::create("tbl_df", "tbl", "data.frame");

  return(out);



@@ 108,26 106,24 @@ DataFrame parse_curl(CharacterVector urls) {
//' @return data frame (not a tibble)
//' @export
// [[Rcpp::export]]
DataFrame url_parse(CharacterVector urls) {
DataFrame url_parse(StringVector urls) {

  unsigned int input_size = urls.size();
  R_xlen_t input_size = urls.size();

  CharacterVector scheme_vec(input_size);
  CharacterVector host_vec(input_size);
  CharacterVector port_vec(input_size);
  CharacterVector path_vec(input_size);
  CharacterVector query_vec(input_size);
  CharacterVector fragment_vec(input_size);
  StringVector scheme_vec(input_size);
  StringVector host_vec(input_size);
  StringVector port_vec(input_size);
  StringVector path_vec(input_size);
  StringVector query_vec(input_size);
  StringVector fragment_vec(input_size);

  CURLUcode rc;
  CURLU *url;

  for (unsigned int i = 0; i < input_size; i++) {
  for (R_xlen_t i = 0; i < input_size; i++) {

    url = curl_url();
    rc = curl_url_set(
      url, CURLUPART_URL, Rcpp::as<std::string>(urls[i]).c_str(), 0
    );
    rc = curl_url_set(url, CURLUPART_URL, urls[i], 0);

    if (!rc) {



@@ 168,20 164,18 @@ DataFrame url_parse(CharacterVector urls) {
}


CharacterVector lc_part(CharacterVector urls, CURLUPart what, unsigned int flags = 0) {
StringVector lc_part(StringVector urls, CURLUPart what, unsigned int flags = 0) {

  unsigned int input_size = urls.size();
  CharacterVector output(input_size);
  R_xlen_t input_size = urls.size();
  StringVector output(input_size);

  CURLUcode rc;
  CURLU *url;

  for (unsigned int i = 0; i < input_size; i++) {
  for (R_xlen_t i = 0; i < input_size; i++) {

    url = curl_url();
    rc = curl_url_set(
      url, CURLUPART_URL, Rcpp::as<std::string>(urls[i]).c_str(), 0
    );
    rc = curl_url_set(url, CURLUPART_URL, urls[i], 0);

    output[i] = (!rc) ? lc_url_get(url, what, flags) : NA_STRING;



@@ 200,62 194,62 @@ CharacterVector lc_part(CharacterVector urls, CURLUPart what, unsigned int flags
//' @return character vector of the extracted URL component
//' @export
// [[Rcpp::export]]
CharacterVector scheme(CharacterVector urls) {
StringVector scheme(StringVector urls) {
  return(lc_part(urls, CURLUPART_SCHEME, CURLU_DEFAULT_SCHEME));
}

//' @rdname scheme
//' @export
// [[Rcpp::export]]
CharacterVector user(CharacterVector urls) {
StringVector user(StringVector urls) {
  return(lc_part(urls, CURLUPART_USER));
}

//' @rdname scheme
//' @export
// [[Rcpp::export]]
CharacterVector password(CharacterVector urls) {
StringVector password(StringVector urls) {
  return(lc_part(urls, CURLUPART_PASSWORD));
}

//' @rdname scheme
//' @export
// [[Rcpp::export]]
CharacterVector host(CharacterVector urls) {
StringVector host(StringVector urls) {
  return(lc_part(urls, CURLUPART_HOST));
}

//' @rdname scheme
//' @export
// [[Rcpp::export]]
CharacterVector port(CharacterVector urls) {
StringVector port(StringVector urls) {
  return(lc_part(urls, CURLUPART_PORT, CURLU_DEFAULT_PORT));
}

//' @rdname scheme
//' @export
// [[Rcpp::export]]
CharacterVector path(CharacterVector urls) {
StringVector path(StringVector urls) {
  return(lc_part(urls, CURLUPART_PATH, CURLU_URLDECODE));
}

//' @rdname scheme
//' @export
// [[Rcpp::export]]
CharacterVector url_options(CharacterVector urls) {
StringVector url_options(StringVector urls) {
  return(lc_part(urls, CURLUPART_OPTIONS));
}

//' @rdname scheme
//' @export
// [[Rcpp::export]]
CharacterVector query(CharacterVector urls) {
StringVector query(StringVector urls) {
  return(lc_part(urls, CURLUPART_QUERY, CURLU_URLENCODE));
}

//' @rdname scheme
//' @export
// [[Rcpp::export]]
CharacterVector fragment(CharacterVector urls) {
StringVector fragment(StringVector urls) {
  return(lc_part(urls, CURLUPART_FRAGMENT));
}

M tools/option_table.txt => tools/option_table.txt +0 -7
@@ 261,10 261,3 @@
  CURLOPT_DOH_URL = 10000 + 279
  CURLOPT_UPLOAD_BUFFERSIZE = 0 + 280
  CURLOPT_UPKEEP_INTERVAL_MS = 0 + 281
  CURLOPT_CURLU = 10000 + 282
  CURLOPT_TRAILERFUNCTION = 20000 + 283
  CURLOPT_TRAILERDATA = 10000 + 284
  CURLOPT_HTTP09_ALLOWED = 0 + 285
  CURLOPT_ALTSVC_CTRL = 0 + 286
  CURLOPT_ALTSVC = 10000 + 287
  CURLOPT_MAXAGE_CONN = 0 + 288