~hrbrmstr/vershist

71a3c9fc173d25dbc2f5a1608ea9aa89388a4337 — hrbrmstr 3 years ago aadbcc0
nginx apache
3 files changed, 106 insertions(+), 66 deletions(-)

M R/apache-httpd.R
M R/nginx.R
M man/apache_httpd_version_history.Rd
M R/apache-httpd.R => R/apache-httpd.R +61 -25
@@ 1,6 1,6 @@
#' Retrieve Apache httpd Version Release History
#'
#' Reads <https://archive.apache.org/dist/httpd/> to build a data frame of
#' Reads <https://github.com/apache/httpd> releases to build a data frame of
#' Apache `httpd` version release numbers and dates with semantic version
#' strings parsed and separate fields added. The data frame is also arranged in
#' order from lowest version to latest version and the `vers` column is an


@@ 10,32 10,68 @@
#' @export
apache_httpd_version_history <- function() {

  ap <- readr::read_lines("https://archive.apache.org/dist/httpd/")
  page <- gh::gh("/repos/apache/httpd/tags")

  apd <- xml2::read_html(paste0(ap[grepl('"(httpd-|apache_)[[:digit:]]+\\.[[:digit:]]+\\.[[:digit:]]+\\.tar', ap)], collapse="\n"))
  purrr::map_df(
    page, ~{
      list(
        vers = .x$name,
        rls_date = gh::gh(.x$commit$url)$commit$author$date # kinda dangerous
      )
    }) -> xdf

  rvest::html_text(apd) %>%
    stri_split_lines() %>%
    unlist() %>%
    stri_match_first_regex("([[:digit:]]{4}-[[:digit:]]{2}-[[:digit:]]{2})") %>%
    .[,2] -> rls_dates
  sgh_next <- purrr::safely(gh::gh_next) # to stop on gh_next() error

  rvest::html_nodes(apd, "a") %>%
    rvest::html_attr("href") %>%
    stri_match_first_regex("[-_]([[:digit:]]+\\.[[:digit:]]+\\.[[:digit:]]+)\\.tar") %>%
    .[,2] -> vers
  while(TRUE) {
    page <- sgh_next(page)
    if (is.null(page$result)) break;
    page <- page$result
    dplyr::bind_rows(
      xdf,
      purrr::map_df(
        page, ~{
          list(
            vers = .x$name,
            rls_date = gh::gh(.x$commit$url)$commit$author$date # kinda dangerous
          )
        })
    ) -> xdf
  }

  dplyr::tibble(
    vers = vers,
    rls_date = rls_dates
  ) %>%
    dplyr::distinct(vers, .keep_all=TRUE) %>%
    mutate(rls_date = as.Date(rls_date)) %>%
    mutate(rls_year = lubridate::year(rls_date)) %>%
    dplyr::bind_cols(
      semver::parse_version(.$vers) %>%
        dplyr::as_tibble()
  dplyr::mutate(xdf, vers = stri_replace_first_fixed(vers, "v", "")) %>%
    dplyr::mutate(rls_date = as.Date(stri_sub(rls_date, 1, 10))) %>%
    dplyr::mutate(rls_year = lubridate::year(rls_date)) %>%
    tidyr::separate(vers, c("major", "minor", "patch", "build"), remove=FALSE) %>%
    dplyr::mutate(prerelease = ifelse(
      stri_detect_regex(build, "[[:alpha:]]"),
      stri_extract_first_regex(build, "[[:alpha:]][[:alnum:]]+"),
      ""
    )) %>%
    dplyr::mutate(build = stri_replace_first_regex(build, "[[:alpha:]][[:alnum:]]+", "")) %>%
    dplyr::mutate_at(.vars=c("major", "minor", "patch", "build"), .funs=c(as.integer)) %>%
    dplyr::add_row(
      vers = "2.0.0",
      rls_date = as.Date("2001-02-09"),
      rls_year = 2001,
      major = 2L, minor = 0L, patch = 0L,
      prerelease = NA_character_, build = NA
    ) %>%
    dplyr::arrange(major, minor, patch) %>%
    dplyr::mutate(vers = factor(vers, levels = vers))
}
    dplyr::add_row(
      vers = "1.3.37",
      rls_date = as.Date("2006-07-27"),
      rls_year = 2006,
      major = 1L, minor = 3L, patch = 37L,
      prerelease = NA_character_, build = NA
    ) %>%
    dplyr::add_row(
      vers = "1.3.41",
      rls_date = as.Date("2009-10-03"),
      rls_year = 2009,
      major = 1L, minor = 3L, patch = 41L,
      prerelease = NA_character_, build = NA
    ) %>%
    dplyr::arrange(rls_date) %>%
    dplyr::mutate(vers = factor(vers, levels=vers)) %>%
    dplyr::select(vers, rls_date, rls_year, major, minor, patch, prerelease, build) -> out

}
\ No newline at end of file

M R/nginx.R => R/nginx.R +44 -40
@@ 10,51 10,55 @@
#' @export
nginx_version_history <- function() {

  nginx_changes_url <-
    "https://raw.githubusercontent.com/nginx/nginx/master/docs/xml/nginx/changes.xml"
  page <- gh::gh("/repos/nginx/nginx/tags")

  doc <- suppressWarnings(xml2::read_xml(nginx_changes_url))
  purrr::map_df(
    page, ~{
      list(
        vers = sub("^release-", "", .x$name),
        rls_date = gh::gh(.x$commit$url)$commit$author$date # kinda dangerous
      )
    }) -> xdf

  dplyr::tibble(
    vers = rvest::xml_nodes(doc, xpath="//changes") %>%
      xml2::xml_attr("ver"),
    ts = rvest::xml_nodes(doc, xpath="//changes") %>%
      xml2::xml_attr("date") %>%
      as.Date(),
    year = lubridate::year(ts)
  ) -> c1
  sgh_next <- purrr::safely(gh::gh_next) # to stop on gh_next() error

  c(
    readr::read_lines("https://nginx.org/en/CHANGES-1.0"),
    readr::read_lines("https://nginx.org/en/CHANGES-1.2"),
    readr::read_lines("https://nginx.org/en/CHANGES-1.4"),
    readr::read_lines("https://nginx.org/en/CHANGES-1.6"),
    readr::read_lines("https://nginx.org/en/CHANGES-1.8"),
    readr::read_lines("https://nginx.org/en/CHANGES-1.10"),
    readr::read_lines("https://nginx.org/en/CHANGES-1.12"),
    readr::read_lines("https://nginx.org/en/CHANGES-1.14")
  ) -> nl
  while(TRUE) {
    page <- sgh_next(page)
    if (is.null(page$result)) break;
    page <- page$result
    dplyr::bind_rows(
      xdf,
      purrr::map_df(
        page, ~{
          list(
            vers = sub("^release-", "", .x$name),
            rls_date = gh::gh(.x$commit$url)$commit$author$date # kinda dangerous
          )
        })
    ) -> xdf
  }

  read.csv(
    stringsAsFactors = FALSE,
    text = paste0(
      c("v,d", nl[grepl("^Changes", nl)] %>%
          gsub("Changes with nginx ", "", .) %>%
          gsub("[[:space:]]{3,}", ",", .)), collapse="\n")
  ) %>%
    dplyr::as_tibble() %>%
    dplyr::mutate(d = lubridate::dmy(d)) %>%
    dplyr::select(vers=1, ts=2) %>%
    dplyr::mutate(year = lubridate::year(ts)) -> c2

  dplyr::bind_rows(c1, c2) %>%
    dplyr::distinct() %>%
    dplyr::bind_cols(
      semver::parse_version(.$vers) %>%
        dplyr::as_tibble()
  dplyr::mutate(xdf, vers = stri_replace_first_fixed(vers, "v", "")) %>%
    dplyr::mutate(rls_date = as.Date(stri_sub(rls_date, 1, 10))) %>%
    dplyr::mutate(rls_year = lubridate::year(rls_date)) %>%
    tidyr::separate(vers, c("major", "minor", "patch", "build"), remove=FALSE) %>%
    dplyr::mutate(prerelease = ifelse(
      stri_detect_regex(build, "[[:alpha:]]"),
      stri_extract_first_regex(build, "[[:alpha:]][[:alnum:]]+"),
      ""
    )) %>%
    dplyr::mutate(build = stri_replace_first_regex(build, "[[:alpha:]][[:alnum:]]+", "")) %>%
    dplyr::mutate_at(.vars=c("major", "minor", "patch", "build"), .funs=c(as.integer)) %>%
    dplyr::bind_rows(
      tibble(
        vers = c("1.2.1"),
        rls_date = as.Date(c("2012-06-07")),
        rls_year = lubridate::year(rls_date),
        major = 1L, minor = 2L, patch = 1L,
        prerelease = NA, build = NA
      )
    ) %>%
    dplyr::arrange(major, minor, patch) %>%
    dplyr::mutate(vers = factor(vers, levels=vers)) %>%
    dplyr::rename(rls_date = ts, rls_year = year)

    dplyr::select(vers, rls_date, rls_year, major, minor, patch, prerelease, build) -> out
}
\ No newline at end of file

M man/apache_httpd_version_history.Rd => man/apache_httpd_version_history.Rd +1 -1
@@ 7,7 7,7 @@
apache_httpd_version_history()
}
\description{
Reads \url{https://archive.apache.org/dist/httpd/} to build a data frame of
Reads \url{https://github.com/apache/httpd} releases to build a data frame of
Apache \code{httpd} version release numbers and dates with semantic version
strings parsed and separate fields added. The data frame is also arranged in
order from lowest version to latest version and the \code{vers} column is an