~hrbrmstr/epidata

7e33830169237ff6cb9e871dccbaaa5688fc2573 — Bob Rudis 3 years ago 9536adc
*WIP* ; Added new functions to cover new hidden API data sources; Fixed some broken functions due to hidden API changes.
M DESCRIPTION => DESCRIPTION +1 -1
@@ 29,4 29,4 @@ Imports:
    tidyr,
    readr,
    stringi
RoxygenNote: 5.0.1
RoxygenNote: 6.0.1

M NAMESPACE => NAMESPACE +7 -0
@@ 1,17 1,23 @@
# Generated by roxygen2: do not edit by hand

export(get_annual_wages_and_work_hours)
export(get_black_white_wage_gap)
export(get_college_wage_premium)
export(get_employment_to_population_ratio)
export(get_gender_wage_gap)
export(get_health_insurance_coverage)
export(get_hispanic_white_wage_gap)
export(get_labor_force_participation_rate)
export(get_long_term_unemployment)
export(get_median_and_mean_wages)
export(get_non_high_school_wage_penalty)
export(get_pension_coverage)
export(get_productivity_and_hourly_compensation)
export(get_underemployment)
export(get_unemployment)
export(get_unemployment_by_state)
export(get_union_coverage)
export(get_wage_decomposition)
export(get_wage_ratios)
export(get_wages_by_education)
export(get_wages_by_percentile)


@@ 30,6 36,7 @@ importFrom(readr,type_convert)
importFrom(rvest,html_text)
importFrom(stats,setNames)
importFrom(stringi,"%s+%")
importFrom(stringi,stri_replace_all_fixed)
importFrom(stringi,stri_replace_all_regex)
importFrom(stringi,stri_trans_tolower)
importFrom(tidyr,gather)

M NEWS.md => NEWS.md +5 -1
@@ 1,5 1,9 @@
0.1.0 
0.2.0
* WIP
* Added new `get_` functions for new data sources provided by the EPI
* Fixed issues with different return values for some hidden API calls

0.1.0 
* Passes CRAN checks
* Complete EPI SWADL hidden API coverage
* Initial release

M R/epidata-package.R => R/epidata-package.R +1 -1
@@ 10,7 10,7 @@
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @importFrom purrr map map_df map_chr keep discard %||%
#' @importFrom stringi stri_replace_all_regex stri_trans_tolower %s+%
#' @importFrom stringi stri_replace_all_regex stri_trans_tolower %s+% stri_replace_all_fixed
#' @importFrom dplyr %>% as_data_frame mutate_all
#' @import httr
#' @importFrom readr type_convert

A R/health.r => R/health.r +47 -0
@@ 0,0 1,47 @@
#' Retrieve Health Insurance Coverage
#'
#' Employer-sponsored health insurance (ESI) coverage shows the share of workers who
#' received health insurance from their own job for which their employer paid for at
#' least some of their health insurance coverage.
#'
#' Population sample: Private-sector workers age 18–64 & at least 20 hours/week and 26 weeks/year
#'
#' @param by \code{NULL} or character string with any combination of \code{g} (Gender),
#'   \code{r} (Race), \code{e} (Education), \code{d} (Percentile), \code{l} (Entry-level)
#'   i.e. if you want to retrieve health insurance coverage data by gender and race, you
#'   would set this parameter to "\code{gr}".
#' @return \code{tbl_df} with data filtered by the selected criteria.
#' @references \href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
#' @note Data source: CPS ASEC
#' @export
#' @examples
#' get_health_insurance_coverage()
#'
#' get_health_insurance_coverage("r")
#'
#' get_health_insurance_coverage("gr")
get_health_insurance_coverage <- function(by=NULL) {

  params <- list(subject="healthcov")

  if (!is.null(by)) {
    params <- make_params(params, by, c("g", "r", "e", "d", "l"))
  }
  # Any parameter named "l" is renamed to "el" before the query is issued
  # (presumably the name the API expects for the entry-level selector).
  names(params) <- gsub("^l$", "el", names(params))

  res <- epi_query(params)

  # Normalise the column names: lower-case them, then collapse every run of
  # whitespace, en dashes and hyphens into a single underscore. The en dash is
  # written as a \u escape so the pattern is UTF-8 regardless of the session
  # locale (raw bytes pushed through rawToChar() are treated as native encoding).
  cols <- stringi::stri_trans_tolower(res$columns$name)
  cols <- stringi::stri_replace_all_regex(cols, "[[:space:]\u2013-]+", "_")

  # Build the data frame, run clean_cols over every column, then let readr
  # re-guess the column types.
  out <- setNames(as_data_frame(res$data), cols)
  out <- dplyr::mutate_all(out, "clean_cols")
  out <- suppressMessages(readr::type_convert(out))

  # The source is parsed as HTML and reduced to its text; a default citation
  # and a "None" note are used when the metadata fields are missing.
  cite <- html_text(read_html(res$meta$source %||% "<p>Economic Policy Institute</p>"))
  message(sprintf('Note: %s\nCitation: "%s"', res$meta$notes %||% "None", cite))

  out

}
\ No newline at end of file

A R/pension.r => R/pension.r +47 -0
@@ 0,0 1,47 @@
#' Retrieve Pension Coverage
#'
#' Employer-provided pension coverage shows the share of workers included in an
#' employer-provided plan for which the employer paid for at least some of their pension
#' coverage.
#'
#' Population sample: Private-sector workers age 18–64 & at least 20 hours/week and 26 weeks/year
#'
#' @param by \code{NULL} or character string with any combination of \code{g} (Gender),
#'   \code{r} (Race), \code{e} (Education), \code{d} (Percentile), \code{l} (Entry-level)
#'   i.e. if you want to retrieve pension data by gender and race, you would set this
#'   parameter to "\code{gr}".
#' @return \code{tbl_df} with data filtered by the selected criteria.
#' @references \href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
#' @note Data source: CPS ASEC
#' @export
#' @examples
#' get_pension_coverage()
#'
#' get_pension_coverage("r")
#'
#' get_pension_coverage("gr")
get_pension_coverage <- function(by=NULL) {

  params <- list(subject="pensioncov")

  if (!is.null(by)) {
    params <- make_params(params, by, c("g", "r", "e", "d", "l"))
  }
  # Any parameter named "l" is renamed to "el" before the query is issued
  # (presumably the name the API expects for the entry-level selector).
  names(params) <- gsub("^l$", "el", names(params))

  res <- epi_query(params)

  # Normalise the column names: lower-case them, then collapse every run of
  # whitespace, en dashes and hyphens into a single underscore. The en dash is
  # written as a \u escape so the pattern is UTF-8 regardless of the session
  # locale (raw bytes pushed through rawToChar() are treated as native encoding).
  cols <- stringi::stri_trans_tolower(res$columns$name)
  cols <- stringi::stri_replace_all_regex(cols, "[[:space:]\u2013-]+", "_")

  # Build the data frame, run clean_cols over every column, then let readr
  # re-guess the column types.
  out <- setNames(as_data_frame(res$data), cols)
  out <- dplyr::mutate_all(out, "clean_cols")
  out <- suppressMessages(readr::type_convert(out))

  # The source is parsed as HTML and reduced to its text; a default citation
  # and a "None" note are used when the metadata fields are missing.
  cite <- html_text(read_html(res$meta$source %||% "<p>Economic Policy Institute</p>"))
  message(sprintf('Note: %s\nCitation: "%s"', res$meta$notes %||% "None", cite))

  out

}
\ No newline at end of file

A R/productivity.r => R/productivity.r +44 -0
@@ 0,0 1,44 @@
#' Retrieve Productivity and Hourly Compensation
#'
#' Productivity is how much workers produce per hour, or the growth of output of goods and
#' services minus depreciation per hour worked. Compensation is made up of both nonwage
#' payments and wages.
#'
#' Wages are in 2015 dollars. Median compensation is calculated using hourly wage medians
#' from the CPS ORG and compensation from NIPA.
#'
#' Population sample: All workers & Production and nonsupervisory workers
#'
#' @param by \code{NULL} or character string of \code{g} (Gender)
#' @return \code{tbl_df} with data filtered by the selected criteria.
#' @references \href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
#' @note Data source: NIPA (compensation) | BLS Productivity Data
#' @export
#' @examples
#' get_productivity_and_hourly_compensation()
#'
#' get_productivity_and_hourly_compensation("g")
get_productivity_and_hourly_compensation <- function(by=NULL) {

  params <- list(subject="prodpay")

  if (!is.null(by)) {
    params <- make_params(params, by, "g")
  }

  res <- epi_query(params)

  # Normalise the column names: lower-case them, then collapse every run of
  # whitespace, en dashes and hyphens into a single underscore. The en dash is
  # written as a \u escape so the pattern is UTF-8 regardless of the session
  # locale (raw bytes pushed through rawToChar() are treated as native encoding).
  cols <- stringi::stri_trans_tolower(res$columns$name)
  cols <- stringi::stri_replace_all_regex(cols, "[[:space:]\u2013-]+", "_")

  # Build the data frame, run clean_cols over every column, then let readr
  # re-guess the column types.
  out <- setNames(as_data_frame(res$data), cols)
  out <- dplyr::mutate_all(out, "clean_cols")
  out <- suppressMessages(readr::type_convert(out))

  # The source is parsed as HTML and reduced to its text; a default citation
  # and a "None" note are used when the metadata fields are missing.
  cite <- html_text(read_html(res$meta$source %||% "<p>Economic Policy Institute</p>"))
  message(sprintf('Note: %s\nCitation: "%s"', res$meta$notes %||% "None", cite))

  out

}

A R/union.r => R/union.r +32 -0
@@ 0,0 1,32 @@
#' Retrieve Union Coverage
#'
#' The union coverage rate shows the percentage of the workforce covered by a collective
#' bargaining agreement.
#'
#' @return \code{tbl_df}
#' @references \href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
#' @note Data source: CPS ORG | Hirsch and Macpherson (2003)
#' @export
#' @examples
#' get_union_coverage()
get_union_coverage <- function() {

  params <- list(subject="unioncov")

  res <- epi_query(params)

  # Normalise the column names: lower-case them, drop parentheses, then collapse
  # every run of whitespace, en dashes and hyphens into a single underscore.
  # The en dash is written as a \u escape so the pattern is UTF-8 regardless of
  # the session locale (raw bytes pushed through rawToChar() are treated as
  # native encoding).
  cols <- stringi::stri_trans_tolower(res$columns$name)
  cols <- stringi::stri_replace_all_regex(cols, "[\\(\\)]", "")
  cols <- stringi::stri_replace_all_regex(cols, "[[:space:]\u2013-]+", "_")

  # Build the data frame, run clean_cols over every column, then let readr
  # re-guess the column types.
  out <- setNames(as_data_frame(res$data), cols)
  out <- dplyr::mutate_all(out, "clean_cols")
  out <- suppressMessages(readr::type_convert(out))

  # The source is parsed as HTML and reduced to its text; a default citation
  # and a "None" note are used when the metadata fields are missing.
  cite <- html_text(read_html(res$meta$source %||% "<p>Economic Policy Institute</p>"))
  message(sprintf('Note: %s\nCitation: "%s"', res$meta$notes %||% "None", cite))

  out

}

M R/utils.r => R/utils.r +2 -0
@@ 12,6 12,8 @@ make_params <- function(params, by, ok="") {

clean_cols <- function(x) {

  x <- stringi::stri_replace_all_fixed(x, ",", "")

  if (any(grepl("%", x))) {
    as.numeric(stringi::stri_replace_all_fixed(x, "%", ""))/100
  } else if (any(grepl("\\$", x))) {

A R/wage_decomposition.r => R/wage_decomposition.r +42 -0
@@ 0,0 1,42 @@
#' Retrieve Wage Decomposition
#'
#' Wage inequality data shows the overall wage inequality and the within-group and
#' between-group wage inequality over time. These measures allow an examination of how
#' much of the change in overall wage inequality in particular periods was due to changes
#' in within-group and between-group wage inequality.
#'
#' Population sample: Wage and salary workers age 18–64
#'
#' @param by \code{NULL} or character string of \code{g} (Gender)
#' @return \code{tbl_df} with data filtered by the selected criteria.
#' @references \href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
#' @note Data source: CPS ORG
#' @export
#' @examples
#' get_wage_decomposition()
#'
#' get_wage_decomposition("g")
get_wage_decomposition <- function(by=NULL) {

  params <- list(subject="wageineq")

  if (!is.null(by)) {
    params <- make_params(params, by, "g")
  }

  res <- epi_query(params)

  # Normalise the column names: lower-case them, then collapse every run of
  # whitespace, en dashes and hyphens into a single underscore. The en dash is
  # written as a \u escape so the pattern is UTF-8 regardless of the session
  # locale (raw bytes pushed through rawToChar() are treated as native encoding).
  cols <- stringi::stri_trans_tolower(res$columns$name)
  cols <- stringi::stri_replace_all_regex(cols, "[[:space:]\u2013-]+", "_")

  # Build the data frame, run clean_cols over every column, then let readr
  # re-guess the column types.
  out <- setNames(as_data_frame(res$data), cols)
  out <- dplyr::mutate_all(out, "clean_cols")
  out <- suppressMessages(readr::type_convert(out))

  # The source is parsed as HTML and reduced to its text; a default citation
  # and a "None" note are used when the metadata fields are missing.
  cite <- html_text(read_html(res$meta$source %||% "<p>Economic Policy Institute</p>"))
  message(sprintf('Note: %s\nCitation: "%s"', res$meta$notes %||% "None", cite))

  out

}

M R/wages.r => R/wages.r +8 -7
@@ 5,9 5,10 @@
#' the arithmetic mean of hourly wages; or, the sum of all workers' hourly wages divided
#' by the number of workers.
#'
#' @param by \code{NULL} or character string with any combination of \code{g} (Gender) or
#'   \code{r} (Race), i.e. if you want to retrieve
#'   unemployment data by gender and race, you would set this parameter to "\code{gr}".
#' @param by \code{NULL} or character string with any combination of \code{g} (Gender),
#'   \code{r} (Race), \code{e} (Education), \code{d} (Percentile), \code{l} (Entry-level)
#'   i.e. if you want to retrieve wage data by gender and race, you would set this
#'   parameter to "\code{gr}".
#' @return \code{tbl_df} with data filtered by the selected criteria.
#' @references \href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
#' @export


@@ 19,11 20,11 @@
#' get_median_and_mean_wages("gr")
get_median_and_mean_wages <- function(by=NULL) {

  params <- list(preset="wage-avg")
  params <- list(subject="wage-avg")

  if (!is.null(by)) {
    params <- make_params(params, by, c("g", "r"))
    params <- c(params, list(subject="wage", d="50,mean"))
    params <- make_params(params, by, c("g", "r", "e", "d", "l"))
    names(params) <- gsub("^l$", "el", names(params))
  }

  res <- epi_query(params)


@@ 36,7 37,7 @@ get_median_and_mean_wages <- function(by=NULL) {
  out <- dplyr::mutate_all(out, "clean_cols")
  out <- suppressMessages(readr::type_convert(out))

  cite <- html_text(read_html(res$meta$source %||% "<p>Economic Policy Institute</p>"))
  cite <- "Economic Policy Institute"
  message(sprintf('Note: %s\nCitation: "%s"', res$meta$notes %||% "None", cite))

  out

A R/wages_and_hours.r => R/wages_and_hours.r +34 -0
@@ 0,0 1,34 @@
#' Retrieve CPS ASEC Annual Wages and Work Hours
#'
#' Annual, weekly, and hourly wages and work hours show the average wages and work hours
#' of wage and salary workers using data from the CPS ASEC (also known as the March CPS).
#' Note that this data is not directly comparable to the CPS ORG data in median/average
#' hourly wage.
#'
#' @return \code{tbl_df}
#' @references \href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
#' @note Data source: CPS ASEC | Murphy and Welch (1989)
#' @export
#' @examples
#' get_annual_wages_and_work_hours()
get_annual_wages_and_work_hours <- function() {

  params <- list(subject="hours")

  res <- epi_query(params)

  # Normalise the column names: lower-case them, drop parentheses, then collapse
  # every run of whitespace, en dashes and hyphens into a single underscore.
  # The en dash is written as a \u escape so the pattern is UTF-8 regardless of
  # the session locale (raw bytes pushed through rawToChar() are treated as
  # native encoding).
  cols <- stringi::stri_trans_tolower(res$columns$name)
  cols <- stringi::stri_replace_all_regex(cols, "[\\(\\)]", "")
  cols <- stringi::stri_replace_all_regex(cols, "[[:space:]\u2013-]+", "_")

  # Build the data frame, run clean_cols over every column, then let readr
  # re-guess the column types.
  out <- setNames(as_data_frame(res$data), cols)
  out <- dplyr::mutate_all(out, "clean_cols")
  out <- suppressMessages(readr::type_convert(out))

  # The source is parsed as HTML and reduced to its text; a default citation
  # and a "None" note are used when the metadata fields are missing.
  cite <- html_text(read_html(res$meta$source %||% "<p>Economic Policy Institute</p>"))
  message(sprintf('Note: %s\nCitation: "%s"', res$meta$notes %||% "None", cite))

  out

}

M README.Rmd => README.Rmd +10 -2
@@ 14,6 14,7 @@ time and among demographic groups. Data is usually updated monthly.

The following functions are implemented:

- `get_annual_wages_and_work_hours`: Retrieve CPS ASEC Annual Wages and Work Hours
- `get_black_white_wage_gap`:	Retrieve the percent by which hourly wages of black workers 
   are less than hourly wages of white workers
- `get_college_wage_premium`:	Retrieve the percent by which hourly wages of college graduates 


@@ 22,6 23,7 @@ The following functions are implemented:
   population that is employed
- `get_gender_wage_gap`:	Retrieve the percent by which hourly wages of female workers are 
   less than hourly wages of male workers
- `get_health_insurance_coverage`: Retrieve Health Insurance Coverage
- `get_hispanic_white_wage_gap`:	Retrieve the percent by which hourly wages of Hispanic 
   workers are less than hourly wages of white workers
- `get_labor_force_participation_rate`:	Retrieve the share of the civilian noninstitutional


@@ 29,15 31,18 @@ The following functions are implemented:
- `get_long_term_unemployment`:	Retrieve the share of the labor force that has been unemployed 
   for six months or longer
- `get_median_and_mean_wages`:	Retrieve the hourly wage in the middle of the wage distribution
- `get_pension_coverage`:  Retrieve Pension Coverage
- `get_non_high_school_wage_penalty`:	Retrieve the percent by which hourly wages of workers 
   without a high school diploma (or equivalent) are less than wages of otherwise equivalent 
   workers who have graduated from high school
- `get_underemployment`:	Retrieve the share of the labor force that is "underemployed"
- `get_unemployment`:	Retrieve the share of the labor force without a job
- `get_unemployment_by_state`:	Retrieve the share of the labor force without a job (by state)
- `get_union_coverage`:  Retrieve Union Coverage
- `get_wages_by_education`:	Retrieve the average hourly wages of workers disaggregated by the 
   highest level of education attained
- `get_wages_by_percentile`:	Retrieve wages at ten distinct points in the wage distribution
- `get_wage_decomposition`:  Retrieve Wage Decomposition
- `get_wage_ratios`:	Retrieve the level of inequality within the hourly wage distribution.

### Installation


@@ 71,6 76,7 @@ get_median_and_mean_wages("gr")
library(tidyverse)
library(epidata)
library(ggrepel)
library(hrbrthemes)

unemployment <- get_unemployment()
wages <- get_median_and_mean_wages()


@@ 87,6 93,8 @@ group_by(unemployment, date=as.integer(lubridate::year(date))) %>%

cols <- ggthemes::tableau_color_pal()(3)

update_geom_font_defaults(font_rc)

ggplot(df, aes(rate, median)) +
  geom_path(color=cols[1], arrow=arrow(type="closed", length=unit(10, "points"))) +
  geom_point() +


@@ 96,13 104,13 @@ ggplot(df, aes(rate, median)) +
                   color=c(cols[2],
                           rep("#2b2b2b", (nrow(df)-2)),
                           cols[3]),
                   family="Hind Medium") +
                   family=font_rc) +
  scale_x_continuous(name="Unemployment Rate", expand=c(0,0.001), label=scales::percent) +
  scale_y_continuous(name="Median Wage", expand=c(0,0.25), label=scales::dollar) +
  labs(title="U.S. Unemployment Rate vs Median Wage Since 1978",
       subtitle="Wage data is in 2015 USD",
       caption="Source: EPI analysis of Current Population Survey Outgoing Rotation Group microdata") +
  hrbrmisc::theme_hrbrmstr(grid="XY")
  theme_ipsum_rc(grid="XY")
```

### Test Results

M README.md => README.md +52 -44
@@ 7,20 7,25 @@ The [Economic Policy Institute](http://www.epi.org/data/) provides researchers, 

The following functions are implemented:

-   `get_annual_wages_and_work_hours`: Retrieve CPS ASEC Annual Wages and Work Hours
-   `get_black_white_wage_gap`: Retrieve the percent by which hourly wages of black workers are less than hourly wages of white workers
-   `get_college_wage_premium`: Retrieve the percent by which hourly wages of college graduates exceed those of otherwise equivalent high school graduates
-   `get_employment_to_population_ratio`: Retrieve the share of the civilian noninstitutional population that is employed
-   `get_gender_wage_gap`: Retrieve the percent by which hourly wages of female workers are less than hourly wages of male workers
-   `get_health_insurance_coverage`: Retrieve Health Insurance Coverage
-   `get_hispanic_white_wage_gap`: Retrieve the percent by which hourly wages of Hispanic workers are less than hourly wages of white workers
-   `get_labor_force_participation_rate`: Retrieve the share of the civilian noninstitutional population that is in the labor force
-   `get_long_term_unemployment`: Retrieve the share of the labor force that has been unemployed for six months or longer
-   `get_median_and_mean_wages`: Retrieve the hourly wage in the middle of the wage distribution
-   `get_pension_coverage`: Retrieve Pension Coverage
-   `get_non_high_school_wage_penalty`: Retrieve the percent by which hourly wages of workers without a high school diploma (or equivalent) are less than wages of otherwise equivalent workers who have graduated from high school
-   `get_underemployment`: Retrieve the share of the labor force that is "underemployed"
-   `get_unemployment`: Retrieve the share of the labor force without a job
-   `get_unemployment_by_state`: Retrieve the share of the labor force without a job (by state)
-   `get_union_coverage`: Retrieve Union Coverage
-   `get_wages_by_education`: Retrieve the average hourly wages of workers disaggregated by the highest level of education attained
-   `get_wages_by_percentile`: Retrieve wages at ten distinct points in the wage distribution
-   `get_wage_decomposition`: Retrieve Wage Decomposition
-   `get_wage_ratios`: Retrieve the level of inequality within the hourly wage distribution.

### Installation


@@ 44,58 49,58 @@ packageVersion("epidata")
get_black_white_wage_gap()
```

    ## # A tibble: 43 × 8
    ## # A tibble: 44 x 8
    ##     date white_median white_average black_median black_average gap_median gap_average gap_regression_based
    ##    <int>        <dbl>         <dbl>        <dbl>         <dbl>      <dbl>       <dbl>                <dbl>
    ## 1   1973        17.19         19.68        13.50         15.43      0.215       0.216                0.124
    ## 2   1974        16.73         19.22        13.34         15.19      0.203       0.210                0.105
    ## 3   1975        16.54         19.23        13.43         15.14      0.188       0.213                0.107
    ## 4   1976        16.73         19.39        13.45         15.74      0.196       0.188                0.089
    ## 5   1977        16.71         19.33        13.41         15.51      0.198       0.197                0.095
    ## 6   1978        16.72         19.43        13.38         15.72      0.200       0.191                0.092
    ## 7   1979        16.88         19.64        13.85         16.09      0.180       0.181                0.090
    ## 8   1980        16.58         19.23        13.50         15.73      0.185       0.182                0.092
    ## 9   1981        16.22         19.10        13.33         15.64      0.178       0.181                0.087
    ## 10  1982        16.47         19.26        13.17         15.45      0.200       0.198                0.103
    ## # ... with 33 more rows
    ##  1  1973        17.41         19.93        13.67         15.63      0.215       0.216                0.120
    ##  2  1974        16.94         19.46        13.51         15.38      0.203       0.210                0.107
    ##  3  1975        16.75         19.47        13.60         15.33      0.188       0.213                0.105
    ##  4  1976        16.94         19.63        13.62         15.94      0.196       0.188                0.089
    ##  5  1977        16.93         19.57        13.58         15.71      0.198       0.197                0.094
    ##  6  1978        16.93         19.68        13.55         15.92      0.200       0.191                0.092
    ##  7  1979        17.10         19.89        14.02         16.29      0.180       0.181                0.090
    ##  8  1980        16.79         19.47        13.67         15.93      0.185       0.182                0.092
    ##  9  1981        16.42         19.34        13.50         15.84      0.178       0.181                0.087
    ## 10  1982        16.68         19.51        13.34         15.65      0.200       0.198                0.103
    ## # ... with 34 more rows

``` r
get_underemployment()
```

    ## # A tibble: 416 × 2
    ## # A tibble: 325 x 2
    ##          date   all
    ##        <date> <dbl>
    ## 1  1989-12-01 0.093
    ## 2  1990-01-01 0.093
    ## 3  1990-02-01 0.093
    ## 4  1990-03-01 0.094
    ## 5  1990-04-01 0.094
    ## 6  1990-05-01 0.094
    ## 7  1990-06-01 0.094
    ## 8  1990-07-01 0.094
    ## 9  1990-08-01 0.095
    ##  1 1989-12-01 0.093
    ##  2 1990-01-01 0.093
    ##  3 1990-02-01 0.093
    ##  4 1990-03-01 0.094
    ##  5 1990-04-01 0.094
    ##  6 1990-05-01 0.094
    ##  7 1990-06-01 0.094
    ##  8 1990-07-01 0.094
    ##  9 1990-08-01 0.095
    ## 10 1990-09-01 0.095
    ## # ... with 406 more rows
    ## # ... with 315 more rows

``` r
get_median_and_mean_wages("gr")
```

    ## # A tibble: 43 × 25
    ## # A tibble: 44 x 25
    ##     date median average men_median men_average women_median women_average white_median white_average black_median
    ##    <int>  <dbl>   <dbl>      <dbl>       <dbl>        <dbl>         <dbl>        <dbl>         <dbl>        <dbl>
    ## 1   1973  16.53   19.05      19.89       22.32        12.47         14.30        17.19         19.68        13.50
    ## 2   1974  16.17   18.67      19.63       21.89        12.39         14.04        16.73         19.22        13.34
    ## 3   1975  16.05   18.64      19.76       21.81        12.43         14.14        16.54         19.23        13.43
    ## 4   1976  16.15   18.87      19.41       22.05        12.56         14.53        16.73         19.39        13.45
    ## 5   1977  16.07   18.77      19.84       22.05        12.51         14.36        16.71         19.33        13.41
    ## 6   1978  16.36   18.83      20.04       22.18        12.56         14.44        16.72         19.43        13.38
    ## 7   1979  16.15   19.06      20.30       22.46        12.66         14.63        16.88         19.64        13.85
    ## 8   1980  16.07   18.66      19.98       22.00        12.60         14.46        16.58         19.23        13.50
    ## 9   1981  15.66   18.52      19.52       21.82        12.53         14.44        16.22         19.10        13.33
    ## 10  1982  15.75   18.65      19.30       21.96        12.61         14.68        16.47         19.26        13.17
    ## # ... with 33 more rows, and 15 more variables: black_average <dbl>, hispanic_median <dbl>, hispanic_average <dbl>,
    ##  1  1973  16.74   19.30      20.14       22.60        12.63         14.48        17.41         19.93        13.67
    ##  2  1974  16.37   18.91      19.88       22.17        12.54         14.22        16.94         19.46        13.51
    ##  3  1975  16.26   18.87      20.01       22.09        12.59         14.32        16.75         19.47        13.60
    ##  4  1976  16.36   19.11      19.65       22.33        12.72         14.71        16.94         19.63        13.62
    ##  5  1977  16.28   19.00      20.09       22.33        12.66         14.54        16.93         19.57        13.58
    ##  6  1978  16.57   19.07      20.29       22.46        12.72         14.62        16.93         19.68        13.55
    ##  7  1979  16.36   19.30      20.55       22.75        12.82         14.82        17.10         19.89        14.02
    ##  8  1980  16.28   18.89      20.24       22.28        12.76         14.65        16.79         19.47        13.67
    ##  9  1981  15.85   18.75      19.77       22.09        12.69         14.62        16.42         19.34        13.50
    ## 10  1982  15.95   18.89      19.54       22.24        12.76         14.87        16.68         19.51        13.34
    ## # ... with 34 more rows, and 15 more variables: black_average <dbl>, hispanic_median <dbl>, hispanic_average <dbl>,
    ## #   white_men_median <dbl>, white_men_average <dbl>, black_men_median <dbl>, black_men_average <dbl>,
    ## #   hispanic_men_median <dbl>, hispanic_men_average <dbl>, white_women_median <dbl>, white_women_average <dbl>,
    ## #   black_women_median <dbl>, black_women_average <dbl>, hispanic_women_median <dbl>, hispanic_women_average <dbl>


@@ 106,6 111,7 @@ get_median_and_mean_wages("gr")
library(tidyverse)
library(epidata)
library(ggrepel)
library(hrbrthemes)

unemployment <- get_unemployment()
wages <- get_median_and_mean_wages()


@@ 113,17 119,17 @@ wages <- get_median_and_mean_wages()
glimpse(wages)
```

    ## Observations: 43
    ## Observations: 44
    ## Variables: 3
    ## $ date    <int> 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 198...
    ## $ median  <dbl> 16.53, 16.17, 16.05, 16.15, 16.07, 16.36, 16.15, 16.07, 15.66, 15.75, 15.71, 15.71, 15.80, 16.27, 1...
    ## $ average <dbl> 19.05, 18.67, 18.64, 18.87, 18.77, 18.83, 19.06, 18.66, 18.52, 18.65, 18.67, 18.75, 18.96, 19.41, 1...
    ## $ median  <dbl> 16.74, 16.37, 16.26, 16.36, 16.28, 16.57, 16.36, 16.28, 15.85, 15.95, 15.91, 15.91, 16.00, 16.47, 1...
    ## $ average <dbl> 19.30, 18.91, 18.87, 19.11, 19.00, 19.07, 19.30, 18.89, 18.75, 18.89, 18.91, 18.99, 19.20, 19.66, 1...

``` r
glimpse(unemployment)
```

    ## Observations: 456
    ## Observations: 457
    ## Variables: 2
    ## $ date <date> 1978-12-01, 1979-01-01, 1979-02-01, 1979-03-01, 1979-04-01, 1979-05-01, 1979-06-01, 1979-07-01, 1979-...
    ## $ all  <dbl> 0.061, 0.061, 0.060, 0.060, 0.059, 0.059, 0.059, 0.058, 0.058, 0.058, 0.059, 0.059, 0.059, 0.059, 0.05...


@@ 137,6 143,8 @@ group_by(unemployment, date=as.integer(lubridate::year(date))) %>%

cols <- ggthemes::tableau_color_pal()(3)

update_geom_font_defaults(font_rc)

ggplot(df, aes(rate, median)) +
  geom_path(color=cols[1], arrow=arrow(type="closed", length=unit(10, "points"))) +
  geom_point() +


@@ 146,16 154,16 @@ ggplot(df, aes(rate, median)) +
                   color=c(cols[2],
                           rep("#2b2b2b", (nrow(df)-2)),
                           cols[3]),
                   family="Hind Medium") +
                   family=font_rc) +
  scale_x_continuous(name="Unemployment Rate", expand=c(0,0.001), label=scales::percent) +
  scale_y_continuous(name="Median Wage", expand=c(0,0.25), label=scales::dollar) +
  labs(title="U.S. Unemployment Rate vs Median Wage Since 1978",
       subtitle="Wage data is in 2015 USD",
       caption="Source: EPI analysis of Current Population Survey Outgoing Rotation Group microdata") +
  hrbrmisc::theme_hrbrmstr(grid="XY")
  theme_ipsum_rc(grid="XY")
```

<img src="README_files/figure-markdown_github/unnamed-chunk-4-1.png" width="960" />
<img src="README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-4-1.png" width="960" />

### Test Results



@@ 166,13 174,13 @@ library(testthat)
date()
```

    ## [1] "Thu Jan  5 13:37:48 2017"
    ## [1] "Tue Aug  1 18:06:05 2017"

``` r
test_dir("tests/")
```

    ## testthat results ========================================================================================================
    ## OK: 15 SKIPPED: 0 FAILED: 0
    ## OK: 21 SKIPPED: 0 FAILED: 0
    ## 
    ## DONE ===================================================================================================================

A README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-4-1.png => README_files/figure-markdown_github-ascii_identifiers/unnamed-chunk-4-1.png +0 -0

M man/epidata.Rd => man/epidata.Rd +0 -1
@@ 15,4 15,3 @@ demographic groups. Data is usually updated monthly.
\author{
Bob Rudis (bob@rud.is)
}


A man/get_annual_wages_and_work_hours.Rd => man/get_annual_wages_and_work_hours.Rd +26 -0
@@ 0,0 1,26 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/wages_and_hours.r
\name{get_annual_wages_and_work_hours}
\alias{get_annual_wages_and_work_hours}
\title{Retreive CPS ASEC Annual Wages and Work Hours}
\usage{
get_annual_wages_and_work_hours()
}
\value{
\code{tbl_df}
}
\description{
Annual, weekly, and hourly wages and work hours show the average wages and work hours
of wage and salary workers using data from the CPS ASEC (also known as the March CPS).
Note that this data is not directly comparable to the CPS ORG data in median/average
hourly wage.
}
\note{
CPS ASEC | Murphy and Welch (1989)
}
\examples{
get_annual_wages_and_work_hours()
}
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}

M man/get_black_white_wage_gap.Rd => man/get_black_white_wage_gap.Rd +0 -1
@@ 37,4 37,3 @@ get_black_white_wage_gap("g")
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}


M man/get_college_wage_premium.Rd => man/get_college_wage_premium.Rd +0 -1
@@ 27,4 27,3 @@ get_college_wage_premium("g")
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}


M man/get_employment_to_population_ratio.Rd => man/get_employment_to_population_ratio.Rd +0 -1
@@ 27,4 27,3 @@ get_employment_to_population_ratio("grae")
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}


M man/get_gender_wage_gap.Rd => man/get_gender_wage_gap.Rd +0 -1
@@ 36,4 36,3 @@ get_gender_wage_gap("r")
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}


A man/get_health_insurance_coverage.Rd => man/get_health_insurance_coverage.Rd +38 -0
@@ 0,0 1,38 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/health.r
\name{get_health_insurance_coverage}
\alias{get_health_insurance_coverage}
\title{Retrieve Health Insurance Coverage}
\usage{
get_health_insurance_coverage(by = NULL)
}
\arguments{
\item{by}{\code{NULL} or character string with any combination of \code{g} (Gender),
\code{r} (Race), \code{e} (Education), \code{d} (Percentile), \code{l} (Entry-level)
i.e. if you want to retrieve health insurance coverage data by gender and race, you would set this
parameter to "\code{gr}".}
}
\value{
\code{tbl_df} with data filtered by the selected criteria.
}
\description{
Employer-sponsored health insurance (ESI) coverage shows the share of workers who
received health insurance from their own job for which their employer paid for at
least some of their health insurance coverage.
}
\details{
Population sample: Private-sector workers age 18–64 & at least 20 hours/week and 26 weeks/year
}
\note{
Data source: CPS ASEC
}
\examples{
get_health_insurance_coverage()

get_health_insurance_coverage("r")

get_health_insurance_coverage("gr")
}
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}

M man/get_hispanic_white_wage_gap.Rd => man/get_hispanic_white_wage_gap.Rd +0 -1
@@ 37,4 37,3 @@ get_hispanic_white_wage_gap("g")
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}


M man/get_labor_force_participation_rate.Rd => man/get_labor_force_participation_rate.Rd +0 -1
@@ 27,4 27,3 @@ get_labor_force_participation_rate("grae")
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}


M man/get_long_term_unemployment.Rd => man/get_long_term_unemployment.Rd +0 -1
@@ 27,4 27,3 @@ get_long_term_unemployment("grae")
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}


M man/get_median_and_mean_wages.Rd => man/get_median_and_mean_wages.Rd +4 -4
@@ 7,9 7,10 @@
get_median_and_mean_wages(by = NULL)
}
\arguments{
\item{by}{\code{NULL} or character string with any combination of \code{g} (Gender) or
\code{r} (Race), i.e. if you want to retrieve
unemployment data by gender and race, you would set this parameter to "\code{gr}".}
\item{by}{\code{NULL} or character string with any combination of \code{g} (Gender),
\code{r} (Race), \code{e} (Education), \code{d} (Percentile), \code{l} (Entry-level)
i.e. if you want to retrieve wage data by gender and race, you would set this
parameter to "\code{gr}".}
}
\value{
\code{tbl_df} with data filtered by the selected criteria.


@@ 30,4 31,3 @@ get_median_and_mean_wages("gr")
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}


M man/get_non_high_school_wage_penalty.Rd => man/get_non_high_school_wage_penalty.Rd +0 -1
@@ 30,4 30,3 @@ get_non_high_school_wage_penalty("g")
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}


A man/get_pension_coverage.Rd => man/get_pension_coverage.Rd +38 -0
@@ 0,0 1,38 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pension.r
\name{get_pension_coverage}
\alias{get_pension_coverage}
\title{Retrieve Pension Coverage}
\usage{
get_pension_coverage(by = NULL)
}
\arguments{
\item{by}{\code{NULL} or character string with any combination of \code{g} (Gender),
\code{r} (Race), \code{e} (Education), \code{d} (Percentile), \code{l} (Entry-level)
i.e. if you want to retrieve pension data by gender and race, you would set this
parameter to "\code{gr}".}
}
\value{
\code{tbl_df} with data filtered by the selected criteria.
}
\description{
Employer-provided pension coverage shows the share of workers included in an
employer-provided plan for which the employer paid for at least some of their pension
coverage.
}
\details{
Population sample: Private-sector workers age 18–64 & at least 20 hours/week and 26 weeks/year
}
\note{
Data source: CPS ASEC
}
\examples{
get_pension_coverage()

get_pension_coverage("r")

get_pension_coverage("gr")
}
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}

A man/get_productivity_and_hourly_compensation.Rd => man/get_productivity_and_hourly_compensation.Rd +36 -0
@@ 0,0 1,36 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/productivity.r
\name{get_productivity_and_hourly_compensation}
\alias{get_productivity_and_hourly_compensation}
\title{Retrieve Productivity and hourly compensation}
\usage{
get_productivity_and_hourly_compensation(by = NULL)
}
\arguments{
\item{by}{\code{NULL} or character string of \code{g} (Gender)}
}
\value{
\code{tbl_df} with data filtered by the selected criteria.
}
\description{
Productivity is how much workers produce per hour, or the growth of output of goods and
services minus depreciation per hour worked. Compensation is made up of both nonwage
payments and wages.
}
\details{
Wages are in 2015 dollars. Median compensation is calculated using hourly wage medians
from the CPS ORG and compensation from NIPA.

Population sample: All workers & Production and nonsupervisory workers
}
\note{
Data source: NIPA (compensation) | BLS Productivity Data
}
\examples{
get_productivity_and_hourly_compensation()

get_productivity_and_hourly_compensation("g")
}
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}

M man/get_underemployment.Rd => man/get_underemployment.Rd +0 -1
@@ 31,4 31,3 @@ get_underemployment("grae")
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}


M man/get_unemployment.Rd => man/get_unemployment.Rd +0 -1
@@ 30,4 30,3 @@ get_unemployment("grae")
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}


M man/get_unemployment_by_state.Rd => man/get_unemployment_by_state.Rd +0 -1
@@ 26,4 26,3 @@ get_unemployment_by_state("r")
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}


A man/get_union_coverage.Rd => man/get_union_coverage.Rd +24 -0
@@ 0,0 1,24 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/union.r
\name{get_union_coverage}
\alias{get_union_coverage}
\title{Retrieve Union Coverage}
\usage{
get_union_coverage()
}
\value{
\code{tbl_df}
}
\description{
The union coverage rate shows the percentage of the workforce covered by a collective
bargaining agreement.
}
\note{
Data source: CPS ORG | Hirsch and Macpherson (2003)
}
\examples{
get_union_coverage()
}
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}

A man/get_wage_decomposition.Rd => man/get_wage_decomposition.Rd +34 -0
@@ 0,0 1,34 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/wage_decomposition.r
\name{get_wage_decomposition}
\alias{get_wage_decomposition}
\title{Retrieve Wage Decomposition}
\usage{
get_wage_decomposition(by = NULL)
}
\arguments{
\item{by}{\code{NULL} or character string of \code{g} (Gender)}
}
\value{
\code{tbl_df} with data filtered by the selected criteria.
}
\description{
Wage inequality data shows the overall wage inequality and the within-group and
between-group wage inequality over time. These measures allow an examination of how
much of the change in overall wage inequality in particular periods was due to changes
in within-group and between-group wage inequality.
}
\details{
Population sample: Wage and salary workers age 18–64
}
\note{
Data source: CPS ORG
}
\examples{
get_wage_decomposition()

get_wage_decomposition("g")
}
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}

M man/get_wage_ratios.Rd => man/get_wage_ratios.Rd +0 -1
@@ 39,4 39,3 @@ get_wage_ratios("gr")
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}


M man/get_wages_by_education.Rd => man/get_wages_by_education.Rd +0 -1
@@ 30,4 30,3 @@ get_wages_by_education("gr")
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}


M man/get_wages_by_percentile.Rd => man/get_wages_by_percentile.Rd +0 -1
@@ 29,4 29,3 @@ get_wages_by_percentile("gr")
\references{
\href{http://www.epi.org/data/}{Economic Policy Institute Data Library}
}


M tests/testthat/test-epidata.R => tests/testthat/test-epidata.R +7 -1
@@ 3,20 3,26 @@ test_that("we can do something", {

  testthat::skip_on_cran()

  expect_that(get_annual_wages_and_work_hours(), is_a("data.frame"))
  expect_that(get_black_white_wage_gap(), is_a("data.frame"))
  expect_that(get_college_wage_premium(), is_a("data.frame"))
  expect_that(get_employment_to_population_ratio(), is_a("data.frame"))
  expect_that(get_gender_wage_gap(), is_a("data.frame"))
  expect_that(get_health_insurance_coverage(), is_a("data.frame"))
  expect_that(get_hispanic_white_wage_gap(), is_a("data.frame"))
  expect_that(get_labor_force_participation_rate(), is_a("data.frame"))
  expect_that(get_long_term_unemployment(), is_a("data.frame"))
  expect_that(get_median_and_mean_wages(), is_a("data.frame"))
  expect_that(get_non_high_school_wage_penalty(), is_a("data.frame"))
  expect_that(get_pension_coverage(), is_a("data.frame"))
  expect_that(get_productivity_and_hourly_compensation(), is_a("data.frame"))
  expect_that(get_underemployment(), is_a("data.frame"))
  expect_that(get_unemployment(), is_a("data.frame"))
  expect_that(get_unemployment_by_state(), is_a("data.frame"))
  expect_that(get_wage_ratios(), is_a("data.frame"))
  expect_that(get_union_coverage(), is_a("data.frame"))
  expect_that(get_wages_by_education(), is_a("data.frame"))
  expect_that(get_wages_by_percentile(), is_a("data.frame"))
  expect_that(get_wage_decomposition(), is_a("data.frame"))
  expect_that(get_wage_ratios(), is_a("data.frame"))

})