~hrbrmstr/epidata

ref: e12c13d229ba5f2ca9a4300ed38971676e68b20f epidata/R/wages.r -rw-r--r-- 5.1 KiB
e12c13d2hrbrmstr fixed CRAN checks; added 4 new functions; added UA 1 year, 7 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#' Retreive the hourly wage in the middle of the wage distribution
#'
#' The median wage is the hourly wage in the middle of the wage distribution;
#' 50 percent of wage earners earn less and 50 percent earn more. The average wage is
#' the arithmetic mean of hourly wages; or, the sum of all workers' hourly wages divided
#' by the number of workers.
#'
#' @param by \code{NULL} or character string with any combination of \code{g} (Gender),
#'   \code{r} (Race), \code{e} (Education), \code{d} (Percentile), \code{l} (Entry-level)
#'   i.e. if you want to wage data by gender and race, you would set this
#'   parameter to "\code{gr}".
#' @return \code{tbl_df} with data filtered by the selected criteria.
#' @references \href{https://www.epi.org/data/}{Economic Policy Institute Data Library}
#' @export
#' @examples
#' get_median_and_mean_wages()
#'
#' get_median_and_mean_wages("r")
#'
#' get_median_and_mean_wages("gr")
get_median_and_mean_wages <- function(by=NULL) {

  params <- list(subject="wage-avg")

  if (!is.null(by)) {
    params <- make_params(params, by, c("g", "r", "e", "d", "l"))
    names(params) <- gsub("^l$", "el", names(params))
  }

  res <- epi_query(params)

  cols <- stringi::stri_trans_tolower(res$columns$name)
  cols <- stringi::stri_replace_all_regex(cols, "[\\('\\)]", "")
  cols <- stringi::stri_replace_all_regex(cols, "[[:space:]" %s+%
                                            rawToChar(as.raw(c(0xe2, 0x80, 0x93))) %s+% "-]+",
                                          "_")
  out <- setNames(as_data_frame(res$data), cols)
  out <- dplyr::mutate_all(out, "clean_cols")
  out <- suppressMessages(readr::type_convert(out))

  cite <- "Economic Policy Institute"
  message(sprintf('Note: %s\nCitation: "%s"', res$meta$notes %||% "None", cite))

  out

}

#' Retreive the average hourly wages of workers disaggregated by the highest level of education attained
#'
#' Wages by education are the average hourly wages of workers disaggregated by the highest
#' level of education attained. Employment shares provide the distribution of educational
#' attainment for workers of each gender, racial, and ethnic group as a share of total
#' employed for each group.
#'
#' @param by \code{NULL} or character string with any combination of \code{g} (Gender) or
#'   \code{r} (Race), i.e. if you want to retrieve
#'   unemployment data by gender and race, you would set this parameter to "\code{gr}".
#' @return \code{tbl_df} with data filtered by the selected criteria.
#' @references \href{https://www.epi.org/data/}{Economic Policy Institute Data Library}
#' @export
#' @examples
#' get_wages_by_education()
#'
#' get_wages_by_education("r")
#'
#' get_wages_by_education("gr")
get_wages_by_education <- function(by=NULL) {

  params <- list(preset="wage-education")

  if (!is.null(by)) {
    params <- make_params(params, by, c("g", "r"))
    params <- c(params, list(subject="wage", e="*", d="50,mean", m="share"))
  }

  res <- epi_query(params)

  cols <- stringi::stri_trans_tolower(res$columns$name)
  cols <- stringi::stri_replace_all_regex(cols, "[[:space:]" %s+%
                                            rawToChar(as.raw(c(0xe2, 0x80, 0x93))) %s+% "-]+",
                                          "_")
  out <- setNames(as_data_frame(res$data), cols)
  out <- dplyr::mutate_all(out, "clean_cols")
  out <- suppressMessages(readr::type_convert(out))

  cite <- html_text(read_html(res$meta$source %||% "<p>Economic Policy Institute</p>"))
  message(sprintf('Note: %s\nCitation: "%s"', res$meta$notes %||% "None", cite))

  out

}

#' Retreive wages at ten distinct points in the wage distribution
#'
#' Wage percentiles are wages at ten distinct points in the wage distribution: deciles
#' and the 95th percentile. The 95–50 and 50–10 wage ratios show how much greater wages
#' are at the top than the middle, and at the middle than the bottom, respectively.
#'
#' @param by \code{NULL} or character string with any combination of \code{g} (Gender) or
#'   \code{r} (Race), i.e. if you want to retrieve
#'   unemployment data by gender and race, you would set this parameter to "\code{gr}".
#' @return \code{tbl_df} with data filtered by the selected criteria.
#' @references \href{https://www.epi.org/data/}{Economic Policy Institute Data Library}
#' @export
#' @examples
#' get_wages_by_percentile()
#'
#' get_wages_by_percentile("r")
#'
#' get_wages_by_percentile("gr")
get_wages_by_percentile <- function(by=NULL) {

  params <- list(preset="wage-percentiles")

  if (!is.null(by)) {
    params <- make_params(params, by, c("g", "r"))
    params <- c(params, list(subject="wage", d="*"))
  }

  res <- epi_query(params)

  cols <- stringi::stri_trans_tolower(res$columns$name)
  cols <- stringi::stri_replace_all_regex(cols, "[[:space:]" %s+%
                                            rawToChar(as.raw(c(0xe2, 0x80, 0x93))) %s+% "-]+",
                                          "_")
  out <- setNames(as_data_frame(res$data), cols)
  out <- dplyr::mutate_all(out, "clean_cols")
  out <- suppressMessages(readr::type_convert(out))

  cite <- html_text(read_html(res$meta$source %||% "<p>Economic Policy Institute</p>"))
  message(sprintf('Note: %s\nCitation: "%s"', res$meta$notes %||% "None", cite))

  out

}