## ~hrbrmstr/epidata

ref: 47b2d52f37909c0b01ad02edd12decb043a1dfb6 epidata/R/gaps.r -rw-r--r-- 11.6 KiB

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
#' Retrieve the percent by which hourly wages of female workers are less than hourly wages of male workers
#'
#' The gender wage gap is the percent by which hourly wages of female workers are less than
#' hourly wages of male workers. It is also often expressed as a wage ratio (women's
#' share of men's wages) by subtracting the gap from 100 percent.
#'
#' \itemize{
#' \item{A median gender wage gap of 17.3 percent means that a typical woman is paid 17.3
#'       percent less per hour than a typical man.}
#' \item{An average gender wage gap of 19.7 percent means that on average women are paid
#'       19.7 percent less per hour than men.}
#' \item{A regression-based gender wage gap of 21.7 percent means that on average women
#'       are paid 21.7 percent less per hour than men, all else held equal (controlling for
#'       gender, race and ethnicity, education, experience, and geographic location).}
#' }
#'
#' @param by \code{NULL} or \code{r} for a partition by race
#' @return \code{tbl_df} with data filtered by the selected criteria.
#' @references \href{https://www.epi.org/data/}{Economic Policy Institute Data Library}
#' @export
#' @examples
#' get_gender_wage_gap()
#'
#' get_gender_wage_gap("r")
get_gender_wage_gap <- function(by = NULL) {

  params <- list(subject = "wagegap-mf")

  # Only "r" is passed as an allowed partition code for this subject.
  if (!is.null(by)) {
    params <- make_params(params, by, c("r"))
  }

  res <- epi_query(params)

  # Build column names from the API's column labels: lower-case them, drop
  # literal "(", "'" and ")" characters, then collapse every run of
  # whitespace, en dashes (UTF-8 bytes e2 80 93) and hyphens into one "_".
  cols <- stringi::stri_trans_tolower(res$columns$name)
  cols <- stringi::stri_replace_all_regex(cols, "[\\('\\)]", "")
  cols <- stringi::stri_replace_all_regex(
    cols,
    "[[:space:]" %s+% rawToChar(as.raw(c(0xe2, 0x80, 0x93))) %s+% "-]+",
    "_"
  )

  out <- setNames(as_data_frame(res$data), cols)
  # `clean_cols` is a package helper (defined elsewhere) applied to every column.
  out <- dplyr::mutate_all(out, "clean_cols")
  # Let readr re-infer the column types; its guessing messages are suppressed.
  out <- suppressMessages(readr::type_convert(out))

  # The source attribution is parsed as HTML and reduced to its text; a default
  # attribution / "None" is substituted through `%||%` when the field is absent.
  cite <- html_text(read_html(res$meta$source %||% "<p>Economic Policy Institute</p>"))
  message(sprintf('Note: %s\nCitation: "%s"', res$meta$notes %||% "None", cite))

  out

}

#' Retrieve the percent by which hourly wages of black workers are less than hourly wages of white workers
#'
#' The black-white wage gap is the percent by which hourly wages of black workers are less
#' than hourly wages of white workers. It is also often expressed as a wage ratio (black
#' workers' share of white workers' wages) by subtracting the gap from 100 percent.
#'
#' \itemize{
#' \item{A median black-white wage gap of 26.2 percent means that a typical black worker
#'       is paid 26.2 percent less per hour than a typical white worker.}
#' \item{An average black-white wage gap of 26.6 percent means that on average black
#'       workers are paid 26.6 percent less per hour than white workers.}
#' \item{A regression-based black-white wage gap of 15.2 percent means that on average
#'       black workers are paid 15.2 percent less per hour than white workers, all else
#'       held equal (controlling for gender, race and ethnicity, education, experience,
#'       and geographic location).}
#' }
#'
#' @param by \code{NULL} or \code{g} for a partition by gender
#' @return \code{tbl_df} with data filtered by the selected criteria.
#' @references \href{https://www.epi.org/data/}{Economic Policy Institute Data Library}
#' @export
#' @examples
#' get_black_white_wage_gap()
#'
#' get_black_white_wage_gap("g")
get_black_white_wage_gap <- function(by = NULL) {

  params <- list(subject = "wagegap-bw")

  # Only "g" is passed as an allowed partition code for this subject.
  if (!is.null(by)) {
    params <- make_params(params, by, c("g"))
  }

  res <- epi_query(params)

  # Build column names from the API's column labels: lower-case them, drop
  # literal "(", "'" and ")" characters, then collapse every run of
  # whitespace, en dashes (UTF-8 bytes e2 80 93) and hyphens into one "_".
  cols <- stringi::stri_trans_tolower(res$columns$name)
  cols <- stringi::stri_replace_all_regex(cols, "[\\('\\)]", "")
  cols <- stringi::stri_replace_all_regex(
    cols,
    "[[:space:]" %s+% rawToChar(as.raw(c(0xe2, 0x80, 0x93))) %s+% "-]+",
    "_"
  )

  out <- setNames(as_data_frame(res$data), cols)
  # `clean_cols` is a package helper (defined elsewhere) applied to every column.
  out <- dplyr::mutate_all(out, "clean_cols")
  # Let readr re-infer the column types; its guessing messages are suppressed.
  out <- suppressMessages(readr::type_convert(out))

  # The source attribution is parsed as HTML and reduced to its text; a default
  # attribution / "None" is substituted through `%||%` when the field is absent.
  cite <- html_text(read_html(res$meta$source %||% "<p>Economic Policy Institute</p>"))
  message(sprintf('Note: %s\nCitation: "%s"', res$meta$notes %||% "None", cite))

  out

}

#' Retrieve the percent by which hourly wages of Hispanic workers are less than hourly wages of white workers
#'
#' The Hispanic-white wage gap is the percent by which hourly wages of Hispanic workers
#' are less than hourly wages of white workers. It is also often expressed as a wage ratio
#' (Hispanic workers' share of white workers' wages) by subtracting the gap from 100 percent.
#'
#' \itemize{
#' \item{A median Hispanic-white wage gap of 29.6 percent means that a typical Hispanic
#'       worker is paid 29.6 percent less per hour than a typical white worker.}
#' \item{An average Hispanic-white wage gap of 30.1 percent means that on average Hispanic
#'       workers are paid 30.1 percent less per hour than white workers.}
#' \item{A regression-based Hispanic-white wage gap of 11.1 percent means that on average
#'       Hispanic workers are paid 11.1 percent less per hour than white workers, all
#'       else held equal (controlling for gender, race and ethnicity, education,
#'       experience, and geographic location).}
#' }
#'
#' @param by \code{NULL} or \code{g} for a partition by gender
#' @return \code{tbl_df} with data filtered by the selected criteria.
#' @references \href{https://www.epi.org/data/}{Economic Policy Institute Data Library}
#' @export
#' @examples
#' get_hispanic_white_wage_gap()
#'
#' get_hispanic_white_wage_gap("g")
get_hispanic_white_wage_gap <- function(by = NULL) {

  params <- list(subject = "wagegap-hw")

  # Only "g" is passed as an allowed partition code for this subject.
  if (!is.null(by)) {
    params <- make_params(params, by, c("g"))
  }

  res <- epi_query(params)

  # Build column names from the API's column labels: lower-case them, drop
  # literal "(", "'" and ")" characters, then collapse every run of
  # whitespace, en dashes (UTF-8 bytes e2 80 93) and hyphens into one "_".
  cols <- stringi::stri_trans_tolower(res$columns$name)
  cols <- stringi::stri_replace_all_regex(cols, "[\\('\\)]", "")
  cols <- stringi::stri_replace_all_regex(
    cols,
    "[[:space:]" %s+% rawToChar(as.raw(c(0xe2, 0x80, 0x93))) %s+% "-]+",
    "_"
  )

  out <- setNames(as_data_frame(res$data), cols)
  # `clean_cols` is a package helper (defined elsewhere) applied to every column.
  out <- dplyr::mutate_all(out, "clean_cols")
  # Let readr re-infer the column types; its guessing messages are suppressed.
  out <- suppressMessages(readr::type_convert(out))

  # The source attribution is parsed as HTML and reduced to its text; a default
  # attribution / "None" is substituted through `%||%` when the field is absent.
  cite <- html_text(read_html(res$meta$source %||% "<p>Economic Policy Institute</p>"))
  message(sprintf('Note: %s\nCitation: "%s"', res$meta$notes %||% "None", cite))

  out

}

#' Retrieve the percent by which hourly wages of college graduates exceed those of otherwise
#' equivalent high school graduates
#'
#' A regression-based college wage premium of 56.1 percent means that on average workers
#' with a college degree are paid 56.1 percent more per hour than workers whose highest
#' education credential is a high school diploma, all else held equal (controlling for
#' gender, race and ethnicity, education, experience, and geographic location).
#'
#' @param by \code{NULL} or \code{g} for a partition by gender
#' @return \code{tbl_df} with data filtered by the selected criteria.
#' @references \href{https://www.epi.org/data/}{Economic Policy Institute Data Library}
#' @export
#' @examples
#' get_college_wage_premium()
#'
#' get_college_wage_premium("g")
get_college_wage_premium <- function(by = NULL) {

  params <- list(subject = "wagegap-coll")

  # Only "g" is passed as an allowed partition code for this subject.
  if (!is.null(by)) {
    params <- make_params(params, by, c("g"))
  }

  res <- epi_query(params)

  # Build column names from the API's column labels: lower-case them, drop
  # literal "(", "'" and ")" characters, then collapse every run of
  # whitespace, en dashes (UTF-8 bytes e2 80 93) and hyphens into one "_".
  cols <- stringi::stri_trans_tolower(res$columns$name)
  cols <- stringi::stri_replace_all_regex(cols, "[\\('\\)]", "")
  cols <- stringi::stri_replace_all_regex(
    cols,
    "[[:space:]" %s+% rawToChar(as.raw(c(0xe2, 0x80, 0x93))) %s+% "-]+",
    "_"
  )

  out <- setNames(as_data_frame(res$data), cols)
  # `clean_cols` is a package helper (defined elsewhere) applied to every column.
  out <- dplyr::mutate_all(out, "clean_cols")
  # Let readr re-infer the column types; its guessing messages are suppressed.
  out <- suppressMessages(readr::type_convert(out))

  # The source attribution is parsed as HTML and reduced to its text; a default
  # attribution / "None" is substituted through `%||%` when the field is absent.
  cite <- html_text(read_html(res$meta$source %||% "<p>Economic Policy Institute</p>"))
  message(sprintf('Note: %s\nCitation: "%s"', res$meta$notes %||% "None", cite))

  out

}

#' Retrieve the percent by which hourly wages of workers without a high school diploma
#' (or equivalent) are less than wages of otherwise equivalent workers who have graduated
#' from high school
#'
#' A regression-based non-high school wage penalty of 21.8 percent means that on average
#' workers without a high school diploma are paid 21.8 percent less per hour than workers
#' with a high school diploma, all else held equal (controlling for gender, race and
#' ethnicity, education, experience, and geographic location).
#'
#' @param by \code{NULL} or \code{g} for a partition by gender
#' @return \code{tbl_df} with data filtered by the selected criteria.
#' @references \href{https://www.epi.org/data/}{Economic Policy Institute Data Library}
#' @export
#' @examples \dontrun{
#' get_non_high_school_wage_penalty()
#'
#' get_non_high_school_wage_penalty("g")
#' }
get_non_high_school_wage_penalty <- function(by = NULL) {

  params <- list(subject = "wagegap-hs")

  # Only "g" is passed as an allowed partition code for this subject.
  if (!is.null(by)) {
    params <- make_params(params, by, c("g"))
  }

  res <- epi_query(params)

  # Build column names from the API's column labels: lower-case them, drop
  # literal "(", "'" and ")" characters, then collapse every run of
  # whitespace, en dashes (UTF-8 bytes e2 80 93) and hyphens into one "_".
  cols <- stringi::stri_trans_tolower(res$columns$name)
  cols <- stringi::stri_replace_all_regex(cols, "[\\('\\)]", "")
  cols <- stringi::stri_replace_all_regex(
    cols,
    "[[:space:]" %s+% rawToChar(as.raw(c(0xe2, 0x80, 0x93))) %s+% "-]+",
    "_"
  )

  out <- setNames(as_data_frame(res$data), cols)
  # `clean_cols` is a package helper (defined elsewhere) applied to every column.
  out <- dplyr::mutate_all(out, "clean_cols")
  # Let readr re-infer the column types; its guessing messages are suppressed.
  out <- suppressMessages(readr::type_convert(out))

  # The source attribution is parsed as HTML and reduced to its text; a default
  # attribution / "None" is substituted through `%||%` when the field is absent.
  cite <- html_text(read_html(res$meta$source %||% "<p>Economic Policy Institute</p>"))
  message(sprintf('Note: %s\nCitation: "%s"', res$meta$notes %||% "None", cite))

  out

}

#' Retrieve the level of inequality within the hourly wage distribution.
#'
#' The 95–50 and 50–10 wage ratios are representations of the level of inequality within
#' the hourly wage distribution. The larger the ratio, the greater the gap between the
#' top and the middle or the middle and the bottom of the wage distribution.
#'
#' \itemize{
#' \item{A 50–10 wage ratio of 1.91 means that workers at the 50th percentile of the wage
#'       distribution are paid 1.91 times more per hour than the workers at the 10th percentile.}
#' \item{A 95–50 wage ratio of 3.28 means that workers at the 95th percentile of the wage
#'       distribution are paid 3.28 times more per hour than the workers at the 50th percentile.}
#' }
#'
#' @param by \code{NULL} or character string with any combination of \code{g} (Gender) or
#'   \code{r} (Race), i.e. if you want to retrieve
#'   unemployment data by gender and race, you would set this parameter to "\code{gr}".
#' @return \code{tbl_df} with data filtered by the selected criteria.
#' @references \href{https://www.epi.org/data/}{Economic Policy Institute Data Library} #' @export #' @examples \dontrun{ #' get_wage_ratios() #' #' get_wage_ratios("r") #' #' get_wage_ratios("gr") #' } get_wage_ratios <- function(by=NULL) { params <- list(preset="wage-ratios") if (!is.null(by)) { params <- make_params(params, by, c("g", "r")) params <- c(params, list(subject="wage", d="10,50,95,5010,9550,mean")) } res <- epi_query(params) cols <- stringi::stri_trans_tolower(res$columns$name) cols <- stringi::stri_replace_all_regex(cols, "[\$$'\$$]", "") cols <- stringi::stri_replace_all_regex(cols, "[[:space:]" %s+% rawToChar(as.raw(c(0xe2, 0x80, 0x93))) %s+% "-]+", "_") out <- setNames(as_data_frame(res$data), cols)
out <- dplyr::mutate_all(out, "clean_cols")
cite <- html_text(read_html(res$meta$source %||% "<p>Economic Policy Institute</p>"))
message(sprintf('Note: %s\nCitation: "%s"', res$meta$notes %||% "None", cite))