library(tidyverse)
library(ggplot2)
library(readxl)
::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE) knitr
Challenge 6 Instructions
Challenge Overview
Today’s challenge is to:
- read in a data set, and describe the data set using both words and any supporting information (e.g., tables, etc)
- tidy data (as needed, including sanity checks)
- mutate variables as needed (including sanity checks)
- create at least one graph including time (evolution)
- try to make them “publication” ready (optional)
- Explain why you choose the specific graph type
- Create at least one graph depicting part-whole or flow relationships
- try to make them “publication” ready (optional)
- Explain why you choose the specific graph type
###Read In Data
<- read_excel("~/GIT/601_Fall_2022/posts/_data/debt_in_trillions.xlsx") debt
Error: `path` does not exist: '~/GIT/601_Fall_2022/posts/_data/debt_in_trillions.xlsx'
#print debt dataset to confirm successful import
print(debt)
Error in print(debt): object 'debt' not found
###Data Summary
#get debt dataset dimension
dim(debt)
Error in eval(expr, envir, enclos): object 'debt' not found
#get column names
colnames(debt)
Error in is.data.frame(x): object 'debt' not found
#table of 'year and quater' column to get range of years data was collected
table(select(debt, 'Year and Quarter'))
Error in select(debt, "Year and Quarter"): object 'debt' not found
The debt_in_trillions dataset is composed of 8 variables (columns) and 74 rows. The variables consist of:
“Year and Quarter”: this is a nominal qualitative variable of Years spanning from 2003 to 2021, with each year further split into quarters
“Mortgage”: This is a continuous quantitative variable, measured in of trillions of dollars, likely for home mortgage debt
“HE Revolving”: This is a continuous quantitative variable, measured in of trillions of dollars, likely for revolving loan debt
“Auto Loan”: This is a continuous quantitative variable, measured in of trillions of dollars, likely for automotive debt
“Credit Card”: This is a continuous quantitative variable, measured in of trillions of dollars, likely for credit card debt
“Student Loan”: This is a continuous quantitative variable, measured in of trillions of dollars, likely for student loan debt
“Other”: This is a continuous quantitative variable, measured in of trillions of dollars, likely as a catch-all category to account for miscellaneous sources of debts that are still somewhat common, if less so than the other types of loan debts
“Total”: This is a continuous quantitative variable, measured in of trillions of dollars, quantifying the total accrued debt across that particular year-quarter
For this dataset it may be surmised that this it is a catalog of commonly accrued debts, measured each annual quarter for a series of years, by citizens in a nation that has a dollar-based currency system. This does narrow down the potential countries, though it may be the United States or another nation that does provide reduced or no-cost higher education, due to the presence and amount of student loan debt accrued quarterly.
##Tidying/Wrangling Data
<- rename(debt, "Year_and_Quarter" = 'Year and Quarter', "HE_Revolving" = 'HE Revolving', "Auto_Loan" = 'Auto Loan', "Credit_Card" = 'Credit Card', "Student_Loan" = 'Student Loan') debt2
Error in rename(debt, Year_and_Quarter = "Year and Quarter", HE_Revolving = "HE Revolving", : object 'debt' not found
#average data per quarter into annual averages, can filter by "03:"... to isolate quarterly values by year
#Do this for one column, then can copy and edit code for other columns
#filter with grepl to get quarterly values for each year
<- dplyr::filter(debt2, grepl("03:Q", Year_and_Quarter)) values_2003
Error in dplyr::filter(debt2, grepl("03:Q", Year_and_Quarter)): object 'debt2' not found
<- dplyr::filter(debt2, grepl("04:Q", Year_and_Quarter)) values_2004
Error in dplyr::filter(debt2, grepl("04:Q", Year_and_Quarter)): object 'debt2' not found
<- dplyr::filter(debt2, grepl("05:Q", Year_and_Quarter)) values_2005
Error in dplyr::filter(debt2, grepl("05:Q", Year_and_Quarter)): object 'debt2' not found
<- dplyr::filter(debt2, grepl("06:Q", Year_and_Quarter)) values_2006
Error in dplyr::filter(debt2, grepl("06:Q", Year_and_Quarter)): object 'debt2' not found
<- dplyr::filter(debt2, grepl("07:Q", Year_and_Quarter)) values_2007
Error in dplyr::filter(debt2, grepl("07:Q", Year_and_Quarter)): object 'debt2' not found
<- dplyr::filter(debt2, grepl("08:Q", Year_and_Quarter)) values_2008
Error in dplyr::filter(debt2, grepl("08:Q", Year_and_Quarter)): object 'debt2' not found
<- dplyr::filter(debt2, grepl("09:Q", Year_and_Quarter)) values_2009
Error in dplyr::filter(debt2, grepl("09:Q", Year_and_Quarter)): object 'debt2' not found
<- dplyr::filter(debt2, grepl("10:Q", Year_and_Quarter)) values_2010
Error in dplyr::filter(debt2, grepl("10:Q", Year_and_Quarter)): object 'debt2' not found
<- dplyr::filter(debt2, grepl("11:Q", Year_and_Quarter)) values_2011
Error in dplyr::filter(debt2, grepl("11:Q", Year_and_Quarter)): object 'debt2' not found
<- dplyr::filter(debt2, grepl("12:Q", Year_and_Quarter)) values_2012
Error in dplyr::filter(debt2, grepl("12:Q", Year_and_Quarter)): object 'debt2' not found
<- dplyr::filter(debt2, grepl("13:Q", Year_and_Quarter)) values_2013
Error in dplyr::filter(debt2, grepl("13:Q", Year_and_Quarter)): object 'debt2' not found
<- dplyr::filter(debt2, grepl("14:Q", Year_and_Quarter)) values_2014
Error in dplyr::filter(debt2, grepl("14:Q", Year_and_Quarter)): object 'debt2' not found
<- dplyr::filter(debt2, grepl("15:Q", Year_and_Quarter)) values_2015
Error in dplyr::filter(debt2, grepl("15:Q", Year_and_Quarter)): object 'debt2' not found
<- dplyr::filter(debt2, grepl("16:Q", Year_and_Quarter)) values_2016
Error in dplyr::filter(debt2, grepl("16:Q", Year_and_Quarter)): object 'debt2' not found
<- dplyr::filter(debt2, grepl("17:Q", Year_and_Quarter)) values_2017
Error in dplyr::filter(debt2, grepl("17:Q", Year_and_Quarter)): object 'debt2' not found
<- dplyr::filter(debt2, grepl("18:Q", Year_and_Quarter)) values_2018
Error in dplyr::filter(debt2, grepl("18:Q", Year_and_Quarter)): object 'debt2' not found
<- dplyr::filter(debt2, grepl("19:Q", Year_and_Quarter)) values_2019
Error in dplyr::filter(debt2, grepl("19:Q", Year_and_Quarter)): object 'debt2' not found
<- dplyr::filter(debt2, grepl("20:Q", Year_and_Quarter)) values_2020
Error in dplyr::filter(debt2, grepl("20:Q", Year_and_Quarter)): object 'debt2' not found
<- dplyr::filter(debt2, grepl("21:Q", Year_and_Quarter)) values_2021
Error in dplyr::filter(debt2, grepl("21:Q", Year_and_Quarter)): object 'debt2' not found
#get means of quarterly values for all years
<- summarize_all(select(values_2003, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2003
Error in select(values_2003, Mortgage:Total): object 'values_2003' not found
<- summarize_all(select(values_2004, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2004
Error in select(values_2004, Mortgage:Total): object 'values_2004' not found
<- summarize_all(select(values_2005, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2005
Error in select(values_2005, Mortgage:Total): object 'values_2005' not found
<- summarize_all(select(values_2006, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2006
Error in select(values_2006, Mortgage:Total): object 'values_2006' not found
<- summarize_all(select(values_2007, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2007
Error in select(values_2007, Mortgage:Total): object 'values_2007' not found
<- summarize_all(select(values_2008, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2008
Error in select(values_2008, Mortgage:Total): object 'values_2008' not found
<- summarize_all(select(values_2009, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2009
Error in select(values_2009, Mortgage:Total): object 'values_2009' not found
<- summarize_all(select(values_2010, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2010
Error in select(values_2010, Mortgage:Total): object 'values_2010' not found
<- summarize_all(select(values_2011, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2011
Error in select(values_2011, Mortgage:Total): object 'values_2011' not found
<- summarize_all(select(values_2012, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2012
Error in select(values_2012, Mortgage:Total): object 'values_2012' not found
<- summarize_all(select(values_2013, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2013
Error in select(values_2013, Mortgage:Total): object 'values_2013' not found
<- summarize_all(select(values_2014, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2014
Error in select(values_2014, Mortgage:Total): object 'values_2014' not found
<- summarize_all(select(values_2015, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2015
Error in select(values_2015, Mortgage:Total): object 'values_2015' not found
<- summarize_all(select(values_2016, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2016
Error in select(values_2016, Mortgage:Total): object 'values_2016' not found
<- summarize_all(select(values_2017, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2017
Error in select(values_2017, Mortgage:Total): object 'values_2017' not found
<- summarize_all(select(values_2018, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2018
Error in select(values_2018, Mortgage:Total): object 'values_2018' not found
<- summarize_all(select(values_2019, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2019
Error in select(values_2019, Mortgage:Total): object 'values_2019' not found
<- summarize_all(select(values_2020, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2020
Error in select(values_2020, Mortgage:Total): object 'values_2020' not found
<- summarize_all(select(values_2021, Mortgage:Total), mean, na.rm = TRUE) annual_avgs_2021
Error in select(values_2021, Mortgage:Total): object 'values_2021' not found
#check work
print(annual_avgs_2003)
Error in print(annual_avgs_2003): object 'annual_avgs_2003' not found
#vertically merge annual_avgs_2003:21 data sets with rbind()
<- rbind(annual_avgs_2003, annual_avgs_2004, annual_avgs_2005, annual_avgs_2006, annual_avgs_2007, annual_avgs_2009, annual_avgs_2010, annual_avgs_2011, annual_avgs_2012, annual_avgs_2013, annual_avgs_2014, annual_avgs_2015, annual_avgs_2016, annual_avgs_2017, annual_avgs_2018, annual_avgs_2019, annual_avgs_2020, annual_avgs_2021) avg_debt
Error in rbind(annual_avgs_2003, annual_avgs_2004, annual_avgs_2005, annual_avgs_2006, : object 'annual_avgs_2003' not found
#check work with print
print(avg_debt)
Error in print(avg_debt): object 'avg_debt' not found
#make tibble of years 2003-2021
<- as_tibble_col(c('2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021')) Year
#rename 'value' column as 'Year'
<- rename(Year, "Year" = 'value')
Year
#check rename with print
print(Year)
# A tibble: 18 × 1
Year
<chr>
1 2003
2 2004
3 2005
4 2006
5 2007
6 2008
7 2009
8 2010
9 2012
10 2013
11 2014
12 2015
13 2016
14 2017
15 2018
16 2019
17 2020
18 2021
#join new year tibble column to avg_debt to match years
<- cbind(Year, avg_debt) annual_avg_debt
Error in data.frame(..., check.names = FALSE): object 'avg_debt' not found
#check work
view(annual_avg_debt)
Error in view(annual_avg_debt): object 'annual_avg_debt' not found
#check avgs match date by test with of annual avgs
annual_avgs_2003
Error in eval(expr, envir, enclos): object 'annual_avgs_2003' not found
#check avgs match date by test with of annual avgs
annual_avgs_2010
Error in eval(expr, envir, enclos): object 'annual_avgs_2010' not found
#check avgs match date by test with of annual avgs
annual_avgs_2015
Error in eval(expr, envir, enclos): object 'annual_avgs_2015' not found
#check avgs match date by test with of annual avgs
annual_avgs_2021
Error in eval(expr, envir, enclos): object 'annual_avgs_2021' not found
###Graphing: Evolution
#plot student loan debt over time
ggplot(annual_avg_debt, aes(x = Year, y = Student_Loan)) + geom_point()
Error in ggplot(annual_avg_debt, aes(x = Year, y = Student_Loan)): object 'annual_avg_debt' not found
I wanted to see if student loan debt increased or decreased in its annually accumulated amounts over time, at this juncture it may be noted that the amount of each yearly accumulation did not decrease at any point from 2003 to 2021.
###Graphing: Part of Whole
#pivot_longer for ability to graph info with color key, condense values in Mortgage:Other columns to 1 of dollar values and 1 of names
<- pivot_longer(annual_avg_debt, Mortgage:Total, names_to = "Debt_Type", values_to = "Dollar_Amount") pivoted_data
Error in pivot_longer(annual_avg_debt, Mortgage:Total, names_to = "Debt_Type", : object 'annual_avg_debt' not found
#sanity check: started with 18 rows and 8 columns
dim(annual_avg_debt)
Error in eval(expr, envir, enclos): object 'annual_avg_debt' not found
#sanity check: (8-7) + 2 = 3 columns
#18*7 = 126 rows
dim(pivoted_data)
Error in eval(expr, envir, enclos): object 'pivoted_data' not found
#check work
print(pivoted_data)
Error in print(pivoted_data): object 'pivoted_data' not found
#pivot to arrange data into more plotable format
ggplot(pivoted_data, aes(x = Year, y = Dollar_Amount)) + geom_point(aes(color = Debt_Type))
Error in ggplot(pivoted_data, aes(x = Year, y = Dollar_Amount)): object 'pivoted_data' not found
I chose a scatterplot to display the different debt amounts as averaged through the years and how each varied over time in comparison and as a part of the total amount. The plot allowed me to use fill to denote each type of debt by color, and this distinction clearly displays each debt in comparison against the much greater total.