library(tidyverse)
library(ggplot2)
library(readxl)
# Global knitr chunk options: show the code of every chunk and keep
# warnings and messages out of the rendered document.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
Challenge 6
Read in data and clean data
- fed_rate ⭐⭐
# Read the raw Federal Funds Rate table.
fed <- read_csv("_data/FedFundsRate.csv")

# Build the table used for plotting:
#   1. drop columns 4-6 by position (NOTE(review): positional selection
#      depends on the column order of the CSV -- confirm which columns
#      these are),
#   2. fill missing values in the three listed series downward, i.e. carry
#      the last observed value forward,
#   3. paste Month-Year-Day together and parse it into a single `date`,
#   4. drop the first three columns by position (presumably the original
#      date-part columns -- confirm).
# `myd()` is namespace-qualified so the chunk does not depend on lubridate
# having been attached.
fed_clean <- fed %>%
  select(-c(4:6)) %>%
  fill(
    `Real GDP (Percent Change)`,
    `Inflation Rate`,
    `Effective Federal Funds Rate`,
    .direction = "down"
  ) %>%
  mutate(
    date = str_c(Month, Year, Day, sep = "-"),
    date = lubridate::myd(date)
  ) %>%
  select(-c(1:3))

# Print both tables for inspection.
fed
fed_clean
I dropped the columns I don't need, filled the missing values downward, and combined the month, year and day columns into a single date column. The result is a clean dataset that is easy to use for plotting trends over time.
Time Dependent Visualization
# Line chart of the effective federal funds rate over time.
fed_clean %>%
  ggplot(aes(x = date, y = `Effective Federal Funds Rate`)) +
  geom_line(color = "indianred3", size = 0.8) +
  labs(
    title = "Annual Trend in the Effective Federal Funds Rate",
    x = "Year",
    y = "Effective Federal Funds Rate"
  ) +
  theme_minimal()
This graph shows the trend in the effective federal funds rate from 1954 to 2017. From 1954 to about 1982 the general trend was upward; after that the rate declined. The rate was extremely high around 1982.
# Reshape to long form: one row per (date, variable) pair, so that every
# series can be drawn in its own facet.
fed_long <- fed_clean %>%
  pivot_longer(cols = -date, names_to = "variable", values_to = "value")
fed_long

# Keep the series of interest and draw one facet row per series.
#
# After pivot_longer() the `variable` column holds the full column names
# (e.g. "Effective Federal Funds Rate", "Real GDP (Percent Change)",
# "Inflation Rate"), so an exact `%in%` match against short labels such as
# "Effective" or "inflation" keeps zero rows. That is what produced:
#   Error in `combine_vars()`:
#   ! Faceting variables must have at least one value
# Matching on a case-insensitive pattern selects the intended series.
# NOTE(review): "Unemployment" is assumed to match an unemployment column
# that is still present in fed_clean -- confirm.
fed_long %>%
  filter(
    str_detect(
      variable,
      regex("Effective|GDP|Unemployment|Inflation", ignore_case = TRUE)
    )
  ) %>%
  ggplot(aes(x = date, y = value, color = variable)) +
  geom_point(size = 0) +
  geom_line() +
  facet_grid(rows = vars(variable))
Read in data and clean data
- hotel_bookings ⭐⭐⭐⭐
# Read the raw hotel bookings table and print it.
hotel <- read_csv("_data/hotel_bookings.csv")
hotel

# Clean the bookings table.
# NOTE(review): several steps select columns by position; the positions
# shift after every select(), so each index refers to the table as it
# stands at that step. Confirm them against the CSV before reordering
# anything here.
hotel_clean <- hotel %>%
  # Combine the month / year / day-of-month parts into one Date column.
  mutate(
    arrival_date = str_c(
      arrival_date_month,
      arrival_date_year,
      arrival_date_day_of_month,
      sep = "-"
    ),
    arrival_date = lubridate::myd(arrival_date)
  ) %>%
  # Drop columns 4-7 (presumably the now-redundant date parts -- confirm).
  select(-c(4:7)) %>%
  # Convert columns 1, 2, 9 and 10 to factors.
  mutate(across(c(1, 2, 9, 10), as.factor)) %>%
  # Replace the 0/1 cancellation flag with a readable label.
  mutate(
    state = case_when(
      is_canceled == "1" ~ "canceled",
      is_canceled == "0" ~ "regular"
    )
  ) %>%
  select(-is_canceled) %>%
  # Sum columns 3-4 into stays_nights, then drop the two source columns.
  mutate(stays_nights = rowSums(across(c(3:4)))) %>%
  select(-c(3:4)) %>%
  # Sum columns 3-5 into Guests_number, then drop the source columns.
  mutate(Guests_number = rowSums(across(c(3:5)))) %>%
  select(-c(3:5)) %>%
  # Strip the first "TA" and the first "/TO" from the market segment.
  mutate(market_segment = str_remove(market_segment, "TA")) %>%
  mutate(market_segment = str_remove(market_segment, "/TO")) %>%
  select(-c(distribution_channel, company)) %>%
  rename("average_daily_rate" = adr)

hotel_clean
I cleaned the dataset above. Not all of this cleaning is needed for the plots below, but I did it for practice.
Visualizing Part-Whole Relationships
#' Flag values that lie strictly inside the 1.5 * IQR fences
#'
#' A value is kept when it is greater than Q1 - 1.5 * IQR and less than
#' Q3 + 1.5 * IQR. Missing values are ignored when the quartiles and the
#' IQR are computed, and yield `NA` in the result.
#'
#' @param x A numeric vector.
#' @return A logical vector the same length as `x`: `TRUE` for values
#'   inside the fences, `FALSE` for outliers, `NA` where `x` is `NA`.
not_outlier <- function(x) {
  fence_width <- 1.5 * IQR(x, na.rm = TRUE)
  lower <- quantile(x, 0.25, na.rm = TRUE) - fence_width
  upper <- quantile(x, 0.75, na.rm = TRUE) + fence_width
  x > lower & x < upper
}
# Drop rows whose average daily rate is an outlier, then sort by arrival.
hotel_clean <- hotel_clean %>%
  filter(not_outlier(average_daily_rate)) %>%
  arrange(arrival_date)

# Subset of rows whose country is "PRT".
hotel_clean2 <- hotel_clean %>%
  filter(country == "PRT")

# Pie chart of the share of rows per hotel type within the PRT subset.
# The slices are sized by the row count `n`: mapping the categorical
# `hotel` column itself to `y` does not give slice sizes proportional to
# the number of rows, and the chart has to use the PRT subset
# (hotel_clean2) to match its title.
hotel_clean2 %>%
  count(hotel) %>%
  ggplot(aes(x = "", y = n, fill = hotel)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar("y", start = 0, direction = -1) +
  theme_void() +
  labs(
    title = "The Proportion of Resort Hotel and City Hotel in PRT",
    x = "",
    y = ""
  )
Plotting part-whole graphs is a little bit abstract for me.
This graph shows the proportion of city hotels and resort hotels in PRT, in which we can see that the number of resort hotels is much larger than the number of city hotels.
# Number of rows per hotel type across all countries. count() groups by
# `hotel` on its own, so no separate group_by() is needed and the result
# is left ungrouped.
hotel_clean3 <- hotel_clean %>%
  count(hotel)

# Pie chart: one slice per hotel type, sized by its row count.
hotel_clean3 %>%
  ggplot(aes(x = "", y = n, fill = hotel)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar("y", start = 0, direction = -1) +
  theme_void() +
  labs(
    title = "The Proportion of Resort Hotel and City Hotel in All Countries",
    x = "",
    y = ""
  )
This graph shows the proportion of city hotels and resort hotels across all countries, in which we can see that the number of resort hotels is still much larger than the number of city hotels. However, the proportion of city hotels is a little bit bigger than it is in PRT.
# Read the household debt workbook.
debt <- read_excel("_data/debt_in_trillions.xlsx")

# Parse the `Year and Quarter` column into a date-time using the
# year-quarter order. Namespace-qualified so the chunk does not depend on
# lubridate having been attached.
debt <- debt %>%
  mutate(
    date = lubridate::parse_date_time(`Year and Quarter`, orders = "yq")
  )

# Total debt over time, with the default y-axis range.
ggplot(debt, aes(x = date, y = Total)) +
  geom_point() +
  geom_line() +
  scale_y_continuous(labels = scales::label_number(suffix = "Trillion"))

# Same chart with the y-axis anchored at zero, so the height of the line
# reflects the absolute level of debt.
ggplot(debt, aes(x = date, y = Total)) +
  geom_point() +
  geom_line() +
  scale_y_continuous(
    limits = c(0, max(debt$Total)),
    labels = scales::label_number(suffix = "Trillion")
  )
# Reshape to long form: the columns from Mortgage through Other become
# (Loan, total) pairs. The aggregate `Total` column is dropped and `Loan`
# is stored as a factor.
debt_long <- debt %>%
  pivot_longer(
    cols = Mortgage:Other,
    names_to = "Loan",
    values_to = "total"
  ) %>%
  select(-Total) %>%
  mutate(Loan = as.factor(Loan))

# Print the wide and long tables for comparison.
debt
debt_long
# One line per loan type, with small point markers, legend on the right.
ggplot(
  debt_long,
  aes(x = date, y = total, color = Loan)
) +
  geom_line() +
  geom_point(size = .5) +
  theme(legend.position = "right") +
  scale_y_continuous(
    labels = scales::label_number(suffix = "Trillion")
  )

# Stacked bars: each bar is split by loan type, legend on top.
# geom_col() is geom_bar() with stat = "identity".
ggplot(
  debt_long,
  aes(x = date, y = total, fill = Loan)
) +
  geom_col(position = "stack") +
  theme(legend.position = "top") +
  scale_y_continuous(
    labels = scales::label_number(suffix = "Trillion")
  )
# Put the loan types in an explicit order; this order drives both the
# stacking order of the bars and the order of the legend entries.
debt_long <- debt_long %>%
  mutate(
    Loan = fct_relevel(
      Loan,
      "Mortgage", "Auto Loan", "HE Revolving",
      "Student Loan", "Credit Card", "Other"
    )
  )

ggplot(debt_long, aes(x = date, y = total, fill = Loan)) +
  # Stack the loan types so that each bar adds up to the total.
  geom_bar(position = "stack", stat = "identity") +
  # Place the legend above the plot.
  theme(legend.position = "top") +
  # Append "Trillion" to the y-axis labels.
  scale_y_continuous(labels = scales::label_number(suffix = "Trillion")) +
  # Lay the legend out on a single row.
  guides(fill = guide_legend(nrow = 1)) +
  # Break each legend label at its first space so the entries stay narrow.
  scale_fill_discrete(
    labels = str_replace(levels(debt_long$Loan), " ", "\n")
  )