Challenge 6

challenge_6
hotel_bookings
air_bnb
fed_rate
debt
usa_households
abc_poll
Visualizing Time and Relationships
Author

Shuqi Hong

Published

June 16, 2023

library(tidyverse)
library(ggplot2)
library(readxl)


knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE)

Read in data and clean data

  • fed_rate ⭐⭐
fed <- read_csv("_data/FedFundsRate.csv")
fed_clean <- fed %>% select(-c(4:6)) %>% fill(`Real GDP (Percent Change)`, .direction = "down") %>% 
  fill(`Inflation Rate`, .direction = "down") %>% 
  fill(`Effective Federal Funds Rate`, .direction = "down") %>% 
  mutate(date = str_c(Month, Year, Day, sep= "-"), date = myd(date)) %>%
  select(-c(1:3))

fed
fed_clean

I delete some columns I don’t need, mutate the date type column and fill the columns. This is a clear and easy dataset to plot the graphs of annual trend.

Time Dependent Visualization

fed_clean %>% ggplot(aes(x = date, y = `Effective Federal Funds Rate` )) +
  geom_line(color = "indianred3", 
            size=0.8 ) +
  labs(title = "Annual Trend in the Effective Federal Funds Rate",
       x = "Year",
       y = "Effective Federal Funds Rate") +
  theme_minimal() 

This graph is the annual trend of effective federal funds rate from 1954 to 2017. From 1954 to about 1982, the general trend was increasing then went down. We can see in about 1982, the rate was extremely high.

fed_long <- fed_clean %>% pivot_longer(cols = -date, names_to = "variable", values_to = "value")
fed_long
fed_long %>% filter(variable%in%c("Effective", "GDP", "Unemployment", "inflation")) %>% 
  ggplot(., aes(x=date, y=value, color = variable))+
  geom_point(size = 0) +
  geom_line() +
  facet_grid(rows = vars(variable))
Error in `combine_vars()`:
! Faceting variables must have at least one value

Read in data and clean data

  • AB_NYC ⭐⭐⭐⭐⭐
hotel <- read_csv("_data/hotel_bookings.csv")
hotel
hotel_clean <- hotel %>% mutate(arrival_date = str_c( arrival_date_month, arrival_date_year, arrival_date_day_of_month, sep= "-"), arrival_date = myd(arrival_date)) %>%
  select(-c(4:7)) %>% 
  mutate_at(c(1,2,9,10), as.factor)%>% 
  mutate(state = case_when
         (is_canceled == "1" ~ "canceled",
          is_canceled == "0" ~ "regular")) %>% 
  select(-is_canceled) %>% 
  mutate(stays_nights = rowSums(across(c(3:4)))) %>% 
  select(-c(3:4)) %>% 
  mutate(Guests_number = rowSums(across(c(3:5))))%>% 
  select(-c(3:5)) %>%
  mutate(market_segment = str_remove(market_segment, "TA")) %>%
  mutate(market_segment = str_remove(market_segment, "/TO")) %>% 
  select(-c(distribution_channel,company)) %>% rename("average_daily_rate" = adr)
  
hotel_clean

I clean the dataset above. It may not useful for plotting graphs in below codes, but I just practice some.

Visualizing Part-Whole Relationships

not_outlier <- function(x){
  return(x > quantile(x, 0.25) - 1.5* IQR(x) & x< quantile(x, 0.75) + 1.5* IQR(x))
}

hotel_clean <- hotel_clean %>% filter(not_outlier(average_daily_rate)) %>% arrange(arrival_date)

hotel_clean2 <- hotel_clean %>% filter(country == "PRT")

hotel_clean %>%ggplot(aes(x = "", y = hotel, fill = hotel)) +
    geom_bar(width = 1, 
             stat = "identity") +
    coord_polar("y", 
                start = 0, 
                direction = -1) +
    theme_void()+
  labs(title = "The Proportion of Resort Hotel and City Hotel in PRT",
       x = "",
       y = "")

It is little bit abstract to plot the part - whole graphs for me.

This graph shows the proportion of city hotel and resort hotel in PRT, in which we can see the number of resort hotel is much more than city hotel.

hotel_clean3 <-hotel_clean %>% group_by(hotel) %>%count(hotel)

hotel_clean3 %>%ggplot(aes(x = "", y = n, fill = hotel)) +
    geom_bar(width = 1, 
             stat = "identity") +
    coord_polar("y", 
                start = 0, 
                direction = -1) +
    theme_void()+
  labs(title = "The Proportion of Resort Hotel and City Hotel in All Countries",
       x = "",
       y = "")

This graph shows the proportion of city hotel and resort hotel in all countries, in which we can see the number of resort hotel is still much more than city hotel. But the proportion of city hotel is a little bit bigger than that in PRT.

debt <- read_excel("_data/debt_in_trillions.xlsx")

debt <- debt %>% mutate(date = parse_date_time(`Year and Quarter`, orders = "yq"))
ggplot(debt, aes(x=date, y=Total)) +
  geom_point() +
  geom_line() +
  scale_y_continuous(labels = scales :: label_number(suffix = "Trillion"))

ggplot(debt, aes(x=date, y=Total)) +
  geom_point() +
  geom_line() +
  scale_y_continuous(limits=c(0, max(debt$Total)) , labels = scales :: label_number(suffix = "Trillion"))

 debt_long <- debt %>% pivot_longer(
   cols = Mortgage : Other,
   names_to = "Loan",
   values_to = "total"  
 ) %>%
     select(-Total) %>%
     mutate(Loan = as.factor(Loan))
debt
debt_long
ggplot(debt_long, aes(x=date, y = total, color= Loan)) +
  geom_line() +
  geom_point(size=.5) +
  theme(legend.position = "right") +
  scale_y_continuous(labels = scales::label_number(suffix = "Trillion"))

ggplot(debt_long, aes(x=date, y = total, fill= Loan)) +
  geom_bar(position = "stack", stat = "identity") +
  theme(legend.position = "top") +
  scale_y_continuous(labels = scales::label_number(suffix = "Trillion"))

debt_long <- debt_long %>%
  mutate(Loan = fct_relevel(Loan, "Mortgage", "Auto Loan", "HE Revolving", "Student Loan", "Credit Card", "Other"))

ggplot(debt_long, aes(x=date, y = total, fill= Loan)) +
  #sum up all
  geom_bar(position = "stack", stat = "identity") +
  #the position of legend
  theme(legend.position = "top") +
  #add "Trillion" in y label
  scale_y_continuous(labels = scales::label_number(suffix = "Trillion")) + 
  #make legend in 1 line
  guides(fill = guide_legend(nrow = 1)) +
  #replace space to change line
  scale_fill_discrete(labels = 
                        str_replace(levels(debt_long$Loan), " ", "\n"))