challenge_4
abc_poll
eggs
fed_rates
hotel_bookings
debt
More data wrangling: pivoting
Author

Rahul Somu

Published

April 10, 2023

Code
library(tidyverse)
library(tidyr)

knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE)

Challenge Overview

Combine Year-Month-Date as one column to help us filter based on date. Based on date, all the variable values can be filter. Duplicates can be removed.

Below is the code for the above code

Code
getwd()
[1] "/Users/rahulsomu/Documents/DACSS_601/601_repo/posts"
Code
# read in the data
fed_funds <- read.csv("/Users/rahulsomu/Documents/DACSS_601/601_repo/posts/_data/FedFundsRate.csv",col.names = c("Year",    "Month",    "Day",  "Federal_Funds_Target_Rate",    "Federal_Funds_Upper_Target",   "Federal_Funds_Lower_Target",   "Effective_Federal_Funds_Rate", "Real_GDP_Percent_Change"   ,"Unemployment_Rate","Inflation_Rate"))
colnames(fed_funds)
 [1] "Year"                         "Month"                       
 [3] "Day"                          "Federal_Funds_Target_Rate"   
 [5] "Federal_Funds_Upper_Target"   "Federal_Funds_Lower_Target"  
 [7] "Effective_Federal_Funds_Rate" "Real_GDP_Percent_Change"     
 [9] "Unemployment_Rate"            "Inflation_Rate"              
Code
# Pivot the data into tidy format
fed_funds_tidy <- pivot_longer(fed_funds, 
                               cols = c("Federal_Funds_Target_Rate", 
                                        "Federal_Funds_Upper_Target", 
                                        "Federal_Funds_Lower_Target", 
                                        "Effective_Federal_Funds_Rate", 
                                        "Real_GDP_Percent_Change", 
                                        "Unemployment_Rate", 
                                        "Inflation_Rate"), 
                               names_to = "Variable", 
                               values_to = "Value")

# Mutate variables as needed
fed_funds_tidy <- fed_funds_tidy %>% 
  mutate(Date = as.Date(paste(Year, Month, Day, sep = "-")),
         Year = NULL,
         Month = NULL,
         Day = NULL,
         Variable = gsub("\\(.*\\)", "", Variable),
         Variable = gsub("Real GDP", "GDP", Variable),
         Variable = tolower(Variable),
         Variable = gsub(" ", "_", Variable))

# Check for any missing values
sum(is.na(fed_funds_tidy))
[1] 3196
Code
# Check for any duplicated rows
any(duplicated(fed_funds_tidy))
[1] FALSE
Code
fed_funds_tidy
# A tibble: 6,328 × 3
   Variable                     Value Date      
   <chr>                        <dbl> <date>    
 1 federal_funds_target_rate     NA   1954-07-01
 2 federal_funds_upper_target    NA   1954-07-01
 3 federal_funds_lower_target    NA   1954-07-01
 4 effective_federal_funds_rate   0.8 1954-07-01
 5 real_gdp_percent_change        4.6 1954-07-01
 6 unemployment_rate              5.8 1954-07-01
 7 inflation_rate                NA   1954-07-01
 8 federal_funds_target_rate     NA   1954-08-01
 9 federal_funds_upper_target    NA   1954-08-01
10 federal_funds_lower_target    NA   1954-08-01
# … with 6,318 more rows