Challenge 4

challenge_4

abc_poll

eggs

fed_rates

hotel_bookings

debt

More data wrangling: pivoting

Author

Rahul Somu

Published

April 10, 2023

Code

library(tidyverse)
library(tidyr)

knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE)

Challenge Overview

Combine Year-Month-Date as one column to help us filter based on date. Based on date, all the variable values can be filter. Duplicates can be removed.

Below is the code for the above code

Code

getwd()

[1] "/Users/rahulsomu/Documents/DACSS_601/601_repo/posts"

Code

# read in the data
fed_funds <- read.csv("/Users/rahulsomu/Documents/DACSS_601/601_repo/posts/_data/FedFundsRate.csv",col.names = c("Year",    "Month",    "Day",  "Federal_Funds_Target_Rate",    "Federal_Funds_Upper_Target",   "Federal_Funds_Lower_Target",   "Effective_Federal_Funds_Rate", "Real_GDP_Percent_Change"   ,"Unemployment_Rate","Inflation_Rate"))
colnames(fed_funds)

 [1] "Year"                         "Month"                       
 [3] "Day"                          "Federal_Funds_Target_Rate"   
 [5] "Federal_Funds_Upper_Target"   "Federal_Funds_Lower_Target"  
 [7] "Effective_Federal_Funds_Rate" "Real_GDP_Percent_Change"     
 [9] "Unemployment_Rate"            "Inflation_Rate"

Code

# Pivot the data into tidy format
fed_funds_tidy <- pivot_longer(fed_funds, 
                               cols = c("Federal_Funds_Target_Rate", 
                                        "Federal_Funds_Upper_Target", 
                                        "Federal_Funds_Lower_Target", 
                                        "Effective_Federal_Funds_Rate", 
                                        "Real_GDP_Percent_Change", 
                                        "Unemployment_Rate", 
                                        "Inflation_Rate"), 
                               names_to = "Variable", 
                               values_to = "Value")

# Mutate variables as needed
fed_funds_tidy <- fed_funds_tidy %>% 
  mutate(Date = as.Date(paste(Year, Month, Day, sep = "-")),
         Year = NULL,
         Month = NULL,
         Day = NULL,
         Variable = gsub("\\(.*\\)", "", Variable),
         Variable = gsub("Real GDP", "GDP", Variable),
         Variable = tolower(Variable),
         Variable = gsub(" ", "_", Variable))

# Check for any missing values
sum(is.na(fed_funds_tidy))

[1] 3196

Code

# Check for any duplicated rows
any(duplicated(fed_funds_tidy))

[1] FALSE

Code

fed_funds_tidy

# A tibble: 6,328 × 3
   Variable                     Value Date      
   <chr>                        <dbl> <date>    
 1 federal_funds_target_rate     NA   1954-07-01
 2 federal_funds_upper_target    NA   1954-07-01
 3 federal_funds_lower_target    NA   1954-07-01
 4 effective_federal_funds_rate   0.8 1954-07-01
 5 real_gdp_percent_change        4.6 1954-07-01
 6 unemployment_rate              5.8 1954-07-01
 7 inflation_rate                NA   1954-07-01
 8 federal_funds_target_rate     NA   1954-08-01
 9 federal_funds_upper_target    NA   1954-08-01
10 federal_funds_lower_target    NA   1954-08-01
# … with 6,318 more rows