HW3

hw3

Karla Barrett-Dexter

Millennial Migration Data

Author

Karla Barrett-Dexter

Published

December 11, 2022

Code

library(tidyverse)

knitr::opts_chunk$set(echo = TRUE)

Read in the data

Code

# Load the raw origin-destination migration file. The path is relative to
# the working directory the document is rendered from.
MillenialMigration <- read_csv(file = "_data/od.csv")

Error: '_data/od.csv' does not exist in current working directory ('C:/Users/srika/OneDrive/Desktop/601_Fall_2022/posts').

Code

MillenialMigration

Error in eval(expr, envir, enclos): object 'MillenialMigration' not found

Code

#comment to test changes
#2nd comment to test changes

Tidy Data

As a continuation from HW2, I renamed the columns again for ease of understanding. Additionally, I separated the race/parental income column into two columns, one for each variable, because I was interested in exploring these data points independently of each other. I found it was taking a very long time to run this code every time I returned to work on the assignment, so I exported the result to a new CSV file. All the code used to get to this step is written in as comments to show my work, and I continue with the new file to avoid the long run times.

Code

# Give the raw columns descriptive names for ease of understanding.
# Each source column is mapped exactly once (new_name = old_name).
MillenialMigration <- MillenialMigration %>%
  rename(
    Origin_Zone = o_cz,
    Origin_City = o_cz_name,
    Origin_State = o_state_name,
    Dest_Zone = d_cz,
    Dest_City = d_cz_name,
    Dest_State = d_state_name,
    Num_Migrators = n,
    N_from_Origin = n_tot_o,
    N_from_Dest = n_tot_d,
    Race_ParentalIncome = pool
  )

Error in rename(., Origin_Zone = o_cz, Origin_City = o_cz_name, Origin_State = o_state_name, : object 'MillenialMigration' not found

Code

MillenialMigration

Error in eval(expr, envir, enclos): object 'MillenialMigration' not found

Code

#I wanted to separate the Race and Parental Income data in order to analyze them separately in future iterations. I could not figure out a way to separate the two without putting a character in between, so I used the following code to insert an underscore into each value of the Race_ParentalIncome column. This was probably not the most efficient way to accomplish this outcome, and it took me quite a while to get the code right. In addition, I found it was taking a very long time to run this code every time I returned to work on the assignment, so I decided to export the result to a new CSV file. All the code used to get to this step is kept below as comments to show my work, and the rest of the document continues from the exported file to avoid the long run times.
#MillenialMigration_ <- MillenialMigration %>%
 # mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "AsianQ1", "Asian_Q1"))%>%
  # mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "AsianQ2", "Asian_Q2"))%>%
 #  mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "AsianQ3", "Asian_Q3"))%>%
 #  mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "AsianQ4", "Asian_Q4"))%>%
 #  mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "AsianQ5", "Asian_Q5"))%>%
 #  mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "BlackQ1", "Black_Q1"))%>%
 #  mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "BlackQ2", "Black_Q2"))%>%
 #  mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "BlackQ3", "Black_Q3"))%>%
 #  mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "BlackQ4", "Black_Q4"))%>%
 #  mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "BlackQ5", "Black_Q5"))%>%
 #  mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "HispanicQ1", "Hispanic_Q1"))%>%
 #  mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "HispanicQ2", "Hispanic_Q2"))%>%
 #  mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "HispanicQ3", "Hispanic_Q3"))%>%
 #  mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "HispanicQ4", "Hispanic_Q4"))%>%
 # mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "HispanicQ5", "Hispanic_Q5"))%>%
 # mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "OtherQ1", "Other_Q1"))%>%
 # mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "OtherQ2", "Other_Q2"))%>%
 # mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "OtherQ3", "Other_Q3"))%>%
 # mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "OtherQ4", "Other_Q4"))%>%
 # mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "OtherQ5", "Other_Q5"))%>%
 # mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "WhiteQ1", "White_Q1"))%>%
 # mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "WhiteQ2", "White_Q2"))%>%
 # mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "WhiteQ3", "White_Q3"))%>%
 # mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "WhiteQ4", "White_Q4"))%>%
 #  mutate(Race_ParentalIncome = stringr::str_replace(Race_ParentalIncome, "WhiteQ5", "White_Q5"))

#I used the following code to check my work
#MigratorsByRace_Income <- MillenialMigration_ %>%
 # group_by(Race_ParentalIncome) %>%
  #summarise(Freq = sum(Num_Migrators))
#print(n=30, MigratorsByRace_Income)

#The following code was used to separate the column and create two new columns, one for Race and one for Parental Income. 
#MillenialMigration_Sep <- separate(MillenialMigration_, Race_ParentalIncome, into = c("Race", "Parental_Income"), sep = "_")
#MillenialMigration_Sep

#The following code was used to create a new CSV file with the separated columns.
#write.csv(MillenialMigration_Sep, file = "C:\\Users\\kbarr\\OneDrive\\Documents\\GitHub\\601_Fall_2022\\posts\\MillenialMigration_Sep.csv", row.names = FALSE)

# Continue from the exported file, which already has separate Race and
# Parental_Income columns.
MillenialMigration_Sep <- read_csv(file = "_data/MillenialMigration_Sep.csv")

Error: '_data/MillenialMigration_Sep.csv' does not exist in current working directory ('C:/Users/srika/OneDrive/Desktop/601_Fall_2022/posts').

Code

MillenialMigration_Sep

Error in eval(expr, envir, enclos): object 'MillenialMigration_Sep' not found

Narrative

This dataset shows migration patterns for people born between the years 1984 and 1992. I found the dataset through data-is-plural.com, which led me to migrationpatterns.org, where I downloaded the data. The data was originally taken from US Census, tax, and HUD information. The origin zone, city, and state are the location of an individual at age 16, and the destination commute zone, city, and state are the location of an individual at age 26. The dataset also includes information on race/ethnicity and parental income.

Descriptive Statistics

I found the mean, median, and standard deviation for each numerical variable and the frequencies for each categorical variable, which I found more interesting to keep exploring.

The same 10 states experienced the most migration in and out: CA, TX, NY, FL, PA, OH, IL, GA, NC, and MI.

California was the top state for both migration in and out.

Code

# Descriptive statistics for the numeric count columns.
# Means (missing values are dropped before averaging):
MillenialMigration_Sep %>%
  summarise(
    mean.TotalMigrators = mean(Num_Migrators, na.rm = TRUE),
    mean.NfromOrigin = mean(N_from_Origin, na.rm = TRUE),
    mean.NFromDest = mean(N_from_Dest, na.rm = TRUE)
  )

Error in summarise(MillenialMigration_Sep, mean.TotalMigrators = mean(Num_Migrators, : object 'MillenialMigration_Sep' not found

Code

# Medians of the numeric count columns (missing values are dropped)
MillenialMigration_Sep %>%
  summarise(
    median.TotalMigrators = median(Num_Migrators, na.rm = TRUE),
    median.NfromOrigin = median(N_from_Origin, na.rm = TRUE),
    median.NFromDest = median(N_from_Dest, na.rm = TRUE)
  )

Error in summarise(MillenialMigration_Sep, median.TotalMigrators = median(Num_Migrators, : object 'MillenialMigration_Sep' not found

Code

# Standard deviations of the numeric count columns (missing values dropped)
MillenialMigration_Sep %>%
  summarise(
    SD.TotalMigrators = sd(Num_Migrators, na.rm = TRUE),
    sd.NfromOrigin = sd(N_from_Origin, na.rm = TRUE),
    sd.NFromDest = sd(N_from_Dest, na.rm = TRUE)
  )

Error in summarise(MillenialMigration_Sep, SD.TotalMigrators = sd(Num_Migrators, : object 'MillenialMigration_Sep' not found

Code

#Frequencies for categorical variables

# Total number of migrators arriving in each destination state
# (sorted into descending order in the next step)
Migration_DestState <- summarise(
  group_by(MillenialMigration_Sep, Dest_State),
  Num_Migrators = sum(Num_Migrators)
)

Error in group_by(., Dest_State): object 'MillenialMigration_Sep' not found

Code

# Put the destination states with the most migrators first
Migration_DestState <- arrange(Migration_DestState, desc(Num_Migrators))

Error in arrange(., desc(Num_Migrators)): object 'Migration_DestState' not found

Code

Migration_DestState

Error in eval(expr, envir, enclos): object 'Migration_DestState' not found

Code

# Total number of migrators leaving each origin state
# (sorted into descending order in the next step)
Migration_OriginState <- summarise(
  group_by(MillenialMigration_Sep, Origin_State),
  Num_Migrators = sum(Num_Migrators)
)

Error in group_by(., Origin_State): object 'MillenialMigration_Sep' not found

Code

# Put the origin states with the most migrators first
Migration_OriginState <- arrange(Migration_OriginState, desc(Num_Migrators))

Error in arrange(., desc(Num_Migrators)): object 'Migration_OriginState' not found

Code

Migration_OriginState

Error in eval(expr, envir, enclos): object 'Migration_OriginState' not found

Code

# Total number of migrators within each parental-income quintile
MigratorsByIncome <- summarise(
  group_by(MillenialMigration_Sep, Parental_Income),
  Num_Migrators = sum(Num_Migrators)
)

Error in group_by(., Parental_Income): object 'MillenialMigration_Sep' not found

Code

MigratorsByIncome

Error in eval(expr, envir, enclos): object 'MigratorsByIncome' not found

Code

# Total number of migrators within each race category
MigratorsByRace <- summarise(
  group_by(MillenialMigration_Sep, Race),
  Num_Migrators = sum(Num_Migrators)
)

Error in group_by(., Race): object 'MillenialMigration_Sep' not found

Code

MigratorsByRace

Error in eval(expr, envir, enclos): object 'MigratorsByRace' not found

Visualizations

I made two simple point plots and two bar graphs to look at the total number of migrators by parental income quintile and by race. The two key takeaways are that the highest number of migrators were white and in the highest income quintile.

I also created a point plot to show the total number of migrators to and from each state. This graph is not all that useful and I will continue to consider how it can be improved.

Code

# Simple point plot: total migrators for each parental-income quintile
ggplot(
  data = MigratorsByIncome,
  mapping = aes(x = Parental_Income, y = Num_Migrators)
) +
  geom_point(shape = 0, color = "blue") +
  labs(title = "Migrators by Income")

Error in ggplot(MigratorsByIncome, aes(Parental_Income, Num_Migrators)): object 'MigratorsByIncome' not found

Code

# Bar chart of the same totals; bar heights come straight from
# Num_Migrators and each quintile gets its own fill colour.
ggplot(
  MigratorsByIncome,
  aes(x = Parental_Income, y = Num_Migrators, fill = Parental_Income)
) +
  geom_col() +
  labs(title = "Migrators by Income")

Error in ggplot(data = MigratorsByIncome): object 'MigratorsByIncome' not found

Code

# Simple point plot: total migrators for each race category
ggplot(
  data = MigratorsByRace,
  mapping = aes(x = Race, y = Num_Migrators)
) +
  geom_point() +
  labs(title = "Migrators by Race")

Error in ggplot(MigratorsByRace, aes(Race, Num_Migrators)): object 'MigratorsByRace' not found

Code

# Bar chart of total migrators by race; bar heights come straight from
# Num_Migrators and each race gets its own fill colour.
ggplot(
  MigratorsByRace,
  aes(x = Race, y = Num_Migrators, fill = Race)
) +
  geom_col() +
  labs(title = "Migrators by Race")

Error in ggplot(data = MigratorsByRace): object 'MigratorsByRace' not found

Code

# Build a table with total migrators in (dest) and out (origin) per state.
# First, give the origin totals a shared `State` key and an outbound-specific
# count column so they can be joined to the destination totals.
Migration_OriginState <- rename(
  Migration_OriginState,
  State = Origin_State,
  Num_MigratorsOut = Num_Migrators
)

Error in rename(., Num_MigratorsOut = Num_Migrators, State = Origin_State): object 'Migration_OriginState' not found

Code

# Give the destination totals the same `State` key and an inbound-specific
# count column.
Migration_DestState <- rename(
  Migration_DestState,
  State = Dest_State,
  Num_Migrators_In = Num_Migrators
)

Error in rename(., Num_Migrators_In = Num_Migrators, State = Dest_State): object 'Migration_DestState' not found

Code

# Combine outbound and inbound totals; only states present in both tables
# are kept.
Migration_In_And_Out <- Migration_OriginState %>%
  inner_join(Migration_DestState, by = "State")

Error in inner_join(Migration_OriginState, Migration_DestState, by = "State"): object 'Migration_OriginState' not found

Code

Migration_In_And_Out

Error in eval(expr, envir, enclos): object 'Migration_In_And_Out' not found

Code

# Point plot comparing migrators out of (x) and into (y) each state,
# with one colour per state
ggplot(
  data = Migration_In_And_Out,
  mapping = aes(x = Num_MigratorsOut, y = Num_Migrators_In)
) +
  geom_point(aes(color = State))

Error in ggplot(Migration_In_And_Out, aes(Num_MigratorsOut, Num_Migrators_In)): object 'Migration_In_And_Out' not found

Groupings

I decided to further explore the migration patterns for California, as it was the state that saw the most movement in and out. I looked at the migration by Race and found that White and Hispanic people migrated the most, with a slight variation between in (White more in) and out (Hispanic more out).

Code

# Rows whose destination state is California
MillenialMigration_to_CA <- filter(
  MillenialMigration_Sep,
  Dest_State == "California"
)

Error in filter(., Dest_State == "California"): object 'MillenialMigration_Sep' not found

Code

# Rows whose origin state is California
MillenialMigration_from_CA <- filter(
  MillenialMigration_Sep,
  Origin_State == "California"
)

Error in filter(., Origin_State == "California"): object 'MillenialMigration_Sep' not found

Code

# Total migrators into California for each race category
CA_Migrators_By_Race_To <- summarise(
  group_by(MillenialMigration_to_CA, Race),
  Num.Migrators.To.CA = sum(Num_Migrators)
)

Error in group_by(., Race): object 'MillenialMigration_to_CA' not found

Code

# Total migrators out of California for each race category
CA_Migrators_By_Race_From <- summarise(
  group_by(MillenialMigration_from_CA, Race),
  Num.Migrators.From.CA = sum(Num_Migrators)
)

Error in group_by(., Race): object 'MillenialMigration_from_CA' not found

Code

CA_Migrators_By_Race_From

Error in eval(expr, envir, enclos): object 'CA_Migrators_By_Race_From' not found

Code

CA_Migrators_By_Race_To

Error in eval(expr, envir, enclos): object 'CA_Migrators_By_Race_To' not found

Code

# Put the inbound and outbound California totals side by side, matched on
# Race; only race categories present in both tables are kept.
CA_Migration_In_And_Out <- CA_Migrators_By_Race_To %>%
  inner_join(CA_Migrators_By_Race_From, by = "Race")

Error in inner_join(CA_Migrators_By_Race_To, CA_Migrators_By_Race_From, : object 'CA_Migrators_By_Race_To' not found

Code

CA_Migration_In_And_Out

Error in eval(expr, envir, enclos): object 'CA_Migration_In_And_Out' not found

Code

# Bar chart of migrators into California by race; bar heights come straight
# from Num.Migrators.To.CA.
ggplot(
  CA_Migration_In_And_Out,
  aes(x = Race, y = Num.Migrators.To.CA, fill = Race)
) +
  geom_col() +
  labs(title = "Migrators to CA by Race")

Error in ggplot(data = CA_Migration_In_And_Out): object 'CA_Migration_In_And_Out' not found

Code

# Bar chart of migrators out of California by race; bar heights come
# straight from Num.Migrators.From.CA.
ggplot(
  CA_Migration_In_And_Out,
  aes(x = Race, y = Num.Migrators.From.CA, fill = Race)
) +
  geom_col() +
  labs(title = "Migrators From CA by Race")

Error in ggplot(data = CA_Migration_In_And_Out): object 'CA_Migration_In_And_Out' not found

Limitations

Some limitations and questions I am still exploring are:

- How was the sample chosen?
- How much migration is not captured by census data?
- Is this data useful for municipalities to prepare for new citizens?
- What are the income ranges for the parental income quintiles?
- Can I change the Y axis scaling? The bar chart and point plot for total migrators by income have different Y axis labels.