library(tidyverse)
library(ggplot2)
library(googlesheets4)
library(lubridate)
library(stringr)
# Global chunk options: show the code, hide warnings and messages in the output
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
FinalProject
Reading In the Data
# Column names applied to every sheet when it is read in. The last column is
# named "delete" because it is dropped right after reading.
mass_names <- c(
  "incident_id",
  "incident_date",
  "state",
  "city_or_county",
  "address",
  "number_killed",
  "number_injured",
  "delete"
)
mass_names
#' Read one yearly sheet of the mass-shootings workbook.
#'
#' Reads the sheet using the column names in `mass_names`, skips the sheet's
#' first row, drops the `delete` column (the source's "operation" column of
#' links to news articles), and adds a `Year` column for ease of analysis.
#'
#' @param sheet_name Name of the sheet to read, e.g. "MassShootings2014".
#' @return The sheet's rows as a data frame with an added character `Year`
#'   column and without the `delete` column.
read_shootings <- function(sheet_name) {
  read_sheet(
    "https://docs.google.com/spreadsheets/d/1rCnIYPQSkcZDCulp5KXAxmZUBad4QtrERi4_7tUMXqs/edit#gid=10931567",
    sheet = sheet_name,
    col_names = mass_names,
    skip = 1
  ) %>%
    # Sheet names look like "MassShootings<year>", so stripping the prefix
    # leaves the year; a name without the prefix is kept unchanged.
    mutate(Year = str_remove(sheet_name, "^MassShootings")) %>%
    select(-delete)
}
# Read the first nine sheets of the workbook (2014 through 2022, per the plot
# titles below) with read_shootings() so they share one format, and row-bind
# them into a single data frame with purrr::map_dfr().
# NOTE(review): knitting non-interactively fails with "Can't get Google
# credentials"; if this sheet is publicly readable, calling gs4_deauth() before
# this chunk avoids the credential lookup -- confirm before adding it.
mass_shootings_all <- map_dfr(
  sheet_names("https://docs.google.com/spreadsheets/d/1rCnIYPQSkcZDCulp5KXAxmZUBad4QtrERi4_7tUMXqs/edit#gid=10931567")[1:9],
  read_shootings
)
Error in `gs4_auth()`:
! Can't get Google credentials.
ℹ Are you running googlesheets4 in a non-interactive session? Consider:
• Call `gs4_deauth()` to prevent the attempt to get credentials.
• Call `gs4_auth()` directly with all necessary specifics.
ℹ See gargle's "Non-interactive auth" vignette for more details:
ℹ <https://gargle.r-lib.org/articles/non-interactive-auth.html>
# Sanity check: print the combined data frame to inspect its rows and columns
mass_shootings_all
Error in eval(expr, envir, enclos): object 'mass_shootings_all' not found
The number of rows in the data frame equals the total number of rows across the original Google Sheets tabs, minus 9 for the header row in each of the nine sheets.
# The Year column makes per-year subsetting simple, e.g. all 2014 incidents
mass_shootings_all %>%
  filter(Year == "2014")
Error in filter(mass_shootings_all, Year == "2014"): object 'mass_shootings_all' not found
# Count the number of shootings per year; the result has one row per Year with
# the tally stored in Count
mass_shootings_all_hist <- mass_shootings_all %>%
  count(Year, name = "Count")
Error in group_by(., Year): object 'mass_shootings_all' not found
# Bar chart of the yearly totals: one bar per Year, bar height taken from Count
ggplot(mass_shootings_all_hist, aes(x = Year, y = Count)) +
  geom_col() +
  labs(
    title = "Mass Shootings 2014-2022*",
    caption = "*2022 data goes up to August 27, 2022"
  )
Error in ggplot(mass_shootings_all_hist, aes(x = Year, y = Count)): object 'mass_shootings_all_hist' not found
# Convert the POSIXct incident_date column to a plain Date, stored in a new
# incident_date_new column
mass_shootings_all$incident_date_new <- as.Date(mass_shootings_all$incident_date)
Error in as.Date(mass_shootings_all$incident_date): object 'mass_shootings_all' not found
# Print the data frame to check the incident_date_new column
mass_shootings_all
Error in eval(expr, envir, enclos): object 'mass_shootings_all' not found
# Add a month column: the month number of incident_date_new, stored as a factor
mass_shootings_all <- mass_shootings_all %>%
  mutate(month = as.factor(month(incident_date_new)))
Error in mutate(., month = as.factor(month(incident_date_new))): object 'mass_shootings_all' not found
# Count the number of shootings per month (all years and states combined); the
# tally is stored in Count
mass_shootings_all_months <- mass_shootings_all %>%
  count(month, name = "Count")
Error in group_by(., month): object 'mass_shootings_all' not found
# Print the per-month counts table
mass_shootings_all_months
Error in eval(expr, envir, enclos): object 'mass_shootings_all_months' not found
# Bar chart of the monthly totals: one bar per month, bar height taken from Count
ggplot(mass_shootings_all_months, aes(x = month, y = Count)) +
  geom_col() +
  labs(title = "Mass Shootings 2014-2022 By Month")
Error in ggplot(mass_shootings_all_months, aes(x = month, y = Count)): object 'mass_shootings_all_months' not found
In addition to mass shootings increasing over time, it appears that shootings could be correlated with temperature/season: when the data set is grouped by month, counts are highest in the summer months and lowest in the winter months.
I am curious whether a state with less seasonal variation would show the same distribution. Below I create the same plot for FL and MA.
# Repeat the monthly analysis for a single state.
# Start by keeping only the incidents recorded in Florida.
mass_shootings_all_florida <- filter(mass_shootings_all, state == "Florida")
Error in filter(mass_shootings_all, state == "Florida"): object 'mass_shootings_all' not found
# Print the Florida subset for inspection
mass_shootings_all_florida
Error in eval(expr, envir, enclos): object 'mass_shootings_all_florida' not found
# Count Florida shootings per month. .drop = FALSE keeps factor levels (months)
# that have no rows in the Florida subset, so they appear with a Count of 0.
mass_shootings_all_months_FL <- mass_shootings_all_florida %>%
  count(month, .drop = FALSE, name = "Count")
Error in group_by(., month, .drop = FALSE): object 'mass_shootings_all_florida' not found
# Print the per-month counts for Florida
mass_shootings_all_months_FL
Error in eval(expr, envir, enclos): object 'mass_shootings_all_months_FL' not found
# Bar chart of Florida's monthly totals
ggplot(mass_shootings_all_months_FL, aes(x = month, y = Count)) +
  geom_col() +
  labs(title = "Mass Shootings 2014-2022 By Month in Florida")
Error in ggplot(mass_shootings_all_months_FL, aes(x = month, y = Count)): object 'mass_shootings_all_months_FL' not found
# Same analysis as for Florida, now for Massachusetts: keep only the incidents
# recorded in Massachusetts
mass_shootings_all_mass <- filter(mass_shootings_all, state == "Massachusetts")
Error in filter(mass_shootings_all, state == "Massachusetts"): object 'mass_shootings_all' not found
# Print the Massachusetts subset for inspection
mass_shootings_all_mass
Error in eval(expr, envir, enclos): object 'mass_shootings_all_mass' not found
# Count Massachusetts shootings per month. .drop = FALSE keeps factor levels
# (months) that have no rows in the Massachusetts subset, so they appear with a
# Count of 0.
mass_shootings_all_months_MA <- mass_shootings_all_mass %>%
  count(month, .drop = FALSE, name = "Count")
Error in group_by(., month, .drop = FALSE): object 'mass_shootings_all_mass' not found
# Print the per-month counts for Massachusetts
mass_shootings_all_months_MA
Error in eval(expr, envir, enclos): object 'mass_shootings_all_months_MA' not found
# Bar chart of Massachusetts' monthly totals
ggplot(mass_shootings_all_months_MA, aes(x = month, y = Count)) +
  geom_col() +
  labs(title = "Mass Shootings 2014-2022 By Month in Massachusetts")
Error in ggplot(mass_shootings_all_months_MA, aes(x = month, y = Count)): object 'mass_shootings_all_months_MA' not found
Going forward, I think I will try to create these plots for different states to see if this trend holds true across different states. I am also curious whether I can find a data set with typical temperature ranges per state and see if there is a correlation between temperature variation and mass shootings. I am also curious to figure out what kind of distribution best describes the graph with all states.
What's going on? Rant: There are a number of confounding factors that could explain the apparent correlation with season/temperature: people are more or less likely to leave the house depending on the weather, and there are more public gatherings during seasons with higher temperatures… I am also wondering if COVID affects this. I could create the same graph by state and year (there probably aren't enough events to see a correlation, but maybe for a state with a high population?). I am also wondering if the average number killed increases with higher temperatures, as there may be more opportunities/gatherings of people.
# Number of distinct years covered by the combined data. n_distinct() must be
# given the Year column itself: passing the whole data frame plus the string
# "Year" counts distinct rows rather than distinct years.
n_distinct(mass_shootings_all$Year)
Error in list2(...): object 'mass_shootings_all' not found