Plotly, Gganimate, and Functions
This is just data from final project that I cut and pasted in
connecticut <- read_csv("/Users/nelsonfarrell/Downloads/Real_Estate_Sales_2001-2019_GL.csv")
county_town <- read_csv(("/Users/nelsonfarrell/Documents/501 Stats/Connecticut County:Towns.csv"))
county_town <- county_town %>%
select("subregion", "town")
connecticut <- rename(connecticut, "town" = "Town")
connecticut <-
left_join(connecticut, county_town, by = "town")
connecticut <- select(connecticut,
"List Year",
"Date Recorded",
"town",
"Assessed Value",
"Sale Amount",
"Residential Type",
"subregion")
connecticut <- connecticut %>%
na.omit(`Residential Type`)
connecticut <- rename(connecticut,
"list_year" = "List Year",
"sale_date" = "Date Recorded",
"assessed_price" = "Assessed Value",
"sale_price" = "Sale Amount",
"property_type" = "Residential Type",
"county" = "subregion")
connecticut$sale_date <- as_date(connecticut$sale_date,
format = "%m/%d/%Y")
connecticut <- connecticut %>%
mutate(sale_year = year(sale_date))
connecticut <- connecticut %>%
filter(sale_year > 2007)
connecticut <- connecticut %>%
filter(sale_price < quantile(connecticut$sale_price, .975) &
sale_price > quantile(connecticut$sale_price, .025))
meh <-connecticut %>%
group_by(year = lubridate::floor_date(sale_date, "year"),
property_type) %>%
summarize(mean = mean(sale_price))
meh5 <-ggplot(meh,
aes(year,
mean,
color = property_type)) +
geom_line() +
transition_reveal(year) +
labs(title = "Mean Sale Price",
x = "Year",
y = "Price ($)") +
guides(color = guide_legend(title = "Property Type"))
animate(meh5)
meh2 <-ggplot(meh,
aes(year,
mean,
color = property_type)) +
geom_point() +
transition_time(year) +
labs(title = "Mean Sale Price",
x = "Year",
y = "Price ($)") +
guides(color = guide_legend(title = "Property Type"))
animate(meh2)
plotly <- connecticut %>%
group_by(property_type,
month = lubridate::floor_date(sale_date, "month")) %>%
summarize(mean_monthy_sale_price = mean(sale_price)) %>%
ggplot(aes(x = month,
y = mean_monthy_sale_price)) +
geom_point((aes(color = property_type))) +
geom_smooth() +
ggtitle("Mean Monthly Sale Price of Property Types") +
ylab("Mean Sale Price ($)") +
xlab("Year") +
theme_linedraw() +
guides(color = guide_legend(title = "Property Type"))
ggplotly(plotly)
This function finds the mean sale price of the groups entered
# A tibble: 2,184 × 3
# Groups: town [169]
town sale_year mean
<chr> <dbl> <dbl>
1 Andover 2008 258944.
2 Andover 2009 240323.
3 Andover 2010 227498.
4 Andover 2011 241943.
5 Andover 2012 235318.
6 Andover 2013 233322.
7 Andover 2014 219865
8 Andover 2015 210757.
9 Andover 2016 249672.
10 Andover 2017 226692.
# … with 2,174 more rows
Function to find the delta mean in ($) and in (%) of the entire column “town”
delta_mean_sale_price <- function(df, ...) {
a <- df %>%
group_by(...) %>%
dplyr::summarize(mean_a = mean(sale_price)) %>%
filter(sale_year == "2008")
b <- df %>%
group_by(...) %>%
dplyr::summarize(mean_b = mean(sale_price)) %>%
filter(sale_year == "2020")
a %>%
left_join(b, by = "town") %>%
mutate(delta_mean_town = mean_b - mean_a) %>%
mutate(delta_mean_percent = (delta_mean_town/mean_a)*100) %>%
mutate(across(contains("mean"), round, 2)) %>%
select("town",
"delta_mean_town",
"delta_mean_percent")
}
connecticut %>%
delta_mean_sale_price(town, sale_year)
# A tibble: 169 × 3
# Groups: town [169]
town delta_mean_town delta_mean_percent
<chr> <dbl> <dbl>
1 Andover -3282. -1.27
2 Ansonia -5165. -2.39
3 Ashford -548. -0.27
4 Avon -82496. -18.6
5 Barkhamsted -5822. -2.06
6 Beacon Falls -41526. -12.2
7 Berlin 7320. 2.56
8 Bethany -78237. -17.9
9 Bethel 58070. 19.2
10 Bethlehem -45199. -12.7
# … with 159 more rows
Function to find the delta mean in ($) of specific town or county
test <- function(df, col_name, value) {
a <- df %>%
filter({{col_name}} == {{value}} & sale_year == "2008") %>%
summarise(town_mean_08 = mean(sale_price))
b <- df %>%
filter({{col_name}} == {{value}} & sale_year == "2020") %>%
summarise(delta_mean = mean(sale_price))
b - a
}
test(connecticut, county, "fairfield")
delta_mean
1 49997.28
Function to return the towns with the five highest change in mean sale price
max_mean <- function(df, ...) {
a <- df %>%
group_by(...) %>%
dplyr::summarize(mean_a = mean(sale_price)) %>%
filter(sale_year == "2008")
b <- df %>%
group_by(...) %>%
dplyr::summarize(mean_b = mean(sale_price)) %>%
filter(sale_year == "2020")
a %>%
left_join(b, by = "town") %>%
mutate(delta_mean_town = mean_b - mean_a) %>%
mutate(percent_change_mean = (delta_mean_town/mean_a)*100) %>%
mutate(across(contains("mean"),
round,
2)) %>%
select("town",
"delta_mean_town",
"percent_change_mean") %>%
arrange(desc(delta_mean_town)) %>%
head(n = 5)
}
connecticut %>%
max_mean(town, sale_year)
# A tibble: 5 × 3
# Groups: town [5]
town delta_mean_town percent_change_mean
<chr> <dbl> <dbl>
1 Goshen 256402. 117.
2 Cornwall 188251. 54.0
3 Salisbury 157199. 32.5
4 Washington 155603. 32.3
5 Warren 123385 31.4
Text and figures are licensed under Creative Commons Attribution CC BY-NC 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".
For attribution, please cite this work as
Farrell (2022, April 6). Data Analytics and Computational Social Science: Challenge. Retrieved from https://github.com/DACSS/dacss_course_website/posts/httpsrpubscomjnfarrell211887159/
BibTeX citation
@misc{farrell2022challenge, author = {Farrell, Joseph}, title = {Data Analytics and Computational Social Science: Challenge}, url = {https://github.com/DACSS/dacss_course_website/posts/httpsrpubscomjnfarrell211887159/}, year = {2022} }