Connecticut Real Estate Sales Data
connecticut <- rename(connecticut, "town" = "Town")
connecticut <-
left_join(connecticut, county_town, by = "town")
connecticut <- select(connecticut,
"List Year",
"Date Recorded",
"town",
"Assessed Value",
"Sale Amount",
"Residential Type",
"subregion")
connecticut <- connecticut %>%
na.omit(`Residential Type`)
connecticut %>%
count(`Residential Type`) %>%
rename("Number of Properties Sold" = "n") %>%
formattable(align = c("l","r"), list(`Number of Properties Sold` = color_bar("lightblue")))
Residential Type | Number of Properties Sold |
---|---|
Condo | 105420 |
Four Family | 2150 |
Single Family | 401611 |
Three Family | 12586 |
Two Family | 26408 |
residential_type
.
connecticut <- rename(connecticut,
"list_year" = "List Year",
"sale_date" = "Date Recorded",
"assessed_price" = "Assessed Value",
"sale_price" = "Sale Amount",
"residential_type" = "Residential Type",
"county" = "subregion")
connecticut %>%
group_by(year = lubridate::floor_date(sale_date, "year")) %>%
summarize(mean_year = mean(sale_price)) %>%
ggplot(aes(x = year, y = mean_year)) +
geom_line(aes()) +
geom_smooth() +
ggtitle("Overall Mean Sale Price of Connecticut Properties: 2008-2020") +
ylab("Sale Price") +
xlab("Year") +
theme_linedraw()
options(scipen = 999) # remove scientific notation
connecticut %>%
group_by(residential_type, month = lubridate::floor_date(sale_date, "month")) %>%
summarize(mean_monthy_sale_price = mean(sale_price)) %>%
ggplot(aes(x = month, y = mean_monthy_sale_price)) +
geom_point((aes(color = residential_type))) +
geom_smooth() +
ggtitle("Mean Monthly Sale Price of Residential Types") +
ylab("Mean Sale Price ($)") +
xlab("Year") +
theme_linedraw() +
scale_color_brewer(palette = "Dark2")
options(scipen = 999) # remove scientific notation
connecticut %>%
group_by(residential_type, year = lubridate::floor_date(sale_date, "year")) %>%
summarize(mean_year_type = mean(sale_price)) %>%
ggplot(aes(x = year, y = mean_year_type, group = residential_type)) +
geom_point((aes(color = residential_type))) +
geom_smooth() +
ggtitle("Yearly Mean Sale Price: Residential Types") +
ylab("Mean Sale Price ($)") +
xlab("Year") +
theme_linedraw() +
facet_wrap(vars(residential_type))
con_map <- map_data("county",
"connecticut")
con_map <- rename(con_map,
"county" = "subregion")
map_vis
: grouping by county and creating a “county_mean” column
mapping
object with con_map
data
map_vis <- inner_join(map_vis, con_map, by = "county")
ggplot(map_vis, aes(x = long, y = lat)) +
geom_polygon(aes(fill = county_mean, group = county)) +
geom_text(data = county_names, aes(long, lat, label = county), size =3) +
scale_fill_gradient(low = "bisque4", high = "bisque2", name = "Average Sale Price (USD)") +
ggtitle("Overall Average Sale Price in Connecticut Counties") +
theme_light()
sale price
of Single Family
properties for each county in 2008
sale price
of Single Family
properties for each town in 2020
mean_2008
and mean_2020
and create new column delta_mean
and c_mean
msf_08_20 <- msf_2008 %>%
left_join(msf_2020, by = "county") %>%
mutate(delta_mean = mean_3.y - mean_3.x) %>%
mutate(c_mean = (mean_3.y - mean_3.x)/mean_3.x) %>%
mutate(c_mean = c_mean * 100) %>%
mutate(across(contains("mean"), round, 2)) %>%
rename("mean_08" = "mean_3.x",
"mean_20" = "mean_3.y") %>%
select("county",
"mean_08",
"mean_20",
"delta_mean",
"c_mean")
msf_08_20 %>%
rename("County" = "county",
"Mean 2008" = "mean_08",
"Mean 2020" = "mean_20",
"Change in Mean ($)" = "delta_mean",
"Change in Mean (%)" = "c_mean") %>%
formattable(list(`Mean 2020` = FALSE,
`Mean 2008` = FALSE,
`Change in Mean (%)` = formatter("span",
style = ~ style(color = ifelse(`Change in Mean (%)` < 0,
"firebrick",
"forestgreen")),
~ icontext(sapply(`Change in Mean (%)`,
function(x) if (x < 0) "arrow-down" else if (x > 0) "arrow-up"),
`Change in Mean (%)`))))
County | Change in Mean ($) | Change in Mean (%) |
---|---|---|
fairfield | 63048.12 | 12.56 |
hartford | 13893.09 | 5.21 |
litchfield | 60529.46 | 20.61 |
middlesex | -13165.68 | -3.97 |
new haven | 110.93 | 0.04 |
new london | 1967.19 | 0.72 |
tolland | -6082.70 | -2.29 |
windham | 24476.58 | 11.89 |
msf_08_20 %>%
mutate(Color = ifelse(c_mean < 0, "rosybrown","steelblue")) %>%
ggplot(aes(x = reorder(county, +c_mean),
y = c_mean,
fill = Color)) +
geom_col()+
geom_text(aes(label = c_mean,
vjust = 1.2),
size = 3) +
labs(title = "Percentage Change in Mean Sale Price for Single Family Properties: 2008 to 2020",
x = "County Name",
y = "Percent Change") +
theme(axis.text.x = element_text(angle = 45,
hjust = 1)) +
scale_fill_identity(guide = "none")
connecticut %>%
group_by(county, residential_type, year = lubridate::floor_date(sale_date, "year")) %>%
summarize(county_mean = mean(sale_price)) %>%
filter(residential_type == "Single Family") %>%
ggplot(aes(x = year, y = county_mean)) +
geom_line(aes(color = county)) +
ggtitle("Mean Sale Price of Single Family Properties: Connecticut Counties") +
geom_smooth() +
ylab("Sale Price") +
xlab("Year") +
theme_classic()
connecticut %>%
group_by(county,
residential_type,
year = lubridate::floor_date(sale_date, "year")) %>%
summarize(mean_year = mean(sale_price)) %>%
filter(residential_type == "Single Family") %>%
ggplot(aes(x = year,
y = mean_year)) +
geom_line(aes(color = county)) +
ggtitle("Mean Sale Price of Single Family Properties: Connecticut Counties") +
ylab("Sale Price") +
xlab("Year") +
theme_bw() +
facet_wrap(vars(county)) +
guides(color = "none")
connecticut %>%
filter(county == "litchfield") %>%
group_by(residential_type,
year = lubridate::floor_date(sale_date, "year")) %>%
summarize(mean_litchfield = mean(sale_price)) %>%
ggplot(aes(x = year,
y = mean_litchfield)) +
geom_line(aes(color = residential_type)) +
geom_smooth() +
theme_bw() +
labs(title = "Mean Sale Price of Different Residential Types",
subtitle = "Litchfield County 2008-2020",
x = "Year",
y = "Mean Sale Price ($)") +
facet_wrap(vars(residential_type)) +
guides(color = "none")
Litchfield <- connecticut %>%
group_by(sale_year) %>%
filter(county == "litchfield")
Litchfield %>%
group_by(residential_type) %>%
count(residential_type) %>%
rename("Residential Type" = "residential_type",
"Count: Properties Sold" = "n") %>%
knitr::kable(caption = "Counts of Residential Types: Litchfield County") %>%
kable_classic(full_width = T) %>%
row_spec(2, background = "yellow") %>%
row_spec(4, background = "yellow")
Residential Type | Count: Properties Sold |
---|---|
Condo | 3611 |
Four Family | 61 |
Single Family | 22891 |
Three Family | 327 |
Two Family | 1177 |
Litchfield %>%
group_by(sale_year, residential_type) %>%
ggplot(aes(x = sale_year, fill = residential_type)) +
geom_bar(aes(stat="identity")) +
facet_wrap(vars(residential_type)) +
labs(title = "Number of Properties Sold Each Year in Litchfield: 2008-2020",
subtitle = "Displayed by Property Type",
x = "Year 2008-2020",
y = "Numnber of Properties") +
theme(legend.position = "none")
Litchfield %>%
filter(residential_type == "Four Family") %>%
group_by(sale_year) %>%
count(residential_type) %>%
rename("Year" = "sale_year",
"Residential Type" = "residential_type",
"Total Sold" = "n") %>%
knitr::kable(caption = "Counts of Four Family Properties Sold Each Year") %>%
kable_classic(full_width = F, position = "left") %>%
row_spec(4, background = "yellow")
Year | Residential Type | Total Sold |
---|---|---|
2008 | Four Family | 3 |
2009 | Four Family | 2 |
2010 | Four Family | 4 |
2011 | Four Family | 1 |
2012 | Four Family | 3 |
2013 | Four Family | 3 |
2014 | Four Family | 5 |
2015 | Four Family | 3 |
2016 | Four Family | 5 |
2017 | Four Family | 4 |
2018 | Four Family | 10 |
2019 | Four Family | 10 |
2020 | Four Family | 8 |
Litchfield %>%
group_by(town, year = lubridate::floor_date(sale_date, "year")) %>%
summarize(meanL = mean(sale_price)) %>%
ggplot(aes(x = year, y = meanL)) +
geom_line(aes(color = town)) +
facet_wrap(vars(town)) +
guides(color = "none")
L08 <- Litchfield_SF %>%
group_by(town, sale_year) %>%
summarise(mean_L08 = mean(sale_price)) %>%
filter(sale_year == "2008")
L20 <- Litchfield_SF %>%
group_by(town, sale_year) %>%
summarise(mean_L20 = mean(sale_price)) %>%
filter(sale_year == "2020")
L_combined <- L08 %>%
left_join(L20, by = "town")
L_combined <- L_combined %>%
mutate(L_delta_mean = mean_L20 - mean_L08) %>%
mutate(L_c_mean = (mean_L20 - mean_L08)/mean_L08) %>%
mutate(L_c_mean = L_c_mean * 100) %>%
mutate(across(contains("mean"), round, 2))
L_combined %>%
mutate(Color = ifelse(L_c_mean < 0, "rosybrown","steelblue")) %>%
ggplot(aes(x = reorder(town, +L_c_mean),
y = L_c_mean,
fill = Color)) +
geom_col()+
geom_text(aes(label = L_c_mean,
vjust = 1.2),
size = 3) +
labs(title = "Percentage Change in Mean Sale Price: 2008 to 2020",
x = "County Name",
y = "Percent Change") +
theme(axis.text.x = element_text(angle = 45,
hjust = 1)) +
scale_fill_identity(guide = "none")
# A tibble: 337 × 3
# Groups: sale_year [13]
sale_year town n
<dbl> <chr> <int>
1 2008 Barkhamsted 27
2 2008 Bethlehem 22
3 2008 Bridgewater 12
4 2008 Canaan 5
5 2008 Colebrook 18
6 2008 Cornwall 22
7 2008 Goshen 9
8 2008 Harwinton 32
9 2008 Kent 33
10 2008 Litchfield 71
# … with 327 more rows
pred <- auto.arima(ts_object)
forcast1 <- forecast(pred, 10)
print(forcast1)
Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
4663 371356.5 351916.6 390796.4 341625.8 401087.2
4670 367049.0 346515.0 387583.0 335645.0 398453.1
4677 361690.1 340159.0 383221.2 328761.2 394619.0
4684 356143.5 333425.6 378861.3 321399.5 390887.4
4691 350616.8 326593.5 374640.1 313876.3 387357.3
4698 345227.6 319872.5 370582.7 306450.4 384004.8
4705 340062.3 313421.0 366703.6 299317.9 380806.7
4712 335184.9 307350.6 363019.1 292616.1 377753.6
4719 330640.4 301734.3 359546.4 286432.4 374848.4
4726 326458.0 296613.8 356302.2 280815.3 372100.7
plot(forcast1)
Text and figures are licensed under Creative Commons Attribution CC BY-NC 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".
For attribution, please cite this work as
Farrell (2022, April 3). Data Analytics and Computational Social Science: Homework 6: Incomplete. Retrieved from https://github.com/DACSS/dacss_course_website/posts/httpsrpubscomjnfarrell211884388/
BibTeX citation
@misc{farrell2022homework, author = {Farrell, Joseph}, title = {Data Analytics and Computational Social Science: Homework 6: Incomplete}, url = {https://github.com/DACSS/dacss_course_website/posts/httpsrpubscomjnfarrell211884388/}, year = {2022} }