Challenge

Plotly, Gganimate, and Functions

Joseph Farrell
2022-04-06

Load Libraries

hide

Read in and clean data:

This is just data from final project that I cut and pasted in

hide
connecticut <- read_csv("/Users/nelsonfarrell/Downloads/Real_Estate_Sales_2001-2019_GL.csv")
county_town <- read_csv(("/Users/nelsonfarrell/Documents/501 Stats/Connecticut County:Towns.csv"))
county_town <- county_town %>%
  select("subregion", "town")

connecticut <- rename(connecticut, "town" = "Town")

connecticut <- 
  left_join(connecticut, county_town, by = "town")

connecticut <- select(connecticut, 
                      "List Year", 
                      "Date Recorded", 
                      "town", 
                      "Assessed Value", 
                      "Sale Amount",
                      "Residential Type",
                      "subregion")

connecticut <- connecticut %>%
  na.omit(`Residential Type`)

connecticut <- rename(connecticut, 
                      "list_year" = "List Year", 
                      "sale_date" = "Date Recorded", 
                      "assessed_price" = "Assessed Value", 
                      "sale_price" = "Sale Amount",
                      "property_type" = "Residential Type", 
                      "county" = "subregion")

connecticut$sale_date <- as_date(connecticut$sale_date, 
                                 format = "%m/%d/%Y")
connecticut <- connecticut %>%
  mutate(sale_year = year(sale_date))

connecticut <- connecticut %>%
  filter(sale_year > 2007)

connecticut <- connecticut %>%
  filter(sale_price < quantile(connecticut$sale_price, .975) & 
           sale_price > quantile(connecticut$sale_price, .025)) 

Gganimate: Geom_line:

hide
meh <-connecticut %>%
  group_by(year = lubridate::floor_date(sale_date, "year"), 
           property_type) %>%
  summarize(mean = mean(sale_price))

meh5 <-ggplot(meh, 
              aes(year, 
                  mean, 
                  color = property_type)) +
  geom_line() +
  transition_reveal(year) +
  labs(title = "Mean Sale Price",
                x = "Year",
                y = "Price ($)") +
  guides(color = guide_legend(title = "Property Type"))

 
animate(meh5)

Gganimate: Geom_point:

hide
meh2 <-ggplot(meh, 
              aes(year, 
                  mean, 
                  color = property_type)) +
  geom_point() +
  transition_time(year) +
  labs(title = "Mean Sale Price",
                x = "Year",
                y = "Price ($)") +
  guides(color = guide_legend(title = "Property Type"))

 
animate(meh2)

Plotly

hide
plotly <- connecticut %>%
  group_by(property_type, 
           month = lubridate::floor_date(sale_date, "month")) %>%
  summarize(mean_monthy_sale_price = mean(sale_price)) %>%
  ggplot(aes(x = month, 
             y = mean_monthy_sale_price)) +
  geom_point((aes(color = property_type))) +
  geom_smooth() +
  ggtitle("Mean Monthly Sale Price of Property Types") +
  ylab("Mean Sale Price ($)") +
  xlab("Year") +
  theme_linedraw() +
  guides(color = guide_legend(title = "Property Type"))

ggplotly(plotly)

Functions

This function finds the mean sale price of the groups entered

hide
mean_sale_price <- function(df, ...) {
  df %>%
    group_by(...) %>%
    dplyr::summarize(mean = mean(sale_price))
}

connecticut %>%
  mean_sale_price(town, sale_year)
# A tibble: 2,184 × 3
# Groups:   town [169]
   town    sale_year    mean
   <chr>       <dbl>   <dbl>
 1 Andover      2008 258944.
 2 Andover      2009 240323.
 3 Andover      2010 227498.
 4 Andover      2011 241943.
 5 Andover      2012 235318.
 6 Andover      2013 233322.
 7 Andover      2014 219865 
 8 Andover      2015 210757.
 9 Andover      2016 249672.
10 Andover      2017 226692.
# … with 2,174 more rows

Function to find the delta mean in ($) and in (%) of the entire column “town”

hide
delta_mean_sale_price <- function(df, ...) {
  a <- df %>%
    group_by(...) %>%
    dplyr::summarize(mean_a = mean(sale_price)) %>%
    filter(sale_year == "2008")
  
  b <- df %>%
    group_by(...) %>%
    dplyr::summarize(mean_b = mean(sale_price)) %>%
    filter(sale_year == "2020")
 
  a %>%
  left_join(b, by = "town") %>%
  mutate(delta_mean_town = mean_b - mean_a) %>%
  mutate(delta_mean_percent = (delta_mean_town/mean_a)*100) %>%
  mutate(across(contains("mean"), round, 2)) %>%
  select("town",
         "delta_mean_town",
         "delta_mean_percent")
}

connecticut %>%
  delta_mean_sale_price(town, sale_year)
# A tibble: 169 × 3
# Groups:   town [169]
   town         delta_mean_town delta_mean_percent
   <chr>                  <dbl>              <dbl>
 1 Andover               -3282.              -1.27
 2 Ansonia               -5165.              -2.39
 3 Ashford                -548.              -0.27
 4 Avon                 -82496.             -18.6 
 5 Barkhamsted           -5822.              -2.06
 6 Beacon Falls         -41526.             -12.2 
 7 Berlin                 7320.               2.56
 8 Bethany              -78237.             -17.9 
 9 Bethel                58070.              19.2 
10 Bethlehem            -45199.             -12.7 
# … with 159 more rows

Function to find the delta mean in ($) of specific town or county

hide
test <- function(df, col_name, value) {
  a <- df %>%
    filter({{col_name}} == {{value}} & sale_year == "2008") %>%
    summarise(town_mean_08 = mean(sale_price)) 
  b <- df %>%
    filter({{col_name}} == {{value}} & sale_year == "2020") %>%
    summarise(delta_mean = mean(sale_price)) 
  b - a 
}

 test(connecticut, county, "fairfield")
  delta_mean
1   49997.28

Function to return the towns with the five highest change in mean sale price

hide
max_mean <- function(df, ...) {
  a <- df %>%
    group_by(...) %>%
    dplyr::summarize(mean_a = mean(sale_price)) %>%
    filter(sale_year == "2008")
  
  b <- df %>%
    group_by(...) %>%
    dplyr::summarize(mean_b = mean(sale_price)) %>%
    filter(sale_year == "2020")
 
  a %>%
  left_join(b, by = "town") %>%
  mutate(delta_mean_town = mean_b - mean_a) %>%
  mutate(percent_change_mean = (delta_mean_town/mean_a)*100) %>%
  mutate(across(contains("mean"), 
                         round, 
                         2)) %>%
  select("town",
         "delta_mean_town",
         "percent_change_mean") %>%
   arrange(desc(delta_mean_town)) %>%
   head(n = 5)
}

connecticut %>%
  max_mean(town, sale_year)
# A tibble: 5 × 3
# Groups:   town [5]
  town       delta_mean_town percent_change_mean
  <chr>                <dbl>               <dbl>
1 Goshen             256402.               117. 
2 Cornwall           188251.                54.0
3 Salisbury          157199.                32.5
4 Washington         155603.                32.3
5 Warren             123385                 31.4

Reuse

Text and figures are licensed under Creative Commons Attribution CC BY-NC 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".

Citation

For attribution, please cite this work as

Farrell (2022, April 6). Data Analytics and Computational Social Science: Challenge. Retrieved from https://github.com/DACSS/dacss_course_website/posts/httpsrpubscomjnfarrell211887159/

BibTeX citation

@misc{farrell2022challenge,
  author = {Farrell, Joseph},
  title = {Data Analytics and Computational Social Science: Challenge},
  url = {https://github.com/DACSS/dacss_course_website/posts/httpsrpubscomjnfarrell211887159/},
  year = {2022}
}