Connecticut Real Estate Sales Data
connecticut <- read_csv("/Users/nelsonfarrell/Downloads/Real_Estate_Sales_2001-2019_GL.csv")
connecticut
str(connecticut)
spec_tbl_df [930,621 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ Serial Number : num [1:930621] 141466 140604 14340 140455 141195 ...
$ List Year : num [1:930621] 2014 2014 2014 2014 2014 ...
$ Date Recorded : chr [1:930621] "08/06/2015" "06/29/2015" "07/01/2015" "04/30/2015" ...
$ Town : chr [1:930621] "Stamford" "New Haven" "Ridgefield" "New Britain" ...
$ Address : chr [1:930621] "83 OVERBROOK DRIVE" "56 HIGHVIEW LANE" "32 OVERLOOK DR" "171 BRADFORD WALK" ...
$ Assessed Value : num [1:930621] 503270 86030 351880 204680 229330 ...
$ Sale Amount : num [1:930621] 850000 149900 570000 261000 250000 ...
$ Sales Ratio : num [1:930621] 0.592 0.574 0.617 0.784 0.917 ...
$ Property Type : chr [1:930621] "Residential" "Residential" "Residential" "Residential" ...
$ Residential Type: chr [1:930621] "Single Family" "Single Family" "Single Family" "Condo" ...
$ Non Use Code : chr [1:930621] NA NA NA NA ...
$ Assessor Remarks: chr [1:930621] NA NA NA NA ...
$ OPM remarks : chr [1:930621] NA NA NA NA ...
$ Location : chr [1:930621] "POINT (-73.525969 41.081897)" "POINT (-72.878115 41.30285)" "POINT (-73.508273 41.286223)" "POINT (-72.775592 41.713335)" ...
- attr(*, "spec")=
.. cols(
.. `Serial Number` = col_double(),
.. `List Year` = col_double(),
.. `Date Recorded` = col_character(),
.. Town = col_character(),
.. Address = col_character(),
.. `Assessed Value` = col_double(),
.. `Sale Amount` = col_double(),
.. `Sales Ratio` = col_double(),
.. `Property Type` = col_character(),
.. `Residential Type` = col_character(),
.. `Non Use Code` = col_character(),
.. `Assessor Remarks` = col_character(),
.. `OPM remarks` = col_character(),
.. Location = col_character()
.. )
- attr(*, "problems")=<externalptr>
Serial number
, Non Use Code
, Assessor Remarks
, Sales Ratio
, and OPM Remarks
connecticut <- select(connecticut,
"List Year",
"Date Recorded",
"Town",
"Assessed Value",
"Sale Amount",
"Property Type",
"Residential Type",
"Location")
head(connecticut) #view first 6 rows of `connecticut`
# A tibble: 6 × 8
`List Year` `Date Recorded` Town `Assessed Value` `Sale Amount`
<dbl> <chr> <chr> <dbl> <dbl>
1 2014 08/06/2015 Stamford 503270 850000
2 2014 06/29/2015 New Haven 86030 149900
3 2014 07/01/2015 Ridgefie… 351880 570000
4 2014 04/30/2015 New Brit… 204680 261000
5 2014 06/26/2015 Stamford 229330 250000
6 2014 06/29/2015 West Har… 356300 600000
# … with 3 more variables: `Property Type` <chr>,
# `Residential Type` <chr>, Location <chr>
Property Type
and Residential Type
# A tibble: 18 × 2
`Property Type` n
<chr> <int>
1 A 2210
2 Apartments 2922
3 C 3
4 C/I/U 11913
5 Commercial 19404
6 Condo 9932
7 Four Family 237
8 Industrial 2551
9 Public Utility 66
10 R 290287
11 Residential 494147
12 Single Family 40518
13 Three Family 1228
14 Two Family 2411
15 UA 11
16 V 24479
17 Vacant Land 23414
18 <NA> 4888
# A tibble: 6 × 2
`Residential Type` n
<chr> <int>
1 Condo 105420
2 Four Family 2150
3 Single Family 401612
4 Three Family 12586
5 Two Family 26408
6 <NA> 382445
Property Type
and Residential Type
, it appears that looking at only residential properties (including different types) will be most revealing so I will remove Property Type
and filter out all the NAs (which would be non-residential properties) from Residential Type
.Property Type
Column
connecticut <- select(connecticut,
"List Year",
"Date Recorded",
"Town",
"Assessed Value",
"Sale Amount",
"Residential Type",
"Location")
Residential Type
connecticut <- connecticut %>%
na.omit(`Residential Type`)
connecticut %>%
count(`Residential Type`)
# A tibble: 5 × 2
`Residential Type` n
<chr> <int>
1 Condo 76907
2 Four Family 1690
3 Single Family 303731
4 Three Family 10194
5 Two Family 21184
residential_type
connecticut <- rename(connecticut,
"list_year" = "List Year",
"sale_date" = "Date Recorded",
"assessed_price" = "Assessed Value",
"sale_price" = "Sale Amount",
"residential_type" = "Residential Type",
"location" = "Location",
"town" = "Town")
List and description of remaining variables
list_year
: numeric, year the property was listedsale_date
: Date, date the property was soldassessed_price
: numeric, price used in tax evaluationsale_price
: numeric, price the property sold forresidential_type
: character, nominal, if residential, type of residential (i.e., single family, two family, etc.)location
: character, nominal, location of property in lat./long. (maybe useful to create a map later)town
: character, nominal, town location propertyPotential Research Questions
head(connecticut)
# A tibble: 6 × 7
list_year sale_date town assessed_price sale_price residential_type
<dbl> <chr> <chr> <dbl> <dbl> <chr>
1 2014 08/06/20… Stam… 503270 850000 Single Family
2 2014 06/29/20… New … 86030 149900 Single Family
3 2014 07/01/20… Ridg… 351880 570000 Single Family
4 2014 04/30/20… New … 204680 261000 Condo
5 2014 06/26/20… Stam… 229330 250000 Single Family
6 2014 06/29/20… West… 356300 600000 Single Family
# … with 1 more variable: location <chr>
tail(connecticut)
# A tibble: 6 × 7
list_year sale_date town assessed_price sale_price residential_type
<dbl> <chr> <chr> <dbl> <dbl> <chr>
1 2014 03/31/20… Litc… 220250 310000 Single Family
2 2014 02/19/20… Hamd… 220080 402500 Single Family
3 2014 12/18/20… Danb… 248500 428000 Single Family
4 2014 11/20/20… Naug… 175910 252500 Single Family
5 2014 06/18/20… Madi… 409200 580000 Single Family
6 2014 04/22/20… Ches… 379100 520000 Condo
# … with 1 more variable: location <chr>
sale_date
from character to date and create new column year
connecticut$sale_date <- as_date(connecticut$sale_date,
format = "%m/%d/%Y")
connecticut <- connecticut %>%
mutate(year = floor_date(sale_date,
unit = "year"))
str(connecticut)
tibble [413,706 × 8] (S3: tbl_df/tbl/data.frame)
$ list_year : num [1:413706] 2014 2014 2014 2014 2014 ...
$ sale_date : Date[1:413706], format: "2015-08-06" ...
$ town : chr [1:413706] "Stamford" "New Haven" "Ridgefield" "New Britain" ...
$ assessed_price : num [1:413706] 503270 86030 351880 204680 229330 ...
$ sale_price : num [1:413706] 850000 149900 570000 261000 250000 ...
$ residential_type: chr [1:413706] "Single Family" "Single Family" "Single Family" "Condo" ...
$ location : chr [1:413706] "POINT (-73.525969 41.081897)" "POINT (-72.878115 41.30285)" "POINT (-73.508273 41.286223)" "POINT (-72.775592 41.713335)" ...
$ year : Date[1:413706], format: "2015-01-01" ...
- attr(*, "na.action")= 'omit' Named int [1:516915] 9 13 17 18 19 25 27 28 33 42 ...
..- attr(*, "names")= chr [1:516915] "9" "13" "17" "18" ...
connecticut
head(connecticut, n = 75)
# A tibble: 75 × 8
list_year sale_date town assessed_price sale_price
<dbl> <date> <chr> <dbl> <dbl>
1 2014 2015-08-06 Stamford 503270 850000
2 2014 2015-06-29 New Haven 86030 149900
3 2014 2015-07-01 Ridgefield 351880 570000
4 2014 2015-04-30 New Britain 204680 261000
5 2014 2015-06-26 Stamford 229330 250000
6 2014 2015-06-29 West Hartford 356300 600000
7 2014 2014-10-17 Willington 236060 332900
8 2014 2015-08-05 Trumbull 183900 240000
9 2014 2014-12-02 Waterbury 75840 91000
10 2014 2015-07-01 Trumbull 178500 175000
# … with 65 more rows, and 3 more variables: residential_type <chr>,
# location <chr>, year <date>
tail(connecticut, n = 75)
# A tibble: 75 × 8
list_year sale_date town assessed_price sale_price
<dbl> <date> <chr> <dbl> <dbl>
1 2014 2015-06-02 Danbury 205900 382000
2 2014 2014-12-23 Easton 314300 536000
3 2014 2015-06-12 Colchester 210600 301800
4 2014 2015-06-04 Madison 451000 549000
5 2014 2014-10-15 Enfield 159720 121000
6 2014 2015-07-21 Danbury 203300 305000
7 2014 2015-07-07 Darien 677530 850000
8 2014 2015-04-29 East Hartford 103980 170000
9 2014 2015-06-12 Ellington 252780 378500
10 2014 2015-04-28 Hartford 12027 25090
# … with 65 more rows, and 3 more variables: residential_type <chr>,
# location <chr>, year <date>
assessed_price
and sale_price
summarize(connecticut,
mean_assessed_price = mean(assessed_price),
mean_sale_price = mean(sale_price),
median_assessed_price = median(assessed_price),
median_sale_price = median(sale_price),
sd_assessed_price =sd(assessed_price),
sd_sale_price = sd(sale_price))
# A tibble: 1 × 6
mean_assessed_pri… mean_sale_price median_assessed… median_sale_pri…
<dbl> <dbl> <dbl> <dbl>
1 243300. 339122. 159900 229000
# … with 2 more variables: sd_assessed_price <dbl>,
# sd_sale_price <dbl>
assessed_price
favstats(~assessed_price, data = connecticut)
min Q1 median Q3 max mean sd n missing
0 111440 159900 245980 881510000 243299.8 1457126 413706 0
sale_price
min Q1 median Q3 max mean sd n missing
0 148000 229000 358500 120000000 339121.6 608770 413706 0
residential_type
Condo Four Family Single Family Three Family Two Family
76907 1690 303731 10194 21184
# A tibble: 5 × 2
residential_type n
<chr> <int>
1 Condo 76907
2 Four Family 1690
3 Single Family 303731
4 Three Family 10194
5 Two Family 21184
residential_type
prop.table(table(select(connecticut, residential_type)))
Condo Four Family Single Family Three Family Two Family
0.185897715 0.004085027 0.734171126 0.024640687 0.051205445
town
variable
# A tibble: 169 × 2
town n
<chr> <int>
1 Bridgeport 15673
2 Waterbury 12509
3 Stamford 11776
4 Norwalk 9701
5 Fairfield 8878
6 Hamden 8746
7 New Haven 8362
8 West Hartford 8346
9 Danbury 8120
10 Milford 7926
# … with 159 more rows
town
variable
options(scipen = 999) #remove scientific notation
prop.table(table(select(connecticut, town)))
***Unknown*** Andover Ansonia Ashford
0.000002417175 0.000807336611 0.004481443344 0.001053888510
Avon Barkhamsted Beacon Falls Berlin
0.006492533345 0.000787999207 0.001899899929 0.007208017288
Bethany Bethel Bethlehem Bloomfield
0.001629176275 0.007169342480 0.000872600349 0.006485281818
Bolton Bozrah Branford Bridgeport
0.001522820554 0.000570453414 0.009086162637 0.037884391331
Bridgewater Bristol Brookfield Brooklyn
0.000601876695 0.018525232895 0.006098533741 0.003014217826
Burlington Canaan Canterbury Canton
0.003400965903 0.000352907620 0.001317360638 0.003676523908
Chaplin Cheshire Chester Clinton
0.000563201887 0.008846862265 0.001123986599 0.005448313537
Colchester Colebrook Columbia Cornwall
0.004508032274 0.000464097693 0.001588084292 0.000352907620
Coventry Cromwell Danbury Darien
0.004237308620 0.005242853621 0.019627464915 0.008092703514
Deep River Derby Durham East Granby
0.001588084292 0.003149579653 0.002296316708 0.002359163271
East Haddam East Hampton East Hartford East Haven
0.001723446119 0.005564337960 0.017481013087 0.009939425582
East Lyme East Windsor Eastford Easton
0.003604008644 0.002651641504 0.000345656094 0.002982794545
Ellington Enfield Essex Fairfield
0.005312951710 0.012397693048 0.002032844580 0.021459683930
Farmington Franklin Glastonbury Goshen
0.007210434463 0.000423005709 0.013415323926 0.001467225518
Granby Greenwich Griswold Groton
0.003558082310 0.011732969790 0.003526659028 0.007019477600
Guilford Haddam Hamden Hampton
0.007785722228 0.001051471335 0.021140616766 0.000493103798
Hartford Hartland Harwinton Hebron
0.015513432244 0.000464097693 0.001580832765 0.001600170169
Kent Killingly Killingworth Lebanon
0.001237593847 0.006581968838 0.002223801444 0.002337408691
Ledyard Lisbon Litchfield Lyme
0.003113322021 0.001177164460 0.001950660614 0.000894354928
Madison Manchester Mansfield Marlborough
0.006934876458 0.016025873446 0.004172044882 0.002117445722
Meriden Middlebury Middlefield Middletown
0.016151566571 0.002542868607 0.000848428594 0.012533054875
Milford Monroe Montville Morris
0.019158532871 0.006816434860 0.000246551899 0.000727569820
Naugatuck New Britain New Canaan New Fairfield
0.009791977878 0.018435797402 0.008080617637 0.004203468163
New Hartford New Haven New London New Milford
0.002368831972 0.020212421381 0.006569882960 0.008849279440
Newington Newtown Norfolk North Branford
0.012201901834 0.004802927683 0.000570453414 0.002496942273
North Canaan North Haven North Stonington Norwalk
0.000795250734 0.007742213069 0.001421299183 0.023449019352
Norwich Old Lyme Old Saybrook Orange
0.011479166365 0.003758707875 0.005008387599 0.003756290699
Oxford Plainfield Plainville Plymouth
0.004039100231 0.002588794941 0.005264608200 0.000908857981
Portland Preston Prospect Putnam
0.003241432322 0.001295606058 0.003019052177 0.003142328127
Redding Ridgefield Rocky Hill Roxbury
0.002434095710 0.008293329079 0.005929331458 0.000783164856
Salem Salisbury Scotland Seymour
0.001256931251 0.000727569820 0.000036257632 0.005387884150
Sharon Shelton Sherman Simsbury
0.001000710650 0.012871459442 0.001358452621 0.006502202047
Somers South Windsor Southbury Southington
0.002975543018 0.008409353502 0.008757426772 0.011111755691
Sprague Stafford Stamford Sterling
0.000785582032 0.003623346048 0.028464658477 0.001041802633
Stonington Stratford Suffield Thomaston
0.002774917453 0.017524522245 0.004435517010 0.002192378162
Thompson Tolland Torrington Trumbull
0.001464808342 0.004128535723 0.013623201017 0.011111755691
Union Vernon Voluntown Wallingford
0.000183705337 0.008672825630 0.000870183174 0.015779321547
Warren Washington Waterbury Waterford
0.000505189676 0.001515569027 0.030236448106 0.005919662756
Watertown West Hartford West Haven Westbrook
0.005320203236 0.020173746574 0.012922220127 0.002521114028
Weston Westport Wethersfield Willington
0.003342953692 0.010894209898 0.008112040918 0.001097397669
Wilton Winchester Windham Windsor
0.005233184919 0.003432389185 0.002146451828 0.008887954248
Windsor Locks Wolcott Woodbridge Woodbury
0.004524952503 0.005001136072 0.002936868211 0.003052892634
Woodstock
0.002516279677
residential_types
connecticut %>%
group_by(residential_type) %>%
summarize(mean_sale_price_type = mean(sale_price),
median_sale_price_type = median(sale_price),
sd_sale_price_type = sd(sale_price))
# A tibble: 5 × 4
residential_type mean_sale_price_… median_sale_pri… sd_sale_price_t…
<chr> <dbl> <dbl> <dbl>
1 Condo 255460. 175000 612254.
2 Four Family 244174. 190000 382695.
3 Single Family 376150. 250000 631711.
4 Three Family 180494. 155000 165494.
5 Two Family 195847. 163500 227566.
sale price
, check for outliers
connecticut %>%
group_by(residential_type) %>%
ggplot(aes(x = residential_type,
y = sale_price)) +
geom_violin(notch = TRUE) +
labs(title = "Distribution of Sale Price for Residential Types",
x = "Property Type",
y = "Sale Price") +
theme_light()
sale_price
connecticut %>%
group_by(residential_type) %>%
ggplot(aes(x = residential_type,
y = sale_price)) +
geom_violin(aes(fill = residential_type), notch = TRUE) +
labs(title = "Distribution of Sale Price for Residential Types",
x = "Residential Type",
y = "Sale Price") +
theme_light()
sale_price
connecticut %>%
ggplot(aes(x=sale_price, fill = residential_type)) +
geom_histogram(binwidth = 100000) +
labs(title = "Distribution of Sale Price",
ylab = "Count",
xlab = "Sale Price")
sale_price
facet wrapped by residential_type
connecticut %>%
ggplot(aes(x=sale_price, fill = residential_type)) +
geom_histogram(binwidth = 200000) +
labs(title = "Distribution of Sale Price: Residential Type",
ylab = "Count",
xlab = "Sale Price") +
theme_bw() +
facet_wrap(vars(residential_type),
scales = "free_y",
"free_x")
sale_price
after being trimmedfavstats(~sale_price, data = connecticut)
min Q1 median Q3 max mean sd n missing
35001 150000 229000 350000 1374000 283812.3 208105.4 392896 0
sale price
of each town in Connecticut trimmed another 10%
connecticut %>%
group_by(town, residential_type) %>%
summarize(trimmed_mean_10 = mean(sale_price, trim = .1)) %>%
ggplot(aes(x = town, y = trimmed_mean_10, fill = residential_type)) +
geom_bar(stat = "identity") +
labs(title = "Trimmed Mean (10%) of Sale Price of Towns in Connecticut",
y = "Price",
x = "Town") +
theme_classic()
town
and trimmed_mean_025
(the mean trimming the highest and lowest 10%).# A tibble: 18 × 2
year mean_year
<date> <dbl>
1 2001-01-01 480542.
2 2004-01-01 458225
3 2005-01-01 180333.
4 2006-01-01 313608.
5 2007-01-01 328287.
6 2008-01-01 295805.
7 2009-01-01 268327.
8 2010-01-01 282171.
9 2011-01-01 275290.
10 2012-01-01 287261.
11 2013-01-01 280918.
12 2014-01-01 270095.
13 2015-01-01 276110.
14 2016-01-01 270123.
15 2017-01-01 283867.
16 2018-01-01 274166.
17 2019-01-01 272908.
18 2020-01-01 307886.
year
# A tibble: 392,896 × 8
# Groups: sale_date, year [4,137]
list_year sale_date town assessed_price sale_price
<dbl> <date> <chr> <dbl> <dbl>
1 2017 2001-08-01 Shelton 390810 635000
2 2017 2001-08-23 Shelton 427910 558000
3 2006 2001-09-05 Newington 166110 256000
4 2006 2001-09-25 Naugatuck 183850 293711
5 2006 2001-10-27 Greenwich 573930 660000
6 2006 2004-07-27 South Windsor 113580 210000
7 2006 2004-08-02 Stratford 92750 179000
8 2006 2004-09-24 South Windsor 123520 243900
9 2007 2004-11-02 Westport 869600 1200000
10 2007 2005-02-05 Meriden 155340 264000
# … with 392,886 more rows, and 3 more variables:
# residential_type <chr>, location <chr>, year <date>
mean_bottom_5
# A tibble: 5 × 2
town mean_bottom_5
<chr> <dbl>
1 Waterbury 114718.
2 Torrington 140221.
3 New Britain 140224.
4 Norwich 143596.
5 East Hartford 144837.
mean_top_5
# A tibble: 5 × 2
town mean_top_5
<chr> <dbl>
1 Darien 867241.
2 New Canaan 856889.
3 Westport 820937.
4 Greenwich 762582.
5 Weston 758676.
top_5 <- mean_top_5$town
connecticut %>%
group_by(town, year) %>%
summarize(mean_year = mean(sale_price)) %>%
filter(town %in% top_5) %>%
ggplot(aes(x = year, y = mean_year)) +
geom_line(aes(color = town)) +
ggtitle("Towns in Connecticut with the Highest Mean Sale Price: Top 5") +
ylab("Sale Price") +
xlab("Year") +
theme_classic()
mean_year
, year
, and town
.residential_type
first and look at each property type separately.connecticut %>%
group_by(town, year) %>%
summarize(mean_year = mean(sale_price)) %>%
filter(town %in% top_5) %>%
ggplot(aes(x = year, y = mean_year)) +
geom_line(aes(color = town)) +
geom_smooth() +
ggtitle("Towns With Highest Mean Sale Price: Top 5") +
ylab("Sale Price") +
xlab("Year") +
theme_classic() +
facet_wrap(vars(town))
mean_monthly_sale_price
grouped by residential_type
connecticut %>%
group_by(residential_type, month = lubridate::floor_date(sale_date, "month")) %>%
summarize(mean_monthy_sale_price = mean(sale_price)) %>%
ggplot(aes(x = month, y = mean_monthy_sale_price)) +
geom_point((aes(color = residential_type))) +
geom_smooth() +
ggtitle("Mean Monthly Sale Price of Residential Types") +
ylab("Mean Sale Price") +
xlab("Time") +
theme_linedraw()
residential_type
with confidence intervals and regression line
connecticut %>%
group_by(residential_type, year) %>%
summarize(mean_year_type = mean(sale_price)) %>%
ggplot(aes(x = year, y = mean_year_type)) +
geom_point((aes(color = residential_type))) +
geom_smooth() +
ggtitle("Yearly Mean Sale Price: Residential Types") +
ylab("Mean Sale Price") +
xlab("Time") +
theme_linedraw() +
facet_wrap(vars(residential_type))
residential_types
.con_map <- map_data("county",
"connecticut")
ggplot() +
geom_polygon(data = con_map,
aes(x = long, y = lat, group = group),
color = "black", fill = "slateblue1")
mapping
: Joining connecticut
and county_town
data
mapping <-
left_join(connecticut, county_town, by = "town") %>%
group_by(subregion) %>%
summarize(county_mean = mean(sale_price))
mapping
# A tibble: 9 × 2
subregion county_mean
<chr> <dbl>
1 fairfield 419357.
2 hartford 235023.
3 litchfield 253131.
4 middlesex 276324.
5 new haven 237101.
6 new london 233944.
7 tolland 233742.
8 windham 189834.
9 <NA> 282450
mapping
object with con_map
data
mapping <- inner_join(mapping, con_map, by = "subregion")
ggplot(mapping, aes(x = long, y = lat)) +
geom_polygon(aes(fill = county_mean, group = subregion)) +
geom_text(data = county_names, aes(long, lat, label = subregion), size =3) +
scale_fill_gradient(low = "bisque4", high = "bisque2") +
theme_light()
sale_price
sale price
for each town in 2006
sale price
for each town in 2018
mean_2006
and mean_2018
mean_2006
# A tibble: 130 × 3
# Groups: town [130]
town year mean_3
<chr> <date> <dbl>
1 Ansonia 2006-01-01 240118.
2 Ashford 2006-01-01 236024.
3 Avon 2006-01-01 412245.
4 Barkhamsted 2006-01-01 268300
5 Berlin 2006-01-01 271868.
6 Bethany 2006-01-01 377326.
7 Bethel 2006-01-01 340494.
8 Bethlehem 2006-01-01 289000.
9 Bloomfield 2006-01-01 305865.
10 Bolton 2006-01-01 286017.
# … with 120 more rows
mean_2018
# A tibble: 168 × 3
# Groups: town [168]
town year mean_3
<chr> <date> <dbl>
1 Andover 2018-01-01 236506.
2 Ansonia 2018-01-01 197108.
3 Ashford 2018-01-01 187244.
4 Avon 2018-01-01 389546.
5 Barkhamsted 2018-01-01 301730.
6 Beacon Falls 2018-01-01 238719.
7 Berlin 2018-01-01 254156.
8 Bethany 2018-01-01 373936.
9 Bethel 2018-01-01 325866.
10 Bethlehem 2018-01-01 311912.
# … with 158 more rows
mean_2006
and mean_2018
and create new column delta_mean
and c_mean
combined <- mean_2006 %>%
left_join(mean_2018, by = "town") %>%
mutate(delta_mean = mean_3.y - mean_3.x) %>%
mutate(c_mean = (mean_3.y - mean_3.x)/mean_3.x)
combined
# A tibble: 130 × 7
# Groups: town [130]
town year.x mean_3.x year.y mean_3.y delta_mean c_mean
<chr> <date> <dbl> <date> <dbl> <dbl> <dbl>
1 Ansonia 2006-01-01 240118. 2018-01-01 197108. -43010. -0.179
2 Ashford 2006-01-01 236024. 2018-01-01 187244. -48780. -0.207
3 Avon 2006-01-01 412245. 2018-01-01 389546. -22699. -0.0551
4 Barkha… 2006-01-01 268300 2018-01-01 301730. 33430. 0.125
5 Berlin 2006-01-01 271868. 2018-01-01 254156. -17712. -0.0651
6 Bethany 2006-01-01 377326. 2018-01-01 373936. -3390. -0.00898
7 Bethel 2006-01-01 340494. 2018-01-01 325866. -14628. -0.0430
8 Bethle… 2006-01-01 289000. 2018-01-01 311912. 22913. 0.0793
9 Bloomf… 2006-01-01 305865. 2018-01-01 213458. -92406. -0.302
10 Bolton 2006-01-01 286017. 2018-01-01 271425. -14592. -0.0510
# … with 120 more rows
combined
combined <- rename(combined,
"mean_06" = "mean_3.x",
"mean_18" = "mean_3.y")
combined <-
select(combined,
"mean_06",
"mean_18",
"delta_mean",
"c_mean",
"town")
# A tibble: 6 × 5
# Groups: town [6]
mean_06 mean_18 delta_mean c_mean town
<dbl> <dbl> <dbl> <dbl> <chr>
1 216725 285550 68825 0.318 Franklin
2 386750 481049. 94299. 0.244 Sharon
3 436000 528995. 92995. 0.213 Salisbury
4 273850 324583. 50733. 0.185 Colebrook
5 248727. 289751. 41024. 0.165 Chester
6 298083. 341824. 43740. 0.147 Morris
delta_top_6b <- delta_top_6$town
connecticut %>%
group_by(town, year, residential_type) %>%
summarise(mean5 = mean(sale_price)) %>%
filter(town %in% delta_top_6b) %>%
ggplot(aes(x = year,
y = mean5)) +
geom_line(aes(color = town)) +
geom_smooth() +
facet_wrap(vars(town))
Text and figures are licensed under Creative Commons Attribution CC BY-NC 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".
For attribution, please cite this work as
Farrell (2022, March 12). Data Analytics and Computational Social Science: Homework 5: Complete. Retrieved from https://github.com/DACSS/dacss_course_website/posts/httpsrpubscomjnfarrell211875890/
BibTeX citation
@misc{farrell2022homework, author = {Farrell, Joseph}, title = {Data Analytics and Computational Social Science: Homework 5: Complete}, url = {https://github.com/DACSS/dacss_course_website/posts/httpsrpubscomjnfarrell211875890/}, year = {2022} }