NYC Real Estate Sales Data
nyc_rolling_sales <- read_csv("/Users/nelsonfarrell/Downloads/nyc-rolling-sales.csv")
colnames(nyc_rolling_sales)
[1] "...1" "BOROUGH"
[3] "NEIGHBORHOOD" "BUILDING CLASS CATEGORY"
[5] "TAX CLASS AT PRESENT" "BLOCK"
[7] "LOT" "EASE-MENT"
[9] "BUILDING CLASS AT PRESENT" "ADDRESS"
[11] "APARTMENT NUMBER" "ZIP CODE"
[13] "RESIDENTIAL UNITS" "COMMERCIAL UNITS"
[15] "TOTAL UNITS" "LAND SQUARE FEET"
[17] "GROSS SQUARE FEET" "YEAR BUILT"
[19] "TAX CLASS AT TIME OF SALE" "BUILDING CLASS AT TIME OF SALE"
[21] "SALE PRICE" "SALE DATE"
head(nyc_rolling_sales)
# A tibble: 6 × 22
...1 BOROUGH NEIGHBORHOOD `BUILDING CLASS…` `TAX CLASS AT …` BLOCK
<dbl> <dbl> <chr> <chr> <chr> <dbl>
1 4 1 ALPHABET CITY 07 RENTALS - WAL… 2A 392
2 5 1 ALPHABET CITY 07 RENTALS - WAL… 2 399
3 6 1 ALPHABET CITY 07 RENTALS - WAL… 2 399
4 7 1 ALPHABET CITY 07 RENTALS - WAL… 2B 402
5 8 1 ALPHABET CITY 07 RENTALS - WAL… 2A 404
6 9 1 ALPHABET CITY 07 RENTALS - WAL… 2 405
# … with 16 more variables: LOT <dbl>, `EASE-MENT` <lgl>,
# `BUILDING CLASS AT PRESENT` <chr>, ADDRESS <chr>,
# `APARTMENT NUMBER` <chr>, `ZIP CODE` <dbl>,
# `RESIDENTIAL UNITS` <dbl>, `COMMERCIAL UNITS` <dbl>,
# `TOTAL UNITS` <dbl>, `LAND SQUARE FEET` <chr>,
# `GROSS SQUARE FEET` <chr>, `YEAR BUILT` <dbl>,
# `TAX CLASS AT TIME OF SALE` <dbl>, …
str(nyc_rolling_sales)
spec_tbl_df [84,548 × 22] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ ...1 : num [1:84548] 4 5 6 7 8 9 10 11 12 13 ...
$ BOROUGH : num [1:84548] 1 1 1 1 1 1 1 1 1 1 ...
$ NEIGHBORHOOD : chr [1:84548] "ALPHABET CITY" "ALPHABET CITY" "ALPHABET CITY" "ALPHABET CITY" ...
$ BUILDING CLASS CATEGORY : chr [1:84548] "07 RENTALS - WALKUP APARTMENTS" "07 RENTALS - WALKUP APARTMENTS" "07 RENTALS - WALKUP APARTMENTS" "07 RENTALS - WALKUP APARTMENTS" ...
$ TAX CLASS AT PRESENT : chr [1:84548] "2A" "2" "2" "2B" ...
$ BLOCK : num [1:84548] 392 399 399 402 404 405 406 407 379 387 ...
$ LOT : num [1:84548] 6 26 39 21 55 16 32 18 34 153 ...
$ EASE-MENT : logi [1:84548] NA NA NA NA NA NA ...
$ BUILDING CLASS AT PRESENT : chr [1:84548] "C2" "C7" "C7" "C4" ...
$ ADDRESS : chr [1:84548] "153 AVENUE B" "234 EAST 4TH STREET" "197 EAST 3RD STREET" "154 EAST 7TH STREET" ...
$ APARTMENT NUMBER : chr [1:84548] NA NA NA NA ...
$ ZIP CODE : num [1:84548] 10009 10009 10009 10009 10009 ...
$ RESIDENTIAL UNITS : num [1:84548] 5 28 16 10 6 20 8 44 15 24 ...
$ COMMERCIAL UNITS : num [1:84548] 0 3 1 0 0 0 0 2 0 0 ...
$ TOTAL UNITS : num [1:84548] 5 31 17 10 6 20 8 46 15 24 ...
$ LAND SQUARE FEET : chr [1:84548] "1633" "4616" "2212" "2272" ...
$ GROSS SQUARE FEET : chr [1:84548] "6440" "18690" "7803" "6794" ...
$ YEAR BUILT : num [1:84548] 1900 1900 1900 1913 1900 ...
$ TAX CLASS AT TIME OF SALE : num [1:84548] 2 2 2 2 2 2 2 2 2 2 ...
$ BUILDING CLASS AT TIME OF SALE: chr [1:84548] "C2" "C7" "C7" "C4" ...
$ SALE PRICE : chr [1:84548] "6625000" "-" "-" "3936272" ...
$ SALE DATE : POSIXct[1:84548], format: "2017-07-19" ...
- attr(*, "spec")=
.. cols(
.. ...1 = col_double(),
.. BOROUGH = col_double(),
.. NEIGHBORHOOD = col_character(),
.. `BUILDING CLASS CATEGORY` = col_character(),
.. `TAX CLASS AT PRESENT` = col_character(),
.. BLOCK = col_double(),
.. LOT = col_double(),
.. `EASE-MENT` = col_logical(),
.. `BUILDING CLASS AT PRESENT` = col_character(),
.. ADDRESS = col_character(),
.. `APARTMENT NUMBER` = col_character(),
.. `ZIP CODE` = col_double(),
.. `RESIDENTIAL UNITS` = col_double(),
.. `COMMERCIAL UNITS` = col_double(),
.. `TOTAL UNITS` = col_double(),
.. `LAND SQUARE FEET` = col_character(),
.. `GROSS SQUARE FEET` = col_character(),
.. `YEAR BUILT` = col_double(),
.. `TAX CLASS AT TIME OF SALE` = col_double(),
.. `BUILDING CLASS AT TIME OF SALE` = col_character(),
.. `SALE PRICE` = col_character(),
.. `SALE DATE` = col_datetime(format = "")
.. )
- attr(*, "problems")=<externalptr>
nyc_rolling_sales$`SALE PRICE` <- as.numeric(nyc_rolling_sales$`SALE PRICE`)
nyc_rolling_sales <-select(nyc_rolling_sales, "BOROUGH", "NEIGHBORHOOD", "BUILDING CLASS AT PRESENT",
"RESIDENTIAL UNITS", "GROSS SQUARE FEET", "SALE PRICE", "SALE DATE")
nyc_rolling_sales <- rename(nyc_rolling_sales, "BUILDING CLASS" = "BUILDING CLASS AT PRESENT",
"SQ FOOTAGE" = "GROSS SQUARE FEET", "UNITS" = "RESIDENTIAL UNITS")
nyc_rolling_sales <- filter(nyc_rolling_sales, ! `UNITS` == "0" & ! `SALE PRICE` == "-" &
`SALE PRICE` > 100000 & `SALE PRICE` < 5000000)
arrange(nyc_rolling_sales, `SALE PRICE`)
# A tibble: 37,897 × 7
BOROUGH NEIGHBORHOOD `BUILDING CLASS` UNITS `SQ FOOTAGE`
<chr> <chr> <chr> <dbl> <chr>
1 Staten Island MIDLAND BEACH "A6" 1 588
2 Brooklyn PARK SLOPE SOUTH "C0" 3 2781
3 Queens FLUSHING-SOUTH "Condo 1 to 3 S… 1 -
4 Queens JAMAICA "S2" 2 3225
5 Bronx MELROSE/CONCOURSE "C0" 3 3972
6 Brooklyn BOERUM HILL "Condo Elevator… 1 0
7 Bronx PARKCHESTER "Condo Elevator… 1 0
8 Bronx PARKCHESTER "Condo Elevator… 1 0
9 Queens REGO PARK "B3" 2 2128
10 Bronx PARKCHESTER "Condo Elevator… 1 0
# … with 37,887 more rows, and 2 more variables: `SALE PRICE` <dbl>,
# `SALE DATE` <dttm>
head(nyc_rolling_sales)
# A tibble: 6 × 7
BOROUGH NEIGHBORHOOD `BUILDING CLASS` UNITS `SQ FOOTAGE`
<chr> <chr> <chr> <dbl> <chr>
1 Manhatten ALPHABET CITY C4 10 6794
2 Manhatten ALPHABET CITY C4 8 4226
3 Manhatten ALPHABET CITY Condo Walkup 1 -
4 Manhatten ALPHABET CITY Condo Elevator Building 1 -
5 Manhatten ALPHABET CITY Condo Elevator Building 1 -
6 Manhatten ALPHABET CITY Condo Elevator Building 1 -
# … with 2 more variables: `SALE PRICE` <dbl>, `SALE DATE` <dttm>
tally(~ `BUILDING CLASS`, data = nyc_rolling_sales)
BUILDING CLASS
A0 A1
307 4666
A2 A3
1895 238
A4 A5
91 4023
A6 A7
62 2
A9 B1
949 2718
B2 B3
3124 2437
B9 C0
918 2245
C1 C2
267 369
C3 C4
538 46
C5 C6
63 7
C7 C9
66 5
Condo 1 to 3 Stories Condo 2 to 10 Unit Building
1076 934
Condo Elevator Building Condo Walkup
8931 646
D1 D3
11 3
D4 D6
24 6
D7 D9
2 2
E9 F9
2 2
G0 G4
7 1
G9 HR
1 2
I7 K1
2 2
K2 K4
2 99
K9 M1
3 1
M9 N2
5 1
N9 O2
2 1
O5 O7
2 1
O8 P5
18 2
R6 RR
129 9
S0 S1
7 202
S2 S3
403 79
S4 S5
61 50
S9 V0
96 28
V1 W2
3 1
Z0 Z9
1 1
nyc_rolling_sales<- filter(nyc_rolling_sales, `BUILDING CLASS` == "Condo Elevator Building")
head(nyc_rolling_sales)
# A tibble: 6 × 7
BOROUGH NEIGHBORHOOD `BUILDING CLASS` UNITS `SQ FOOTAGE`
<chr> <chr> <chr> <dbl> <chr>
1 Manhatten ALPHABET CITY Condo Elevator Building 1 -
2 Manhatten ALPHABET CITY Condo Elevator Building 1 -
3 Manhatten ALPHABET CITY Condo Elevator Building 1 -
4 Manhatten ALPHABET CITY Condo Elevator Building 1 -
5 Manhatten ALPHABET CITY Condo Elevator Building 1 -
6 Manhatten ALPHABET CITY Condo Elevator Building 1 -
# … with 2 more variables: `SALE PRICE` <dbl>, `SALE DATE` <dttm>
Brooklyn_1 <- filter(nyc_rolling_sales, `BOROUGH` == "Brooklyn")
Manhatten_1 <- filter(nyc_rolling_sales, `BOROUGH` == "Manhatten")
options(scipen = 999) ##Remove Scientific Notation
boxplot(Brooklyn_1$`SALE PRICE`, Manhatten_1$`SALE PRICE`, horizontal = TRUE, names=c("Brooklyn", "Manhatten"),
main = "Sales of Single Unit Homes in Brooklyn & Manhatten", ylab = "Boroughs", xlab = "Sale Price")
favstats(Brooklyn_1$`SALE PRICE`)
min Q1 median Q3 max mean sd n missing
101825 682227 936270 1482500 4938512 1206647 806376.9 2159 0
favstats(Manhatten_1$`SALE PRICE`)
min Q1 median Q3 max mean sd n missing
150000 900000 1396500 2310000 4996841 1734470 1074408 4783 0
nyc_rolling_sales %>%
group_by(BOROUGH) %>%
select("SALE PRICE") %>%
summarize_all(median, na.rm = TRUE)
# A tibble: 5 × 2
BOROUGH `SALE PRICE`
<chr> <dbl>
1 Bronx 143500
2 Brooklyn 936270
3 Manhatten 1396500
4 Queens 643000
5 Staten Island 240318
Manhatten_1 %>%
group_by(NEIGHBORHOOD) %>%
select("SALE PRICE") %>%
summarize_all(median, na.rm = TRUE)
# A tibble: 38 × 2
NEIGHBORHOOD `SALE PRICE`
<chr> <dbl>
1 ALPHABET CITY 1310000
2 CHELSEA 1820000
3 CHINATOWN 1440000
4 CIVIC CENTER 3250000
5 CLINTON 1236250
6 EAST VILLAGE 1285000
7 FASHION 1735000
8 FINANCIAL 1450000
9 FLATIRON 2300000
10 GRAMERCY 1834270.
# … with 28 more rows
Text and figures are licensed under Creative Commons Attribution CC BY-NC 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".
For attribution, please cite this work as
Farrell (2022, Feb. 16). Data Analytics and Computational Social Science: Homework3. Retrieved from https://github.com/DACSS/dacss_course_website/posts/httpsrpubscomjnfarrell211866926/
BibTeX citation
@misc{farrell2022homework3, author = {Farrell, Joseph}, title = {Data Analytics and Computational Social Science: Homework3}, url = {https://github.com/DACSS/dacss_course_website/posts/httpsrpubscomjnfarrell211866926/}, year = {2022} }