challenge_6
airbnb
Visualizing Time and Relationships
Author

Mani Shanker Kamarapu

Published

August 23, 2022

Code
library(tidyverse)
library(ggplot2)

# Document-wide chunk defaults: echo the source code, but keep warnings and
# messages out of the rendered output.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)

Read in data

Code
# Read the Airbnb listings CSV, then print a dfSummary overview of every
# column using the "render" print method.
airbnb <- read_csv("_data/AB_NYC_2019.csv")

# The data frame is passed directly (not piped) so the summary keeps the
# object name `airbnb`.
print(
  summarytools::dfSummary(
    airbnb,
    style = "grid",
    plain.ascii = FALSE,
    varnumbers = FALSE,
    valid.col = FALSE,
    graph.magnif = 0.70
  ),
  method = "render",
  table.classes = "table-condensed"
)

Data Frame Summary

airbnb

Dimensions: 48895 x 16
Duplicates: 0
Variable Stats / Values Freqs (% of Valid) Graph Missing
id [numeric]
Mean (sd) : 19017143 (10983108)
min ≤ med ≤ max:
2539 ≤ 19677284 ≤ 36487245
IQR (CV) : 19680234 (0.6)
48895 distinct values 0 (0.0%)
name [character]
1. Hillside Hotel
2. Home away from home
3. New york Multi-unit build
4. Brooklyn Apartment
5. Loft Suite @ The Box Hous
6. Private Room
7. Artsy Private BR in Fort
8. Private room
9. Beautiful Brooklyn Browns
10. Cozy Brooklyn Apartment
[ 47884 others ]
18(0.0%)
17(0.0%)
16(0.0%)
12(0.0%)
11(0.0%)
11(0.0%)
10(0.0%)
10(0.0%)
8(0.0%)
8(0.0%)
48758(99.8%)
16 (0.0%)
host_id [numeric]
Mean (sd) : 67620011 (78610967)
min ≤ med ≤ max:
2438 ≤ 30793816 ≤ 274321313
IQR (CV) : 99612390 (1.2)
37457 distinct values 0 (0.0%)
host_name [character]
1. Michael
2. David
3. Sonder (NYC)
4. John
5. Alex
6. Blueground
7. Sarah
8. Daniel
9. Jessica
10. Maria
[ 11442 others ]
417(0.9%)
403(0.8%)
327(0.7%)
294(0.6%)
279(0.6%)
232(0.5%)
227(0.5%)
226(0.5%)
205(0.4%)
204(0.4%)
46060(94.2%)
21 (0.0%)
neighbourhood_group [character]
1. Bronx
2. Brooklyn
3. Manhattan
4. Queens
5. Staten Island
1091(2.2%)
20104(41.1%)
21661(44.3%)
5666(11.6%)
373(0.8%)
0 (0.0%)
neighbourhood [character]
1. Williamsburg
2. Bedford-Stuyvesant
3. Harlem
4. Bushwick
5. Upper West Side
6. Hell's Kitchen
7. East Village
8. Upper East Side
9. Crown Heights
10. Midtown
[ 211 others ]
3920(8.0%)
3714(7.6%)
2658(5.4%)
2465(5.0%)
1971(4.0%)
1958(4.0%)
1853(3.8%)
1798(3.7%)
1564(3.2%)
1545(3.2%)
25449(52.0%)
0 (0.0%)
latitude [numeric]
Mean (sd) : 40.7 (0.1)
min ≤ med ≤ max:
40.5 ≤ 40.7 ≤ 40.9
IQR (CV) : 0.1 (0)
19048 distinct values 0 (0.0%)
longitude [numeric]
Mean (sd) : -74 (0)
min ≤ med ≤ max:
-74.2 ≤ -74 ≤ -73.7
IQR (CV) : 0 (0)
14718 distinct values 0 (0.0%)
room_type [character]
1. Entire home/apt
2. Private room
3. Shared room
25409(52.0%)
22326(45.7%)
1160(2.4%)
0 (0.0%)
price [numeric]
Mean (sd) : 152.7 (240.2)
min ≤ med ≤ max:
0 ≤ 106 ≤ 10000
IQR (CV) : 106 (1.6)
674 distinct values 0 (0.0%)
minimum_nights [numeric]
Mean (sd) : 7 (20.5)
min ≤ med ≤ max:
1 ≤ 3 ≤ 1250
IQR (CV) : 4 (2.9)
109 distinct values 0 (0.0%)
number_of_reviews [numeric]
Mean (sd) : 23.3 (44.6)
min ≤ med ≤ max:
0 ≤ 5 ≤ 629
IQR (CV) : 23 (1.9)
394 distinct values 0 (0.0%)
last_review [Date]
min : 2011-03-28
med : 2019-05-19
max : 2019-07-08
range : 8y 3m 10d
1764 distinct values 10052 (20.6%)
reviews_per_month [numeric]
Mean (sd) : 1.4 (1.7)
min ≤ med ≤ max:
0 ≤ 0.7 ≤ 58.5
IQR (CV) : 1.8 (1.2)
937 distinct values 10052 (20.6%)
calculated_host_listings_count [numeric]
Mean (sd) : 7.1 (33)
min ≤ med ≤ max:
1 ≤ 1 ≤ 327
IQR (CV) : 1 (4.6)
47 distinct values 0 (0.0%)
availability_365 [numeric]
Mean (sd) : 112.8 (131.6)
min ≤ med ≤ max:
0 ≤ 45 ≤ 365
IQR (CV) : 227 (1.2)
366 distinct values 0 (0.0%)

Generated by summarytools 1.0.1 (R version 4.2.1)
2022-08-28

Code
# Keep only the five columns the plots in this document use, then print the
# same dfSummary overview for the reduced data frame.
airbnb <- select(
  airbnb,
  neighbourhood_group,
  room_type,
  price,
  number_of_reviews,
  last_review
)

print(
  summarytools::dfSummary(
    airbnb,
    style = "grid",
    plain.ascii = FALSE,
    varnumbers = FALSE,
    valid.col = FALSE,
    graph.magnif = 0.70
  ),
  method = "render",
  table.classes = "table-condensed"
)

Data Frame Summary

airbnb

Dimensions: 48895 x 5
Duplicates: 9104
Variable Stats / Values Freqs (% of Valid) Graph Missing
neighbourhood_group [character]
1. Bronx
2. Brooklyn
3. Manhattan
4. Queens
5. Staten Island
1091(2.2%)
20104(41.1%)
21661(44.3%)
5666(11.6%)
373(0.8%)
0 (0.0%)
room_type [character]
1. Entire home/apt
2. Private room
3. Shared room
25409(52.0%)
22326(45.7%)
1160(2.4%)
0 (0.0%)
price [numeric]
Mean (sd) : 152.7 (240.2)
min ≤ med ≤ max:
0 ≤ 106 ≤ 10000
IQR (CV) : 106 (1.6)
674 distinct values 0 (0.0%)
number_of_reviews [numeric]
Mean (sd) : 23.3 (44.6)
min ≤ med ≤ max:
0 ≤ 5 ≤ 629
IQR (CV) : 23 (1.9)
394 distinct values 0 (0.0%)
last_review [Date]
min : 2011-03-28
med : 2019-05-19
max : 2019-07-08
range : 8y 3m 10d
1764 distinct values 10052 (20.6%)

Generated by summarytools 1.0.1 (R version 4.2.1)
2022-08-28

Code
# Remove every row that has a missing value in any remaining column.
# Per the summary above, only `last_review` has missing entries.
airbnb <- drop_na(airbnb)

Time Dependent Visualization

Code
# Density of last-review dates, one panel per neighbourhood group.
# `adjust = 5` widens the kernel bandwidth for a smoother curve.
ggplot(airbnb, aes(x = last_review, fill = neighbourhood_group)) +
  geom_density(adjust = 5, alpha = 0.9) +
  facet_wrap(~neighbourhood_group) +
  labs(title = "Last Review dates of different neighbourhood groups") +
  theme_minimal()

Code
# Density of last-review dates, one panel per room type (default bandwidth).
ggplot(data = airbnb, mapping = aes(x = last_review, fill = room_type)) +
  geom_density(alpha = 0.9) +
  facet_wrap(facets = vars(room_type)) +
  labs(title = "Last Review dates of different rooms") +
  theme_minimal()

Code
# Line plot of each listing's review count against its last-review date.
# NOTE(review): many listings share the same `last_review` date, so
# geom_line() joins them with vertical segments. Confirm that a line on
# unaggregated rows is intended; otherwise summarise per date first.
ggplot(airbnb, aes(x = last_review, y = number_of_reviews)) +
  geom_line() +
  labs(title = "Last review date of AB_NYC_2019") +
  theme_minimal()

Visualizing Part-Whole Relationships

Code
# Price against last-review date, coloured by room type: small points for
# the individual listings plus a connecting line per room type.
# Fix: the plot title previously read "differnt".
airbnb %>%
  ggplot(aes(last_review, price, color = room_type)) +
  geom_point(size = 0.5) +
  geom_line() +
  theme_minimal() +
  labs(title = "Price in different years according to room types")

Code
# Price against last-review date, coloured by neighbourhood group: small
# points for the individual listings plus a connecting line per group.
ggplot(
  airbnb,
  aes(x = last_review, y = price, colour = neighbourhood_group)
) +
  geom_point(size = 0.5) +
  geom_line() +
  labs(title = "Price in different years according to neighbourhood groups") +
  theme_minimal()

Code
# Listing prices per room type, one panel per neighbourhood group.
ggplot(airbnb, aes(x = room_type, y = price, colour = room_type)) +
  geom_point() +
  facet_wrap(~neighbourhood_group) +
  labs(title = "Price in different rooms according to neighbourhood groups") +
  # theme_minimal() must come before theme(), or it would reset the rotated
  # x-axis labels.
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))