Code
library(tidyverse)
library(ggplot2)
knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE)
Mani Shanker Kamarapu
August 22, 2022
name host_name neighbourhood_group room_type
Length:48895 Length:48895 Length:48895 Length:48895
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
price number_of_reviews reviews_per_month
Min. : 0.0 Min. : 0.00 Min. : 0.010
1st Qu.: 69.0 1st Qu.: 1.00 1st Qu.: 0.190
Median : 106.0 Median : 5.00 Median : 0.720
Mean : 152.7 Mean : 23.27 Mean : 1.373
3rd Qu.: 175.0 3rd Qu.: 24.00 3rd Qu.: 2.020
Max. :10000.0 Max. :629.00 Max. :58.500
NA's :10052
calculated_host_listings_count availability_365
Min. : 1.000 Min. : 0.0
1st Qu.: 1.000 1st Qu.: 0.0
Median : 1.000 Median : 45.0
Mean : 7.144 Mean :112.8
3rd Qu.: 2.000 3rd Qu.:227.0
Max. :327.000 Max. :365.0
name host_name neighbourhood_group room_type
Length:48895 Length:48895 Length:48895 Length:48895
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
price number_of_reviews reviews_per_month
Min. : 0.0 Min. : 0.00 Min. : 0.000
1st Qu.: 69.0 1st Qu.: 1.00 1st Qu.: 0.040
Median : 106.0 Median : 5.00 Median : 0.370
Mean : 152.7 Mean : 23.27 Mean : 1.091
3rd Qu.: 175.0 3rd Qu.: 24.00 3rd Qu.: 1.580
Max. :10000.0 Max. :629.00 Max. :58.500
calculated_host_listings_count availability_365
Min. : 1.000 Min. : 0.0
1st Qu.: 1.000 1st Qu.: 0.0
Median : 1.000 Median : 45.0
Mean : 7.144 Mean :112.8
3rd Qu.: 2.000 3rd Qu.:227.0
Max. :327.000 Max. :365.0
---
title: "Challenge 5"
author: "Mani Shanker Kamarapu"
description: "Introduction to Visualization"
date: "08/22/2022"
format:
  html:
    df-print: paged
    toc: true
    css: styles.css
    code-fold: true
    code-copy: true
    code-tools: true
categories:
- challenge_5
- AB_NYC_2019
---
```{r}
#| label: setup
#| warning: false
#| message: false
# Attach the packages used by the chunks in this document.
library(tidyverse)
library(ggplot2)
# Chunk defaults: echo the code, but keep warnings and messages out of the
# rendered output.
knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE)
```
## Read in data
```{r}
# Import the listings CSV and discard the columns that are not used in the
# rest of this document; the trimmed tibble is stored as AB_NYC_2019.
AB_NYC_2019 <- "_data/AB_NYC_2019.csv" %>%
  read_csv() %>%
  select(
    -id, -host_id, -neighbourhood, -latitude, -longitude,
    -minimum_nights, -last_review
  )

# Print the tibble so it is rendered in the document.
AB_NYC_2019
```
### Briefly describe the data
```{r}
# Number of rows and columns of the imported data.
AB_NYC_2019 %>%
  dim()
```
```{r}
# Per-column summary of the imported data (type/length for character
# columns, quartiles and NA counts for numeric ones).
AB_NYC_2019 %>%
  summary()
```
## Tidy Data (as needed)
```{r}
# Replace missing reviews_per_month values with 0; all other columns are
# left untouched.
AB_NYC_2019 <- AB_NYC_2019 %>%
  replace_na(list(reviews_per_month = 0))

# Print the tidied tibble so it is rendered in the document.
AB_NYC_2019
```
```{r}
# Number of rows and columns after the NA replacement.
AB_NYC_2019 %>%
  dim()
```
```{r}
# Per-column summary after the NA replacement.
AB_NYC_2019 %>%
  summary()
```
## Univariate Visualizations
```{r}
# Histogram of price, shown for the 0-1700 range.
#
# The range is applied with coord_cartesian() instead of xlim(). xlim() sets
# scale limits: observations outside the limits are removed before binning,
# and bars whose edges extend past a limit can be dropped from the plot
# (with default binning the first bar straddles 0). coord_cartesian() only
# changes the visible window, so every bar inside it is drawn from the
# full data.
#
# binwidth and boundary are given explicitly so that bins start exactly at 0
# and do not depend on the default of 30 bins spread over the full price
# range (which reaches 10000).
ggplot(AB_NYC_2019, aes(x = price)) +
  geom_histogram(binwidth = 50, boundary = 0, fill = "blue") +
  coord_cartesian(xlim = c(0, 1700)) +
  labs(title = "Price of the room", x = "Price", y = "Frequency") +
  theme_bw()
```
```{r}
# Histogram of number_of_reviews, shown for the 0-400 range.
#
# The range is applied with coord_cartesian() instead of xlim(). xlim() sets
# scale limits: observations outside the limits are removed before binning,
# and bars whose edges extend past a limit can be dropped from the plot.
# With default binning the first bar straddles 0, and that bar holds the
# listings with the fewest reviews. coord_cartesian() only changes the
# visible window, so every bar inside it is drawn from the full data.
#
# binwidth and boundary are given explicitly so that bins start exactly at 0
# and do not depend on the default of 30 bins over the full data range.
ggplot(AB_NYC_2019, aes(x = number_of_reviews)) +
  geom_histogram(binwidth = 10, boundary = 0, fill = "lightgreen") +
  coord_cartesian(xlim = c(0, 400)) +
  labs(title = "No of reviews", x = "Reviews", y = "Frequency") +
  theme_bw()
```
```{r}
# Histogram of reviews_per_month, shown for the 0-15 range.
#
# The range is applied with coord_cartesian() instead of xlim(). xlim() sets
# scale limits: observations outside the limits are removed before binning,
# and bars whose edges extend past a limit can be dropped from the plot.
# With default binning the first bar straddles 0, which is where the values
# that were NA and are now 0 fall. coord_cartesian() only changes the
# visible window, so every bar inside it is drawn from the full data.
#
# binwidth and boundary are given explicitly so that bins start exactly at 0
# and do not depend on the default of 30 bins over the full data range.
ggplot(AB_NYC_2019, aes(x = reviews_per_month)) +
  geom_histogram(binwidth = 0.5, boundary = 0, fill = "red") +
  coord_cartesian(xlim = c(0, 15)) +
  labs(title = "Reviews in a month", x = "Monthly reviews", y = "Frequency") +
  theme_bw()
```
```{r}
# Histogram of availability_365, shown for the 0-370 range.
#
# The range is applied with coord_cartesian() instead of xlim(). xlim() sets
# scale limits, and bars whose edges extend past a limit can be dropped from
# the plot. With default binning the first bar straddles 0, and at least a
# quarter of the listings have availability_365 equal to 0.
# coord_cartesian() only changes the visible window, so every bar inside it
# is drawn from the full data.
#
# binwidth and boundary are given explicitly so that bins start exactly at 0
# and do not depend on the default of 30 bins.
ggplot(AB_NYC_2019, aes(x = availability_365)) +
  geom_histogram(binwidth = 15, boundary = 0, fill = "orange") +
  coord_cartesian(xlim = c(0, 370)) +
  labs(
    title = "Availability of the room",
    x = "Availability",
    y = "Frequency"
  ) +
  theme_bw()
```
## Bivariate Visualization(s)
```{r}
# Point plot of price (x axis) against room_type (y axis), one point per row
# of AB_NYC_2019, drawn with the black-and-white theme.
ggplot(data = AB_NYC_2019, mapping = aes(x = price, y = room_type)) +
  geom_point() +
  ggtitle("Price and Room Type") +
  xlab("Price") +
  ylab("Room Type") +
  theme_bw()
```