HW_3

Identifying the dataset for final project

MANI KANTA GOGULA
2022-03-02

#1. Identify the dataset you will be using for the final project

I will be using the hotel bookings dataset from the course datasets.

#Importing the dataset

# readr supplies read_csv(); dplyr supplies the select(), filter() and
# arrange() verbs called further down. Without dplyr attached, filter()
# resolves to stats::filter() and select()/arrange() are not found.
library(readr)
library(dplyr)

# Read the hotel bookings CSV into a tibble.
# NOTE(review): the path is kept exactly as given, including the space
# before ".csv" -- confirm it matches the file name on disk.
hotel_bookings_ <- read_csv("hotel_bookings .csv")

#Preview of the Dataset

# Show the first six rows (head()'s default) of the bookings tibble.
head(hotel_bookings_, n = 6L)
# A tibble: 6 x 32
  hotel        is_canceled lead_time arrival_date_ye~ arrival_date_mo~
  <chr>              <dbl>     <dbl>            <dbl> <chr>           
1 Resort Hotel           0       342             2015 July            
2 Resort Hotel           0       737             2015 July            
3 Resort Hotel           0         7             2015 July            
4 Resort Hotel           0        13             2015 July            
5 Resort Hotel           0        14             2015 July            
6 Resort Hotel           0        14             2015 July            
# ... with 27 more variables: arrival_date_week_number <dbl>,
#   arrival_date_day_of_month <dbl>, stays_in_weekend_nights <dbl>,
#   stays_in_week_nights <dbl>, adults <dbl>, children <dbl>,
#   babies <dbl>, meal <chr>, country <chr>, market_segment <chr>,
#   distribution_channel <chr>, is_repeated_guest <dbl>,
#   previous_cancellations <dbl>,
#   previous_bookings_not_canceled <dbl>, ...

#USING THE FUNCTION dim() to get the dimensions of the dataset

# Report the dataset's dimensions as (number of rows, number of columns).
c(nrow(hotel_bookings_), ncol(hotel_bookings_))
[1] 119390     32

Selecting the country and is_repeated_guest columns from the dataset using the select() function to see which countries' customers are repeat guests

# Keep only the country and is_repeated_guest columns.
dplyr::select(hotel_bookings_, country, is_repeated_guest)
# A tibble: 119,390 x 2
   country is_repeated_guest
   <chr>               <dbl>
 1 PRT                     0
 2 PRT                     0
 3 GBR                     0
 4 GBR                     0
 5 GBR                     0
 6 GBR                     0
 7 PRT                     0
 8 PRT                     0
 9 PRT                     0
10 PRT                     0
# ... with 119,380 more rows

#Applying the filter() function to the dataset to see how many bookings require exactly one car parking space.

# Keep only the rows where required_car_parking_spaces equals 1.
# The dplyr:: prefix makes sure stats::filter() is never picked up instead.
dplyr::filter(hotel_bookings_, required_car_parking_spaces == 1)
# A tibble: 7,383 x 32
   hotel       is_canceled lead_time arrival_date_ye~ arrival_date_mo~
   <chr>             <dbl>     <dbl>            <dbl> <chr>           
 1 Resort Hot~           0        78             2015 July            
 2 Resort Hot~           0        99             2015 July            
 3 Resort Hot~           0         3             2015 July            
 4 Resort Hot~           0         1             2015 July            
 5 Resort Hot~           0         1             2015 July            
 6 Resort Hot~           0         5             2015 July            
 7 Resort Hot~           0        10             2015 July            
 8 Resort Hot~           0         3             2015 July            
 9 Resort Hot~           0        72             2015 July            
10 Resort Hot~           0         9             2015 July            
# ... with 7,373 more rows, and 27 more variables:
#   arrival_date_week_number <dbl>, arrival_date_day_of_month <dbl>,
#   stays_in_weekend_nights <dbl>, stays_in_week_nights <dbl>,
#   adults <dbl>, children <dbl>, babies <dbl>, meal <chr>,
#   country <chr>, market_segment <chr>, distribution_channel <chr>,
#   is_repeated_guest <dbl>, previous_cancellations <dbl>,
#   previous_bookings_not_canceled <dbl>, ...

#Arranging the dataset by reservation status date using the arrange() function

# Sort the rows in ascending order of reservation_status_date.
dplyr::arrange(.data = hotel_bookings_, reservation_status_date)
# A tibble: 119,390 x 32
   hotel       is_canceled lead_time arrival_date_ye~ arrival_date_mo~
   <chr>             <dbl>     <dbl>            <dbl> <chr>           
 1 Resort Hot~           1       344             2015 September       
 2 Resort Hot~           1       399             2015 December        
 3 Resort Hot~           1       202             2015 July            
 4 City Hotel            1       258             2015 July            
 5 City Hotel            1       258             2015 July            
 6 City Hotel            1       258             2015 July            
 7 City Hotel            1       258             2015 July            
 8 City Hotel            1       258             2015 July            
 9 City Hotel            1       258             2015 July            
10 City Hotel            1       258             2015 July            
# ... with 119,380 more rows, and 27 more variables:
#   arrival_date_week_number <dbl>, arrival_date_day_of_month <dbl>,
#   stays_in_weekend_nights <dbl>, stays_in_week_nights <dbl>,
#   adults <dbl>, children <dbl>, babies <dbl>, meal <chr>,
#   country <chr>, market_segment <chr>, distribution_channel <chr>,
#   is_repeated_guest <dbl>, previous_cancellations <dbl>,
#   previous_bookings_not_canceled <dbl>, ...

#Identify potential research questions that your dataset can help answer.

1. Which countries do most repeat customers come from? 2. How many customers are checking in through the corporate distribution channel?

Reuse

Text and figures are licensed under Creative Commons Attribution CC BY-NC 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".

Citation

For attribution, please cite this work as

GOGULA (2022, March 6). Data Analytics and Computational Social Science: HW_3. Retrieved from https://github.com/DACSS/dacss_course_website/posts/httpsrpubscommanikanta871555/

BibTeX citation

@misc{gogula2022hw_3,
  author = {GOGULA, MANI KANTA},
  title = {Data Analytics and Computational Social Science: HW_3},
  url = {https://github.com/DACSS/dacss_course_website/posts/httpsrpubscommanikanta871555/},
  year = {2022}
}