Error: `path` does not exist: 'Assignment_1_LujiaLi_folder/2019_brooklyn.xlsx'
Code
# preview the first rows of the imported sheet
brooklyn2019 %>% head()
Error in head(brooklyn2019): object 'brooklyn2019' not found
Code
# dimension of dataset (rows, columns)
dim(brooklyn2019)
Error in eval(expr, envir, enclos): object 'brooklyn2019' not found
Code
# variables/column names as read from the sheet, before any cleanup
colnames(brooklyn2019)
Error in is.data.frame(x): object 'brooklyn2019' not found
Code
# remove the special characters in variables
# The Excel header cells contain embedded line breaks and padding, so the raw
# names are normalised to single-space-separated labels before use.
colnames(brooklyn2019) <- colnames(brooklyn2019) %>%
  gsub("\r", "", .) %>%                 # delete the special character "\r"
  gsub("\n", " ", .) %>%                # replace "\n" with a space
  gsub(" +$", "", ., perl = TRUE) %>%   # delete trailing spaces
  gsub(" {2,}", " ", ., perl = TRUE)    # collapse runs of spaces to one space
Error in is.data.frame(x): object 'brooklyn2019' not found
Code
# show the column names again to confirm the cleanup
brooklyn2019 %>% colnames()
Error in is.data.frame(x): object 'brooklyn2019' not found
Code
# remove duplicated rows in dataset
brooklyn2019 <- distinct(brooklyn2019)
Error in distinct(brooklyn2019): object 'brooklyn2019' not found
Code
# inspect column types after de-duplication
brooklyn2019 %>% str()
Error in str(brooklyn2019): object 'brooklyn2019' not found
Code
# subset dataset: keep only the columns used in the analysis
# (alternative pipe form, kept for reference:)
# c('BOROUGH','RESIDENTIAL UNITS','COMMERCIAL UNITS','TOTAL UNITS',
#   'LAND SQUARE FEET','GROSS SQUARE FEET','YEAR BUILT',
#   'BUILDING CLASS AT TIME OF SALE','SALE PRICE','SALE DATE') %>%
#   subset(brooklyn2019, select = .) -> selected_brooklyn2019
selected_brooklyn2019 <- subset(
  brooklyn2019,
  select = c(
    'BOROUGH', 'RESIDENTIAL UNITS', 'COMMERCIAL UNITS', 'TOTAL UNITS',
    'LAND SQUARE FEET', 'GROSS SQUARE FEET', 'YEAR BUILT',
    'BUILDING CLASS AT TIME OF SALE', 'SALE PRICE', 'SALE DATE'
  )
)
Error in subset(brooklyn2019, select = c("BOROUGH", "RESIDENTIAL UNITS", : object 'brooklyn2019' not found
Code
# inspect the structure of the column subset
selected_brooklyn2019 %>% str()
Error in str(selected_brooklyn2019): object 'selected_brooklyn2019' not found
Code
# count the number and calculate the percentage of NA values

# Number of NA values in a vector `x`.
NumNA_column <- function(x) {
  sum(is.na(x))
}

# number of NA in each column
# vapply() walks the columns directly; apply() would first coerce the whole
# data frame to a matrix.
vapply(selected_brooklyn2019, NumNA_column, integer(1))
Error in apply(selected_brooklyn2019, 2, NumNA_column): object 'selected_brooklyn2019' not found
Code
# Percentage (0-100) of NA values in a vector `x`.
PercentageNA_column <- function(x) {
  sum(is.na(x)) / length(x) * 100
}

# percentage of NA in each column
# vapply() walks the columns directly; apply() would first coerce the whole
# data frame to a matrix.
vapply(selected_brooklyn2019, PercentageNA_column, numeric(1))
Error in apply(selected_brooklyn2019, 2, PercentageNA_column): object 'selected_brooklyn2019' not found
Code
# count the number of 0 sales price
# 0 sale price means that a transfer of ownership without a cash consideration
table(selected_brooklyn2019$`SALE PRICE` == 0)
Error in table(selected_brooklyn2019$`SALE PRICE` == 0): object 'selected_brooklyn2019' not found
Error in table(selected_brooklyn2019$`SALE PRICE` == 0): object 'selected_brooklyn2019' not found
Code
# remove rows with NA values and 0 sale price
# 0 sale price means that a transfer of ownership without a cash consideration
# The column name must be back-quoted: a plain-quoted 'SALE PRICE' is a string
# literal, and comparing that string with 0 is always TRUE, so nothing would
# be filtered out.
new_brooklyn2019 <- na.omit(selected_brooklyn2019) %>% # delete rows with NA values
  filter(`SALE PRICE` != 0)                            # keep rows with non-zero sale price
Error in na.omit(selected_brooklyn2019): object 'selected_brooklyn2019' not found
Code
# inspect the structure of the filtered data
new_brooklyn2019 %>% str()
Error in str(new_brooklyn2019): object 'new_brooklyn2019' not found
Code
# summary statistics of dataset
summary(new_brooklyn2019)
Error in summary(new_brooklyn2019): object 'new_brooklyn2019' not found
Code
# merge all sub-datasets
# Every workbook in the data folder is read, cleaned with the same steps, and
# the cleaned tables are stacked into `final_dataframe`.

data_dir <- "Assignment_1_LujiaLi_folder"

# Columns kept from every workbook.
sale_columns <- c(
  "BOROUGH", "RESIDENTIAL UNITS", "COMMERCIAL UNITS", "TOTAL UNITS",
  "LAND SQUARE FEET", "GROSS SQUARE FEET", "YEAR BUILT",
  "BUILDING CLASS AT TIME OF SALE", "SALE PRICE", "SALE DATE"
)

# Normalise raw header text: drop "\r", turn "\n" into a space, collapse runs
# of spaces to one space, and strip trailing spaces.
clean_column_names <- function(x) {
  x %>%
    gsub("\r", "", .) %>%
    gsub("\n", " ", .) %>%
    gsub(" {2,}", " ", ., perl = TRUE) %>%
    gsub(" +$", "", ., perl = TRUE)
}

# Read one workbook (skipping the 6 rows above the header) and return its
# de-duplicated rows, restricted to `sale_columns`, without NA values and
# without zero-price sales.
read_sales_file <- function(path) {
  dataframe <- read_excel(path, skip = 6)
  colnames(dataframe) <- clean_column_names(colnames(dataframe))
  dataframe %>%
    distinct() %>%                    # remove duplicated rows in dataset
    select(all_of(sale_columns)) %>%  # subset dataset
    na.omit() %>%                     # remove rows with NA values
    # 0 sale price means that a transfer of ownership without a cash
    # consideration
    filter(`SALE PRICE` != 0)
}

# names of all .xlsx files
filename <- list.files(data_dir, pattern = "\\.xlsx$")
if (length(filename) == 0) {
  # Without this the script silently produces an empty data frame.
  warning("No .xlsx files found in '", data_dir, "'", call. = FALSE)
}

# file.path() keeps file names intact; the former paste() + gsub(" ", "")
# removed every space from the path, including spaces inside file names.
# Seeding with an empty data.frame keeps the zero-file result a data.frame;
# one rbind() call replaces growing the result on every iteration.
final_dataframe <- do.call(
  rbind,
  c(list(data.frame()), lapply(file.path(data_dir, filename), read_sales_file))
)
str(final_dataframe)
'data.frame': 0 obs. of 0 variables
Source Code
---
title: "Real Estate Data Analysis"
author: "Lujia Li"
description: "601 Assignment #1"
date: "05/03/2023"
format:
  html:
    toc: true
    code-fold: true
    code-copy: true
    code-tools: true
categories:
  - Assignment 1
  - Lujia Li
  - NeighborhoodSales Summary
  - ggplot2
---

```{r}
# read dataset
library(readxl)
library(dbplyr)
library(tidyverse)

data_dir <- "Assignment_1_LujiaLi_folder"

# Columns kept for the analysis.
sale_columns <- c(
  "BOROUGH", "RESIDENTIAL UNITS", "COMMERCIAL UNITS", "TOTAL UNITS",
  "LAND SQUARE FEET", "GROSS SQUARE FEET", "YEAR BUILT",
  "BUILDING CLASS AT TIME OF SALE", "SALE PRICE", "SALE DATE"
)

# Normalise raw header text: drop "\r", turn "\n" into a space, collapse runs
# of spaces to one space, and strip trailing spaces.
clean_column_names <- function(x) {
  x %>%
    gsub("\r", "", .) %>%
    gsub("\n", " ", .) %>%
    gsub(" {2,}", " ", ., perl = TRUE) %>%
    gsub(" +$", "", ., perl = TRUE)
}

# Number of NA values in a vector `x`.
NumNA_column <- function(x) {
  sum(is.na(x))
}

# Percentage (0-100) of NA values in a vector `x`.
PercentageNA_column <- function(x) {
  sum(is.na(x)) / length(x) * 100
}

# Read one workbook (skipping the 6 rows above the header) and return its
# de-duplicated rows, restricted to `sale_columns`, without NA values and
# without zero-price sales.
read_sales_file <- function(path) {
  dataframe <- read_excel(path, skip = 6)
  colnames(dataframe) <- clean_column_names(colnames(dataframe))
  dataframe %>%
    distinct() %>%
    select(all_of(sale_columns)) %>%
    na.omit() %>%
    filter(`SALE PRICE` != 0)
}

brooklyn2019 <- read_excel(file.path(data_dir, "2019_brooklyn.xlsx"), skip = 6)
head(brooklyn2019)

# dimension of dataset
dim(brooklyn2019)

# variables/column names
colnames(brooklyn2019)

# remove the special characters in variables
colnames(brooklyn2019) <- clean_column_names(colnames(brooklyn2019))
colnames(brooklyn2019)

# remove duplicated rows in dataset
brooklyn2019 <- distinct(brooklyn2019)
str(brooklyn2019)

# subset dataset
selected_brooklyn2019 <- subset(brooklyn2019, select = sale_columns)
str(selected_brooklyn2019)

# count the number and calculate the percentage of NA values
# vapply() walks the columns directly; apply() would first coerce the whole
# data frame to a matrix.
vapply(selected_brooklyn2019, NumNA_column, integer(1))        # number of NA in each column
vapply(selected_brooklyn2019, PercentageNA_column, numeric(1)) # percentage of NA in each column

# count the number of 0 sales price
# 0 sale price means that a transfer of ownership without a cash consideration
table(selected_brooklyn2019$`SALE PRICE` == 0)
prop.table(table(selected_brooklyn2019$`SALE PRICE` == 0))

# remove rows with NA values and 0 sale price
# The column name must be back-quoted: a plain-quoted 'SALE PRICE' is a string
# literal, and comparing that string with 0 is always TRUE.
new_brooklyn2019 <- na.omit(selected_brooklyn2019) %>% # delete rows with NA values
  filter(`SALE PRICE` != 0)                            # keep rows with non-zero sale price
str(new_brooklyn2019)

# summary statistics of dataset
summary(new_brooklyn2019)

# merge all sub-datasets
# names of all .xlsx files
filename <- list.files(data_dir, pattern = "\\.xlsx$")
if (length(filename) == 0) {
  # Without this the script silently produces an empty data frame.
  warning("No .xlsx files found in '", data_dir, "'", call. = FALSE)
}

# file.path() keeps file names intact; the former paste() + gsub(" ", "")
# removed every space from the path, including spaces inside file names.
# Seeding with an empty data.frame keeps the zero-file result a data.frame;
# one rbind() call replaces growing the result on every iteration.
final_dataframe <- do.call(
  rbind,
  c(list(data.frame()), lapply(file.path(data_dir, filename), read_sales_file))
)
str(final_dataframe)
```