Homework4
Author

Rahul Somu

Published

May 16, 2023

Question 1

Code
#' Predict a home's selling price and the residual against an observed price
#'
#' Applies the fitted prediction equation
#' `price = -10536 + 53.8 * home_size + 2.84 * lot_size`
#' and reports how far the observed price lies from that prediction.
#' All arithmetic is vectorised, so vectors of sizes are accepted.
#'
#' @param home_size Numeric home size, in the units the equation was fitted on.
#' @param lot_size Numeric lot size, in the units the equation was fitted on.
#' @param actual_price Observed selling price used for the residual. Defaults
#'   to 145000, the value that was previously hard-coded, so existing calls
#'   return exactly what they did before.
#' @return A list with elements `predicted_price` and `residual`
#'   (`actual_price - predicted_price`).
calculate_prediction <- function(home_size, lot_size, actual_price = 145000) {
  intercept <- -10536
  coefficient_home_size <- 53.8
  coefficient_lot_size <- 2.84

  predicted_price <- intercept +
    coefficient_home_size * home_size +
    coefficient_lot_size * lot_size

  # Positive residual: the home sold for more than the equation predicts.
  list(
    predicted_price = predicted_price,
    residual = actual_price - predicted_price
  )
}

# Evaluate the prediction equation for a home of size 1240 on a lot of
# size 18000; the function returns the prediction and its residual together.
result <- calculate_prediction(home_size = 1240, lot_size = 18000)

# Pull the two list elements out by exact name.
predicted_price <- result[["predicted_price"]]
residual <- result[["residual"]]

predicted_price
[1] 107296
Code
residual
[1] 37704
Code
# Predicted price change for one additional unit of home size with lot size
# held fixed: this is the home-size coefficient itself.
increase_price <- 53.8

# Lot-size increase that yields the same price change: divide the one-unit
# home-size effect by the lot-size coefficient (2.84).
increase_lot_size <- (1 * increase_price) / 2.84

increase_price
[1] 53.8
Code
increase_lot_size
[1] 18.94366

#Question 2

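#A At the 5% significance level we fail to reject the null hypothesis that the mean salary for men and women is the same: the sexFemale coefficient (-3340) has a p-value of 0.0706. The difference is only marginally significant (at the 10% level); in this sample female faculty earn about $3,340 less on average than male faculty.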

#C Based on the coefficients (at the 5% level), degreePhD, sexFemale, and ysdeg are not statistically significant, while rankAssoc, rankProf, and year are statistically significant.

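#E With rank excluded from the model, the coefficient for sex shows that females make about $1,286.54 less than males, holding degree, year, and ysdeg constant. However, this difference is not statistically significant (p = 0.332).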

Code
library(alr4)
Loading required package: car
Loading required package: carData
Loading required package: effects
lattice theme set by effectsTheme()
See ?effectsTheme for details.
Code
# Load the `salary` data set (presumably the one provided by alr4 — confirm).
data(salary)

# Simple regression of salary on sex only. The sexFemale row of the summary
# is the unadjusted female-minus-male difference in mean salary, and its
# t-test is the test of equal mean salaries for the two groups.
summary(lm(salary ~ sex, data = salary))

Call:
lm(formula = salary ~ sex, data = salary)

Residuals:
    Min      1Q  Median      3Q     Max 
-8602.8 -4296.6  -100.8  3513.1 16687.9 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)    24697        938  26.330   <2e-16 ***
sexFemale      -3340       1808  -1.847   0.0706 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 5782 on 50 degrees of freedom
Multiple R-squared:  0.0639,    Adjusted R-squared:  0.04518 
F-statistic: 3.413 on 1 and 50 DF,  p-value: 0.0706
Code
#B
# Multiple regression of salary on every other column of the data frame
# (`.` in the formula expands to all remaining variables).
model <- lm(salary ~ ., data = salary)
summary(model)

Call:
lm(formula = salary ~ ., data = salary)

Residuals:
    Min      1Q  Median      3Q     Max 
-4045.2 -1094.7  -361.5   813.2  9193.1 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 15746.05     800.18  19.678  < 2e-16 ***
degreePhD    1388.61    1018.75   1.363    0.180    
rankAssoc    5292.36    1145.40   4.621 3.22e-05 ***
rankProf    11118.76    1351.77   8.225 1.62e-10 ***
sexFemale    1166.37     925.57   1.260    0.214    
year          476.31      94.91   5.018 8.65e-06 ***
ysdeg        -124.57      77.49  -1.608    0.115    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2398 on 45 degrees of freedom
Multiple R-squared:  0.855, Adjusted R-squared:  0.8357 
F-statistic: 44.24 on 6 and 45 DF,  p-value: < 2.2e-16
Code
# Coefficient matrix only (estimate, std. error, t value, p-value), printed
# with more digits than the formatted summary.
summary(model)$coefficients
              Estimate Std. Error   t value     Pr(>|t|)
(Intercept) 15746.0477  800.17827 19.678175 9.759111e-24
degreePhD    1388.6133 1018.74688  1.363060 1.796454e-01
rankAssoc    5292.3608 1145.39802  4.620543 3.216343e-05
rankProf    11118.7640 1351.77241  8.225323 1.623713e-10
sexFemale    1166.3731  925.56888  1.260169 2.141043e-01
year          476.3090   94.91357  5.018345 8.653790e-06
ysdeg        -124.5743   77.48628 -1.607695 1.148967e-01
Code
#D
# Make "Prof" the reference level of rank, so the rank coefficients become
# contrasts against full professors. This overwrites salary$rank in place:
# every model fitted on `salary` after this line uses the new baseline.
salary$rank <- relevel(salary$rank, ref = "Prof")
model <- lm(salary ~ ., data = salary)
summary(model)$coefficients
               Estimate Std. Error   t value     Pr(>|t|)
(Intercept)  26864.8117 1375.28806 19.533953 1.313481e-23
degreePhD     1388.6133 1018.74688  1.363060 1.796454e-01
rankAsst    -11118.7640 1351.77241 -8.225323 1.623713e-10
rankAssoc    -5826.4032 1012.93301 -5.752012 7.278088e-07
sexFemale     1166.3731  925.56888  1.260169 2.141043e-01
year           476.3090   94.91357  5.018345 8.653790e-06
ysdeg         -124.5743   77.48628 -1.607695 1.148967e-01
Code
# Refit with rank excluded: `. - rank` keeps every other column as a
# predictor. Note that `model` is overwritten again here.
model <- lm(salary ~ . - rank, data = salary)
summary(model)

Call:
lm(formula = salary ~ . - rank, data = salary)

Residuals:
    Min      1Q  Median      3Q     Max 
-8146.9 -2186.9  -491.5  2279.1 11186.6 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 17183.57    1147.94  14.969  < 2e-16 ***
degreePhD   -3299.35    1302.52  -2.533 0.014704 *  
sexFemale   -1286.54    1313.09  -0.980 0.332209    
year          351.97     142.48   2.470 0.017185 *  
ysdeg         339.40      80.62   4.210 0.000114 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3744 on 47 degrees of freedom
Multiple R-squared:  0.6312,    Adjusted R-squared:  0.5998 
F-statistic: 20.11 on 4 and 47 DF,  p-value: 1.048e-09
Code
#F
# Indicator variable: 1 when ysdeg is at most 15, otherwise 0.
# This adds a column to `salary`, so any later `salary ~ .` formula will
# include new_dean as a predictor.
salary$new_dean <- ifelse(salary$ysdeg <= 15, 1, 0)
# Correlation between the indicator and the ysdeg variable it was derived
# from (presumably a multicollinearity check before choosing which to keep).
cor.test(salary$new_dean, salary$ysdeg)

    Pearson's product-moment correlation

data:  salary$new_dean and salary$ysdeg
t = -11.101, df = 50, p-value = 4.263e-15
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.9074548 -0.7411040
sample estimates:
       cor 
-0.8434239 
Code
# Fit with all columns except ysdeg, so new_dean stands in for ysdeg rather
# than entering the model alongside it.
summary(lm(salary ~ . -ysdeg, data = salary))

Call:
lm(formula = salary ~ . - ysdeg, data = salary)

Residuals:
    Min      1Q  Median      3Q     Max 
-3403.3 -1387.0  -167.0   528.2  9233.8 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  24425.32    1107.52  22.054  < 2e-16 ***
degreePhD      818.93     797.48   1.027   0.3100    
rankAsst    -11096.95    1191.00  -9.317 4.54e-12 ***
rankAssoc    -6124.28    1028.58  -5.954 3.65e-07 ***
sexFemale      907.14     840.54   1.079   0.2862    
year           434.85      78.89   5.512 1.65e-06 ***
new_dean      2163.46    1072.04   2.018   0.0496 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2362 on 45 degrees of freedom
Multiple R-squared:  0.8594,    Adjusted R-squared:  0.8407 
F-statistic: 45.86 on 6 and 45 DF,  p-value: < 2.2e-16

#Question 3

Code
library(smss)

# Load the house selling price data (presumably provided by smss — confirm).
data(house.selling.price)

# Additive model: one common Size slope, with New shifting the intercept.
model1 <- lm(Price ~ Size + New, data = house.selling.price)

summary(model1)

Call:
lm(formula = Price ~ Size + New, data = house.selling.price)

Residuals:
    Min      1Q  Median      3Q     Max 
-205102  -34374   -5778   18929  163866 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -40230.867  14696.140  -2.738  0.00737 ** 
Size           116.132      8.795  13.204  < 2e-16 ***
New          57736.283  18653.041   3.095  0.00257 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 53880 on 97 degrees of freedom
Multiple R-squared:  0.7226,    Adjusted R-squared:  0.7169 
F-statistic: 126.3 on 2 and 97 DF,  p-value: < 2.2e-16
Code
# Interaction model: `Size * New` expands to Size + New + Size:New, so the
# Size slope is allowed to differ between new and not-new homes.
summary(lm(Price ~ Size * New, data = house.selling.price))

Call:
lm(formula = Price ~ Size * New, data = house.selling.price)

Residuals:
    Min      1Q  Median      3Q     Max 
-175748  -28979   -6260   14693  192519 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -22227.808  15521.110  -1.432  0.15536    
Size           104.438      9.424  11.082  < 2e-16 ***
New         -78527.502  51007.642  -1.540  0.12697    
Size:New        61.916     21.686   2.855  0.00527 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 52000 on 96 degrees of freedom
Multiple R-squared:  0.7443,    Adjusted R-squared:  0.7363 
F-statistic: 93.15 on 3 and 96 DF,  p-value: < 2.2e-16

#B selling_price_new = -40230.867 + 116.132 * Size + 57736.283 = 17505.416 + 116.132 * Size; selling_price_not_new = -40230.867 + 116.132 * Size

#C

selling_price_new = -40230.867 + 116.132 * 3000 + 57736.283 = 365901.416

selling_price_not_new = -40230.867 + 116.132 * 3000 = 308165.133

Code
#D
# Model with a Size-by-New interaction. In the formula, `Size * New` already
# expands to both main effects plus Size:New, so the explicit `Size + New`
# terms are redundant; the fit is the same as `Price ~ Size * New`.
new_interaction_term <- lm(Price ~ Size + New + Size * New, data = house.selling.price)
summary(new_interaction_term)

Call:
lm(formula = Price ~ Size + New + Size * New, data = house.selling.price)

Residuals:
    Min      1Q  Median      3Q     Max 
-175748  -28979   -6260   14693  192519 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -22227.808  15521.110  -1.432  0.15536    
Size           104.438      9.424  11.082  < 2e-16 ***
New         -78527.502  51007.642  -1.540  0.12697    
Size:New        61.916     21.686   2.855  0.00527 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 52000 on 96 degrees of freedom
Multiple R-squared:  0.7443,    Adjusted R-squared:  0.7363 
F-statistic: 93.15 on 3 and 96 DF,  p-value: < 2.2e-16

#E

new_sp_pred = -22228 + 104*Size - 78528 + 62*Size = -100756 + 166*Size

not_new_pred = -22228 + 104*Size

Code
#F
# Predicted prices at Size = 3000 from the rounded interaction-model
# coefficients (intercept -22228, Size 104, New -78528, Size:New 62):
#   new:     (-22228 - 78528) + (104 + 62) * Size = -100756 + 166 * Size
#   not new:  -22228 + 104 * Size   (the intercept is negative)
new_pred <- -100756 + 166 * 3000
not_new_pred <- -22228 + 104 * 3000

new_pred
[1] 397244
Code
not_new_pred
[1] 289772
Code
#G
# Predicted prices at Size = 1500 from the rounded interaction-model
# coefficients (intercept -22228, Size 104, New -78528, Size:New 62):
#   new:     (-22228 - 78528) + (104 + 62) * Size = -100756 + 166 * Size
#   not new:  -22228 + 104 * Size   (the intercept is negative)
new_pred <- -100756 + 166 * 1500
not_new_pred <- -22228 + 104 * 1500

new_pred
[1] 148244
Code
not_new_pred
[1] 133772

H. I prefer the model with interaction as it provides a better fit to the data. The R-squared value for the model with interaction is 0.7443 (adjusted 0.7363), which is higher than the R-squared value for the model without interaction, 0.7226 (adjusted 0.7169), and the Size:New interaction term is statistically significant (p = 0.00527). This indicates that the model with interaction is able to explain more of the variation in the data.