hw4
desriptive statistics
probability
Homework 4
Author

Hannah Rosenbaum

Published

May 4, 2023

Code
library(alr4)
Loading required package: car
Loading required package: carData
Loading required package: effects
lattice theme set by effectsTheme()
See ?effectsTheme for details.
Code
library(smss)
Code
data(salary)
data("house.selling.price")

Question 1

a

ŷ = −10536 + 53.8x1 + 2.84x2

ŷ = -10536 + 53.8(1240) + 2.84(18000)

ŷ = 107296

residual = 145000 - 107296 = 37704

b

ŷ = −10536 + 53.8x1 + 2.84x2

ŷ = -10536 + 53.8(1240) + 2.84(18000)

ŷ = 107296

ŷ2 = −10536 + 53.8x1 + 2.84x2

ŷ2 = -10536 + 53.8(1241) + 2.84(18000)

ŷ2 = 107349.8

A 1 point increase in square foot home size results in a $53.8 home value increase.

c

107349.8 = -10536 + 53.8(1240) + 2.84(x)

2.84(x) = 51173.8

x = 18018.94

Question 2

a

Code
boxplot(salary ~ sex, data=salary)

b

Code
fit <- lm(salary ~ year + ysdeg + degree + rank + sex, data=salary)
summary(fit)

Call:
lm(formula = salary ~ year + ysdeg + degree + rank + sex, data = salary)

Residuals:
    Min      1Q  Median      3Q     Max 
-4045.2 -1094.7  -361.5   813.2  9193.1 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 15746.05     800.18  19.678  < 2e-16 ***
year          476.31      94.91   5.018 8.65e-06 ***
ysdeg        -124.57      77.49  -1.608    0.115    
degreePhD    1388.61    1018.75   1.363    0.180    
rankAssoc    5292.36    1145.40   4.621 3.22e-05 ***
rankProf    11118.76    1351.77   8.225 1.62e-10 ***
sexFemale    1166.37     925.57   1.260    0.214    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2398 on 45 degrees of freedom
Multiple R-squared:  0.855, Adjusted R-squared:  0.8357 
F-statistic: 44.24 on 6 and 45 DF,  p-value: < 2.2e-16
Code
confint(fit)
                 2.5 %      97.5 %
(Intercept) 14134.4059 17357.68946
year          285.1433   667.47476
ysdeg        -280.6397    31.49105
degreePhD    -663.2482  3440.47485
rankAssoc    2985.4107  7599.31080
rankProf     8396.1546 13841.37340
sexFemale    -697.8183  3030.56452

c

year and ysdeg have the narrowest interval. rankAssoc has the widest interval.

d

Code
fit <- lm(salary ~ rank, data=salary)
summary(fit)

Call:
lm(formula = salary ~ rank, data = salary)

Residuals:
    Min      1Q  Median      3Q     Max 
-5209.0 -1819.2  -417.8  1586.6  8386.1 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  17768.7      705.5   25.19  < 2e-16 ***
rankAssoc     5407.3     1066.6    5.07 6.09e-06 ***
rankProf     11890.3      972.4   12.23  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2993 on 49 degrees of freedom
Multiple R-squared:  0.7542,    Adjusted R-squared:  0.7442 
F-statistic: 75.17 on 2 and 49 DF,  p-value: 1.174e-15
Code
confint(fit)
                2.5 %    97.5 %
(Intercept) 16350.995 19186.338
rankAssoc    3263.944  7550.579
rankProf     9936.158 13844.408

e

Code
fit2 <- lm(salary ~ year + ysdeg + degree + sex, data=salary)
summary(fit2)

Call:
lm(formula = salary ~ year + ysdeg + degree + sex, data = salary)

Residuals:
    Min      1Q  Median      3Q     Max 
-8146.9 -2186.9  -491.5  2279.1 11186.6 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 17183.57    1147.94  14.969  < 2e-16 ***
year          351.97     142.48   2.470 0.017185 *  
ysdeg         339.40      80.62   4.210 0.000114 ***
degreePhD   -3299.35    1302.52  -2.533 0.014704 *  
sexFemale   -1286.54    1313.09  -0.980 0.332209    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3744 on 47 degrees of freedom
Multiple R-squared:  0.6312,    Adjusted R-squared:  0.5998 
F-statistic: 20.11 on 4 and 47 DF,  p-value: 1.048e-09
Code
confint(fit2)
                  2.5 %     97.5 %
(Intercept) 14874.21086 19492.9326
year           65.33403   638.6031
ysdeg         177.21057   501.5875
degreePhD   -5919.68013  -679.0176
sexFemale   -3928.13773  1355.0491

Rsquared decreased by 0.2359, residuals decreased by 130

f

Code
new <- salary[salary$year <= 15,]
pairs(salary)

Code
pairs(new)

Code
summary(salary)
     degree      rank        sex          year            ysdeg      
 Masters:34   Asst :18   Male  :38   Min.   : 0.000   Min.   : 1.00  
 PhD    :18   Assoc:14   Female:14   1st Qu.: 3.000   1st Qu.: 6.75  
              Prof :20               Median : 7.000   Median :15.50  
                                     Mean   : 7.481   Mean   :16.12  
                                     3rd Qu.:11.000   3rd Qu.:23.25  
                                     Max.   :25.000   Max.   :35.00  
     salary     
 Min.   :15000  
 1st Qu.:18247  
 Median :23719  
 Mean   :23798  
 3rd Qu.:27259  
 Max.   :38045  
Code
summary(new)
     degree      rank        sex          year            ysdeg      
 Masters:31   Asst :17   Male  :33   Min.   : 0.000   Min.   : 1.00  
 PhD    :16   Assoc:14   Female:14   1st Qu.: 3.000   1st Qu.: 5.50  
              Prof :16               Median : 6.000   Median :15.00  
                                     Mean   : 6.319   Mean   :15.13  
                                     3rd Qu.: 9.500   3rd Qu.:22.50  
                                     Max.   :15.000   Max.   :33.00  
     salary     
 Min.   :15000  
 1st Qu.:18038  
 Median :23712  
 Mean   :23145  
 3rd Qu.:25965  
 Max.   :38045  
Code
fit3 <- lm(salary ~ year + ysdeg + degree + sex, data=new)
summary(fit3)

Call:
lm(formula = salary ~ year + ysdeg + degree + sex, data = new)

Residuals:
   Min     1Q Median     3Q    Max 
 -6818  -2220   -503   1956  11160 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 16841.99    1212.15  13.894  < 2e-16 ***
year          450.64     179.27   2.514 0.015867 *  
ysdeg         314.49      81.68   3.850 0.000396 ***
degreePhD   -2853.35    1362.31  -2.094 0.042289 *  
sexFemale   -1109.42    1300.32  -0.853 0.398395    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3666 on 42 degrees of freedom
Multiple R-squared:  0.5971,    Adjusted R-squared:  0.5587 
F-statistic: 15.56 on 4 and 42 DF,  p-value: 6.94e-08
Code
confint(fit3)
                  2.5 %     97.5 %
(Intercept) 14395.75913 19288.2135
year           88.85496   812.4206
ysdeg         149.64653   479.3398
degreePhD   -5602.60615  -104.0887
sexFemale   -3733.58009  1514.7408

Question 3

a

Code
fit <- lm(Price ~ New + Size, data=house.selling.price)
summary(fit)

Call:
lm(formula = Price ~ New + Size, data = house.selling.price)

Residuals:
    Min      1Q  Median      3Q     Max 
-205102  -34374   -5778   18929  163866 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -40230.867  14696.140  -2.738  0.00737 ** 
New          57736.283  18653.041   3.095  0.00257 ** 
Size           116.132      8.795  13.204  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 53880 on 97 degrees of freedom
Multiple R-squared:  0.7226,    Adjusted R-squared:  0.7169 
F-statistic: 126.3 on 2 and 97 DF,  p-value: < 2.2e-16

b

Code
fit2 <- lm(Price ~ New + Size, data=house.selling.price[house.selling.price$New==0,])
summary(fit2)

Call:
lm(formula = Price ~ New + Size, data = house.selling.price[house.selling.price$New == 
    0, ])

Residuals:
    Min      1Q  Median      3Q     Max 
-175748  -29155   -7297   14159  192519 

Coefficients: (1 not defined because of singularities)
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -22227.808  15708.186  -1.415    0.161    
New                 NA         NA      NA       NA    
Size           104.438      9.538  10.950   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 52620 on 87 degrees of freedom
Multiple R-squared:  0.5795,    Adjusted R-squared:  0.5747 
F-statistic: 119.9 on 1 and 87 DF,  p-value: < 2.2e-16
Code
fit3 <- lm(Price ~ New + Size, data=house.selling.price[house.selling.price$New==1,])
summary(fit3)

Call:
lm(formula = Price ~ New + Size, data = house.selling.price[house.selling.price$New == 
    1, ])

Residuals:
   Min     1Q Median     3Q    Max 
-78606 -16092   -987  20068  76140 

Coefficients: (1 not defined because of singularities)
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -100755.31   42513.73  -2.370   0.0419 *  
New                 NA         NA      NA       NA    
Size            166.35      17.09   9.735 4.47e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 45500 on 9 degrees of freedom
Multiple R-squared:  0.9133,    Adjusted R-squared:  0.9036 
F-statistic: 94.76 on 1 and 9 DF,  p-value: 4.474e-06

c

Code
predict(fit, data.frame(New=0, Size = 3000))
       1 
308163.9 
Code
predict(fit, data.frame(New=1, Size = 3000))
       1 
365900.2 

d

Code
fit4 <- lm(Price ~ New + Size + New:Size, data=house.selling.price)
summary(fit4)

Call:
lm(formula = Price ~ New + Size + New:Size, data = house.selling.price)

Residuals:
    Min      1Q  Median      3Q     Max 
-175748  -28979   -6260   14693  192519 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -22227.808  15521.110  -1.432  0.15536    
New         -78527.502  51007.642  -1.540  0.12697    
Size           104.438      9.424  11.082  < 2e-16 ***
New:Size        61.916     21.686   2.855  0.00527 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 52000 on 96 degrees of freedom
Multiple R-squared:  0.7443,    Adjusted R-squared:  0.7363 
F-statistic: 93.15 on 3 and 96 DF,  p-value: < 2.2e-16

e

Code
plot(fit2)

Code
plot(fit3)

f

Code
predict(fit4, data.frame(New=0, Size = 3000))
       1 
291087.4 
Code
predict(fit4, data.frame(New=1, Size = 3000))
       1 
398307.5 

g

Code
predict(fit4, data.frame(New=0, Size = 1500))
       1 
134429.8 
Code
predict(fit4, data.frame(New=1, Size = 1500))
       1 
148776.1 

h

The model with interaction appears to estimate lower prices than the original regression.