hw2
desriptive statistics
probability
Homework2
Author

Rahul Somu

Published

April 2, 2023

Question 1

Below code chunk calculated the confidence interval for bypass surgery as [18.80009, 19.19991], and the confidence interval for angiography as [17.76757, 18.23243]. Based on the results, we can conclude confidence interval for bypass surgery is slightly narrower than that of angiography, which implies that the estimate of the mean wait time for bypass surgery is slightly more precise than that of angiography.

Code
# Create dataframe
df <- data.frame(
  procedure = c("Bypass", "Angiography"),
  sample_size = c(539, 847),
  sample_mean = c(19, 18),
  sample_sd = c(10, 9)
)

# confidence level
conf_level <- 0.9

#degrees of freedom for each procedure
df$df <- df$sample_size - 1

#critical value for the confidence interval
t_critical <- qt(1 - (1 - conf_level) / 2, df$df)

#standard error of the mean for each procedure
df$sem <- df$sample_sd / sqrt(df$sample_size)

# confidence intervals for each procedure
df$ci <- apply(df[, c("sample_mean", "sem", "df")], 1, function(x) {
  x[1] + c(-1, 1) * t_critical * x[2] * sqrt(x[3] + 1) / sqrt(x[3])
})

# Print the confidence intervals
cat("Confidence intervals:\n")
Confidence intervals:
Code
print(df$ci)
         [,1]     [,2]
[1,] 18.28963 17.49016
[2,] 19.70992 18.50952

#Question 2

Below results suggest that we are 95% confident that the true proportion of adult Americans who believe that the college education is essential for success lies between 0.5189 and 0.5808.

Code
n <- 1031
p_hat <- 567/1031
z <- qnorm(1-0.05/2)

CI <- p_hat + z*sqrt(p_hat*(1-p_hat)/n) * c(-1, 1)
CI
[1] 0.5195839 0.5803191

#Question 3

Code
n = ((1.959964)^2 * (42.5)^2) / (5)^2
n
[1] 277.5454

#Question 4

Code
# Part A
n <- 9
ybar <- 410
s <- 90
mu0 <- 500
alpha <- 0.05
se <- s / sqrt(n)
t <- (ybar - mu0) / se
p_value <- 2 * pt(-abs(t), df = n - 1)

# Report results
cat("Test statistic:", round(t, 2), "\n")
Test statistic: -3 
Code
cat("P-value:", p_value, "\n")
P-value: 0.01707168 
Code
if(p_value < alpha) {
  cat("Reject null hypothesis; the mean income of female employees differs from $500 per week.\n")
} else {
  cat("Fail to reject null hypothesis. \n")
}
Reject null hypothesis; the mean income of female employees differs from $500 per week.
Code
# Part B
p_value_lt <- pt(t, df = n - 1)
cat("P-value (Ha: μ < 500):", p_value_lt, "\n")
P-value (Ha: μ < 500): 0.008535841 
Code
if(p_value_lt < alpha) {
  cat("the mean income of female employees is less than $500 per week.\n")
} else {
  cat("Fail to reject null hypothesis. \n")
}
the mean income of female employees is less than $500 per week.
Code
# Part C
p_value_gt <- pt(-t, df = n - 1)
cat("P-value (Ha: μ > 500):", p_value_gt, "\n")
P-value (Ha: μ > 500): 0.9914642 
Code
if(p_value_gt < alpha) {
  cat("the mean income of female employees is greater than $500 per week.\n")
} else {
  cat("Fail to reject null hypothesis\n")
}
Fail to reject null hypothesis

#QUESTION 5

Code
# Jones' study
jones <- data.frame(y_bar = 519.5, se = 10.0, n = 1000)
jones$t <- (jones$y_bar - 500) / jones$se
jones$p_value <- 2 * pt(-abs(jones$t), df = jones$n - 1)
jones$t
[1] 1.95
Code
jones$p_value
[1] 0.05145555
Code
# Smith's study
smith <- data.frame(y_bar = 519.7, se = 10.0, n = 1000)
smith$t <- (smith$y_bar - 500) / smith$se
smith$p_value <- 2 * pt(-abs(smith$t), df = smith$n - 1)
smith$t
[1] 1.97
Code
smith$p_value 
[1] 0.04911426
Code
# Significance testing
alpha <- 0.05
if (jones$p_value < alpha) {
  cat("Jones' study is statistically significant\n")
} else {
  cat("Jones' study is not statistically significant\n")
}
Jones' study is not statistically significant
Code
if (smith$p_value < alpha) {
  cat("Smith's study is statistically significant\n")
} else {
  cat("Smith's study is not statistically significant\n")
}
Smith's study is statistically significant

#Question 6

Code
# Create contingency table
snack_table <- matrix(c(31, 43, 51, 69, 57, 49), nrow = 3, byrow = TRUE)

# Perform chi-squared test
chisq.test(snack_table)

    Pearson's Chi-squared test

data:  snack_table
X-squared = 3.656, df = 2, p-value = 0.1607

#Question 7

Code
# Create a data frame
area <- c(rep("Area 1", 6), rep("Area 2", 6), rep("Area 3", 6))
cost <- c(6.2, 9.3, 6.8, 6.1, 6.7, 7.5, 7.5, 8.2, 8.5, 8.2, 7.0, 9.3, 5.8, 6.4, 5.6, 7.1, 3.0, 3.5)
cost_data <- data.frame(area, cost)

# Perform one-way ANOVA
model <- aov(cost ~ area, data = cost_data)

# Print ANOVA table summary
summary(model)
            Df Sum Sq Mean Sq F value  Pr(>F)   
area         2  25.66  12.832   8.176 0.00397 **
Residuals   15  23.54   1.569                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1