Homework2

hw2

descriptive statistics

probability

Homework2

Author

Rahul Somu

Published

April 2, 2023

Question 1

The code chunk below computes 90% confidence intervals for the mean wait time: approximately [18.29, 19.71] for bypass surgery and [17.49, 18.51] for angiography. The interval for angiography is narrower (a width of about 1.02 versus about 1.42), which implies that the estimate of the mean wait time for angiography is slightly more precise than that for bypass surgery. This is expected, since angiography has both the larger sample size (847 vs. 539) and the smaller sample standard deviation (9 vs. 10).

Code

# Summary statistics, one row per procedure
df <- data.frame(
  procedure = c("Bypass", "Angiography"),
  sample_size = c(539, 847),
  sample_mean = c(19, 18),
  sample_sd = c(10, 9)
)

# Confidence level of the two-sided intervals
conf_level <- 0.9

# Degrees of freedom for each procedure (n - 1)
df$df <- df$sample_size - 1

# Critical t value for each procedure. qt() is vectorised over the degrees of
# freedom, so t_critical[i] belongs to row i of the data frame.
t_critical <- qt(1 - (1 - conf_level) / 2, df$df)

# Standard error of the mean for each procedure: s / sqrt(n)
df$sem <- df$sample_sd / sqrt(df$sample_size)

# Confidence intervals: mean -/+ t * SEM.
# Plain vector arithmetic keeps every procedure paired with its own critical
# value; multiplying c(-1, 1) by the length-2 t_critical inside a row-wise
# apply() would mix the two procedures' critical values. The SEM already
# accounts for the sample size, so no further n / (n - 1) correction applies.
margin_of_error <- t_critical * df$sem
df$ci <- cbind(
  lower = df$sample_mean - margin_of_error,
  upper = df$sample_mean + margin_of_error
)

# Print the confidence intervals (one row per procedure: lower, upper)
cat("Confidence intervals:\n")

Confidence intervals:

Code

# Display the interval bounds stored in the `ci` column of `df`
print(df[["ci"]])

         [,1]     [,2]
[1,] 18.28963 17.49016
[2,] 19.70992 18.50952

Question 2

The results below suggest that we are 95% confident that the true proportion of adult Americans who believe that a college education is essential for success lies between 0.5196 and 0.5803.

Code

# 95% normal-approximation (Wald) interval for a sample proportion
n <- 1031                 # number of respondents
p_hat <- 567 / 1031       # observed sample proportion
z <- qnorm(1 - 0.05 / 2)  # two-sided 95% standard-normal critical value

# Half-width of the interval: z * sqrt(p_hat * (1 - p_hat) / n)
half_width <- z * sqrt(p_hat * (1 - p_hat) / n)
CI <- c(p_hat - half_width, p_hat + half_width)
CI

[1] 0.5195839 0.5803191

Question 3

Code

# Sample size from n = (z * sigma / margin)^2.
# NOTE(review): 42.5 is presumably the assumed standard deviation and 5 the
# desired margin of error -- confirm against the question text.
z_crit <- qnorm(1 - 0.05 / 2)  # 1.959964, the two-sided 95% critical value
sigma <- 42.5
margin <- 5

# Unrounded solution of the sample-size formula
n <- (z_crit * sigma / margin)^2
n

# A sample size must be a whole number; round up so the margin is not exceeded
n_required <- ceiling(n)
n_required

[1] 277.5454

Question 4

Code

# Part A: two-sided one-sample t test of H0: mu = mu0
alpha <- 0.05  # significance level used for the decisions below
mu0 <- 500     # mean under the null hypothesis
n <- 9         # sample size
ybar <- 410    # sample mean
s <- 90        # sample standard deviation

# Standard error of the mean and the resulting t statistic
se <- s / sqrt(n)
t <- (ybar - mu0) / se

# Two-sided p-value: twice the lower-tail area at -|t| with n - 1 df
p_value <- pt(-abs(t), df = n - 1) * 2

# Report the test statistic, rounded to two decimals
cat("Test statistic:", round(t, 2), "\n")

Test statistic: -3

Code

# Print the two-sided p-value stored in `p_value`
cat("P-value:", p_value, "\n")

P-value: 0.01707168

Code

# Two-sided decision: keep H0 unless the p-value falls below alpha
if (p_value >= alpha) {
  cat("Fail to reject null hypothesis. \n")
} else {
  cat("Reject null hypothesis; the mean income of female employees differs from $500 per week.\n")
}

Reject null hypothesis; the mean income of female employees differs from $500 per week.

Code

# Part B: one-sided test with Ha: mu < 500.
# The p-value is the lower-tail area at the observed t with n - 1 df.
p_value_lt <- pt(q = t, df = n - 1, lower.tail = TRUE)
cat("P-value (Ha: μ < 500):", p_value_lt, "\n")

P-value (Ha: μ < 500): 0.008535841

Code

# Lower-tailed decision: keep H0 unless the p-value falls below alpha
if (p_value_lt >= alpha) {
  cat("Fail to reject null hypothesis. \n")
} else {
  cat("the mean income of female employees is less than $500 per week.\n")
}

the mean income of female employees is less than $500 per week.

Code

# Part C: one-sided test with Ha: mu > 500.
# By symmetry of the t distribution, the upper-tail area at t equals the
# lower-tail area at -t.
p_value_gt <- pt(q = -t, df = n - 1, lower.tail = TRUE)
cat("P-value (Ha: μ > 500):", p_value_gt, "\n")

P-value (Ha: μ > 500): 0.9914642

Code

# Upper-tailed decision: keep H0 unless the p-value falls below alpha
if (p_value_gt >= alpha) {
  cat("Fail to reject null hypothesis\n")
} else {
  cat("the mean income of female employees is greater than $500 per week.\n")
}

Fail to reject null hypothesis

Question 5

Code

# Jones' study: sample mean, standard error and sample size
jones <- data.frame(y_bar = 519.5, se = 10.0, n = 1000)

# Test statistic against the null value of 500
jones[["t"]] <- with(jones, (y_bar - 500) / se)

# Two-sided p-value from the t distribution with n - 1 degrees of freedom
jones[["p_value"]] <- with(jones, 2 * pt(-abs(t), df = n - 1))

jones[["t"]]

[1] 1.95

Code

# Show the two-sided p-value stored for Jones' study
jones[["p_value"]]

[1] 0.05145555

Code

# Smith's study: sample mean, standard error and sample size
smith <- data.frame(y_bar = 519.7, se = 10.0, n = 1000)

# Test statistic against the null value of 500
smith[["t"]] <- with(smith, (y_bar - 500) / se)

# Two-sided p-value from the t distribution with n - 1 degrees of freedom
smith[["p_value"]] <- with(smith, 2 * pt(-abs(t), df = n - 1))

smith[["t"]]

[1] 1.97

Code

# Show the two-sided p-value stored for Smith's study
smith[["p_value"]]

[1] 0.04911426

Code

# Significance testing at the 0.05 level
alpha <- 0.05

# Jones: significant only when the p-value is below alpha
if (jones$p_value >= alpha) {
  cat("Jones' study is not statistically significant\n")
} else {
  cat("Jones' study is statistically significant\n")
}

Jones' study is not statistically significant

Code

# Smith: significant only when the p-value is below alpha
if (smith$p_value >= alpha) {
  cat("Smith's study is not statistically significant\n")
} else {
  cat("Smith's study is statistically significant\n")
}

Smith's study is statistically significant

Question 6

Code

# Create the contingency table as 2 rows x 3 columns, filled row by row.
# With this layout every column sums to 100 (31 + 69, 43 + 57, 51 + 49), i.e.
# each column is one group of equal size. Filling the same six counts into
# 3 rows x 2 columns would pair them as (31, 43), (51, 69), (57, 49) and test
# a different, unintended table.
# NOTE(review): rows are presumably the two snack choices and columns the
# three groups being compared -- confirm against the question text.
snack_table <- matrix(c(31, 43, 51, 69, 57, 49), nrow = 2, byrow = TRUE)

# Pearson chi-squared test of independence between rows and columns
snack_test <- chisq.test(snack_table)
snack_test


    Pearson's Chi-squared test

data:  snack_table
X-squared = 3.656, df = 2, p-value = 0.1607

Question 7

Code

# Build the data: six cost observations for each of three areas
area <- rep(c("Area 1", "Area 2", "Area 3"), each = 6)
cost <- c(
  6.2, 9.3, 6.8, 6.1, 6.7, 7.5,  # Area 1
  7.5, 8.2, 8.5, 8.2, 7.0, 9.3,  # Area 2
  5.8, 6.4, 5.6, 7.1, 3.0, 3.5   # Area 3
)
cost_data <- data.frame(area = area, cost = cost)

# One-way ANOVA of cost across the three areas
model <- aov(cost ~ area, data = cost_data)

# ANOVA table: between-area and residual sums of squares, F statistic, p-value
summary(model)

            Df Sum Sq Mean Sq F value  Pr(>F)   
area         2  25.66  12.832   8.176 0.00397 **
Residuals   15  23.54   1.569                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1