hw1
challenge1
my name
dataset
ggplot2
Author

Paritosh G

Published

May 26, 2023

Code
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.1     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.2     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Code
library(readxl)

# Read the lung-capacity workbook into `dt`, the data frame used by the Q.1
# chunks. NOTE(review): the path is absolute and specific to one machine.
dt <- read_excel(
  path = "/Users/paritosh/Documents/Code_Submission/UMass/603_Spring_2023/posts/_data/LungCapData.xls"
)

Q.1)

A

Code
# Density-scaled histogram of LungCap (bar areas sum to 1 instead of showing
# raw counts).
hist(x = dt$LungCap, probability = TRUE)

B

Code
# Boxplots of LungCap for each Gender, filled by Gender, with y-axis breaks
# every 0.75 units from 0 to 15.
ggplot(
  data = select(dt, Gender, LungCap),
  mapping = aes(x = Gender, y = LungCap, fill = Gender)
) +
  geom_boxplot() +
  scale_y_continuous(breaks = seq(0, 15, by = 0.75))

On the median, the quartiles, and the other summary measures shown, males have a higher lung capacity than females.

C

Code
# Mean LungCap for each Smoke category over the full data set.
dt %>%
  group_by(Smoke) %>%
  summarise(mean = mean(LungCap), .groups = "drop")
# A tibble: 2 × 2
  Smoke  mean
  <chr> <dbl>
1 no     7.77
2 yes    8.65

Smokers have a higher mean lung capacity (8.65 vs. 7.77). Under our premise they should have a lower one, but that does not seem to be the case. Maybe smoking does not hurt lung capacity as much as it is assumed to.

D

  • Age less than or equal to 13
Code
# Mean LungCap by Smoke, restricted to rows with Age <= 13.
dt %>%
  filter(Age <= 13) %>%
  group_by(Smoke) %>%
  summarise(across(LungCap, mean), .groups = "drop")
# A tibble: 2 × 2
  Smoke LungCap
  <chr>   <dbl>
1 no       6.36
2 yes      7.20
  • Age equals 14 or 15
Code
# Mean LungCap by Smoke, restricted to rows whose Age is exactly 14 or 15.
dt %>%
  filter(Age %in% c(14, 15)) %>%
  group_by(Smoke) %>%
  summarise(LungCap = mean(LungCap), .groups = "drop")
# A tibble: 2 × 2
  Smoke LungCap
  <chr>   <dbl>
1 no       9.14
2 yes      8.39
  • Age is 16 or 17
Code
# Mean LungCap by Smoke, restricted to rows whose Age is exactly 16 or 17.
dt %>%
  filter(Age %in% c(16, 17)) %>%
  group_by(Smoke) %>%
  summarise(LungCap = mean(LungCap), .groups = "drop")
# A tibble: 2 × 2
  Smoke LungCap
  <chr>   <dbl>
1 no      10.5 
2 yes      9.38
  • Age is greater than or equal to 18
Code
# Mean LungCap by Smoke, restricted to rows with Age >= 18.
dt %>%
  filter(Age >= 18) %>%
  group_by(Smoke) %>%
  summarise(across(LungCap, mean), .groups = "drop")
# A tibble: 2 × 2
  Smoke LungCap
  <chr>   <dbl>
1 no       11.1
2 yes      10.5

E

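  • After the age of 13, the mean lung capacity of non-smokers is higher than that of smokers in every age group, which reverses the overall comparison in part C. A likely explanation is that lung capacity rises with age (the group means climb from about 6–7 to about 10–11) and smokers may be concentrated in the older groups, so the pooled mean for smokers is pulled upward.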

    Code
    # Frequency table for Q.2: each value of x paired with its observed count.
    table <- tibble(
      x = c(0, 1, 2, 3, 4),
      Frequency = c(128, 434, 160, 64, 24)
    )
    # Total count, derived from the table instead of being hard-coded so the two
    # cannot drift apart (evaluates to 810 for the counts above).
    n <- sum(table$Frequency)

Q.2)

A

Code
# P(x = 2): the count recorded for x == 2 as a share of the total n.
with(table, Frequency[x == 2]) / n
[1] 0.1975309

B

Code
# P(x < 2): combined count of the rows with x below 2, divided by n.
sum(table$Frequency[table$x < 2]) / n
[1] 0.6938272

C

Code
# P(x <= 2): combined count of the rows with x at most 2, divided by n.
with(filter(table, x <= 2), sum(Frequency)) / n
[1] 0.891358

D

Code
# P(x > 2): combined count of the rows with x above 2, divided by n.
sum(table$Frequency[table$x > 2]) / n
[1] 0.108642

E

Code
# Add a `probability` column holding each count as a fraction of n, then show
# the augmented table.
table$probability <- table$Frequency / n

print(table)
# A tibble: 5 × 3
      x Frequency probability
  <dbl>     <dbl>       <dbl>
1     0       128      0.158 
2     1       434      0.536 
3     2       160      0.198 
4     3        64      0.0790
5     4        24      0.0296
Code
# Expected value of x: each value weighted by its probability, then summed.
with(table, sum(x * probability))
[1] 1.28642

F

Code
 # x is a discrete random variable, so its variance is the probability-weighted
# squared deviation from the expected value: sum((x - E[x])^2 * P(x)).
# var(table$x) would instead return the sample variance of the five support
# values 0..4 and ignore the frequencies entirely.
expected_x <- sum(table$x * table$probability)
variance_x <- sum((table$x - expected_x)^2 * table$probability)
variance_x
[1] 0.8562353
Code
# Standard deviation is the square root of the distribution's variance.
sqrt(variance_x)
[1] 0.9253298