Data Analytics and Computational Social Science: Homework 2

Joseph Farrell

Load libraries

library(tidyverse)
library(dplyr)

Read in and name data “australian_marriage_data”

library(readr)
# Read the tidy Australian marriage CSV into a tibble.
# NOTE(review): this is an absolute path on one specific machine; point it at
# your own copy of the file (ideally a project-relative path) before running.
australian_marriage_data <- read_csv(
  "/Users/nelsonfarrell/Downloads/australian_marriage_tidy - australian_marriage_tidy.csv"
)

# View() opens an interactive data viewer, which may be unavailable when the
# document is run non-interactively, so only call it in an interactive session.
if (interactive()) {
  View(australian_marriage_data)
}

Check the variables and columns of “australian_marriage_data”

# Print the compact structure (column names, types, first values) of the data.
australian_marriage_data %>%
  str()

spec_tbl_df [16 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ territory: chr [1:16] "New South Wales" "New South Wales" "Victoria" "Victoria" ...
 $ resp     : chr [1:16] "yes" "no" "yes" "no" ...
 $ count    : num [1:16] 2374362 1736838 2145629 1161098 1487060 ...
 $ percent  : num [1:16] 57.8 42.2 64.9 35.1 60.7 39.3 62.5 37.5 63.7 36.3 ...
 - attr(*, "spec")=
  .. cols(
  ..   territory = col_character(),
  ..   resp = col_character(),
  ..   count = col_double(),
  ..   percent = col_double()
  .. )
 - attr(*, "problems")=<externalptr>

“australian_marriage_data” has 4 variables and 16 rows. The first variable (territory) is the territory of the respondents; it is a character variable. The second variable (resp) is the response, either “yes” or “no”; it is also a character variable. The third variable (count) is the number of “yes” or “no” responses in that territory; it is numeric. The fourth variable (percent) is also numeric; it is the percentage of that territory's respondents who answered “yes” or “no.”

Filter “yes” from “australian_marriage_data”

# Keep only the rows whose response is "yes".
australian_marriage_data %>%
  filter(resp == "yes")

# A tibble: 8 × 4
  territory                       resp    count percent
  <chr>                           <chr>   <dbl>   <dbl>
1 New South Wales                 yes   2374362    57.8
2 Victoria                        yes   2145629    64.9
3 Queensland                      yes   1487060    60.7
4 South Australia                 yes    592528    62.5
5 Western Australia               yes    801575    63.7
6 Tasmania                        yes    191948    63.6
7 Northern Territory(b)           yes     48686    60.6
8 Australian Capital Territory(c) yes    175459    74

Filter for “yes” and arrange “count” column in descending order

# Keep the "yes" rows and sort them from the largest count to the smallest.
australian_marriage_data %>%
  filter(resp == "yes") %>%
  arrange(desc(count))

# A tibble: 8 × 4
  territory                       resp    count percent
  <chr>                           <chr>   <dbl>   <dbl>
1 New South Wales                 yes   2374362    57.8
2 Victoria                        yes   2145629    64.9
3 Queensland                      yes   1487060    60.7
4 Western Australia               yes    801575    63.7
5 South Australia                 yes    592528    62.5
6 Tasmania                        yes    191948    63.6
7 Australian Capital Territory(c) yes    175459    74  
8 Northern Territory(b)           yes     48686    60.6

Filter for “yes”, remove the “count” column, and arrange “percent” column in descending order

# Keep the "yes" rows, drop the count column (leaving territory, resp and
# percent), and sort from the highest percent to the lowest.
australian_marriage_data %>%
  filter(resp == "yes") %>%
  select(-count) %>%
  arrange(desc(percent))

# A tibble: 8 × 3
  territory                       resp  percent
  <chr>                           <chr>   <dbl>
1 Australian Capital Territory(c) yes      74  
2 Victoria                        yes      64.9
3 Western Australia               yes      63.7
4 Tasmania                        yes      63.6
5 South Australia                 yes      62.5
6 Queensland                      yes      60.7
7 Northern Territory(b)           yes      60.6
8 New South Wales                 yes      57.8

Filter for the rows where “percent” is above 60 and below 70, and arrange them in descending order

# Keep rows whose percent lies strictly between 60 and 70 (comma-separated
# filter() conditions are combined with AND), sorted from highest to lowest.
# Note that no condition on resp is applied here.
australian_marriage_data %>%
  filter(percent > 60, percent < 70) %>%
  arrange(desc(percent))

# A tibble: 6 × 4
  territory             resp    count percent
  <chr>                 <chr>   <dbl>   <dbl>
1 Victoria              yes   2145629    64.9
2 Western Australia     yes    801575    63.7
3 Tasmania              yes    191948    63.6
4 South Australia       yes    592528    62.5
5 Queensland            yes   1487060    60.7
6 Northern Territory(b) yes     48686    60.6

Filter for “yes”, select “percent”, and store the result as a new one-column tibble, “percent_married”

# Build percent_married: the percent column of the "yes" rows only.
# select() keeps the result as a one-column tibble rather than a bare vector.
percent_married <- australian_marriage_data %>%
  filter(resp == "yes") %>%
  select(percent)

Create blue boxplot for “percent_married”

  # Draw a horizontal blue boxplot of the values held in percent_married.
  # NOTE(review): the x-axis label describes the response as "being married";
  # confirm that wording matches the question the data actually records.
  boxplot(
    percent_married,
    horizontal = TRUE,
    main = "Boxplot: Australian Marriage Data: 'yes'",
    xlab = "Percent of respondents who said 'yes' to being married",
    ylab = "Territories",
    col = "blue"
  )


```{.r .distill-force-highlighting-css}
```

Comment on this article Share:

Homework 2

Reuse

Citation