Practice wrangling data with the Australian Marraige Law Excel document.
ausmarraige <- read_excel("../../_data/australian_marriage_law_postal_survey_2017_-_response_final.xls", "Table 2", skip=6)
head(ausmarraige, 10)
# A tibble: 10 x 16
...1 no....2 `%...3` no....4 `%...5` no....6 `%...7` ...8 no....9
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <dbl>
1 New ~ NA NA NA NA NA NA NA NA
2 Banks 37736 44.9 46343 55.1 84079 100 NA 84079
3 Bart~ 37153 43.6 47984 56.4 85137 100 NA 85137
4 Benn~ 42943 49.8 43215 50.2 86158 100 NA 86158
5 Bero~ 48471 54.6 40369 45.4 88840 100 NA 88840
6 Blax~ 20406 26.1 57926 73.9 78332 100 NA 78332
7 Brad~ 53681 60.6 34927 39.4 88608 100 NA 88608
8 Cala~ 54091 60.2 35779 39.8 89870 100 NA 89870
9 Chif~ 32871 41.3 46702 58.7 79573 100 NA 79573
10 Cook 47505 55 38804 45 86309 100 NA 86309
# ... with 7 more variables: %...10 <dbl>, no....11 <dbl>,
# %...12 <dbl>, no....13 <dbl>, %...14 <dbl>, no....15 <dbl>,
# %...16 <dbl>
colnames(ausmarraige)
[1] "...1" "no....2" "%...3" "no....4" "%...5" "no....6"
[7] "%...7" "...8" "no....9" "%...10" "no....11" "%...12"
[13] "no....13" "%...14" "no....15" "%...16"
The essential columns in this dataset are the cities, response clear yes, response clear no, response not clear, and non responding.
ausmarraige1 <- select(ausmarraige, "...1", "no....2", "no....4", "no....11", "no....13")%>%
rename(Cities=...1, Response_Clear_Yes=no....2, Response_Clear_No=no....4, Response_Not_Clear=no....11, Non_Response=no....13)%>%
drop_na(Cities)
head(ausmarraige1)
# A tibble: 6 x 5
Cities Response_Clear_~ Response_Clear_~ Response_Not_Cl~
<chr> <dbl> <dbl> <dbl>
1 New South Wales Divisions NA NA NA
2 Banks 37736 46343 247
3 Barton 37153 47984 226
4 Bennelong 42943 43215 244
5 Berowra 48471 40369 212
6 Blaxland 20406 57926 220
# ... with 1 more variable: Non_Response <dbl>
ausmarraige1 <- ausmarraige1 %>%
filter(!str_detect(Cities, "Total"))%>%
filter(!str_starts(Cities, "\\("))%>%
filter(Cities != "Australia")%>%
filter(!str_starts(Cities, "\\©"))
ausmarraige1 <- ausmarraige1 %>%
mutate(Divisions = case_when(
str_ends(Cities, "Divisions") ~ Cities
))
ausmarraige2 <- ausmarraige1[, c("Divisions", "Cities", "Response_Clear_Yes", "Response_Clear_No", "Response_Not_Clear", "Non_Response")]
head(ausmarraige2)
# A tibble: 6 x 6
Divisions Cities Response_Clear_~ Response_Clear_~ Response_Not_Cl~
<chr> <chr> <dbl> <dbl> <dbl>
1 New South~ New S~ NA NA NA
2 <NA> Banks 37736 46343 247
3 <NA> Barton 37153 47984 226
4 <NA> Benne~ 42943 43215 244
5 <NA> Berow~ 48471 40369 212
6 <NA> Blaxl~ 20406 57926 220
# ... with 1 more variable: Non_Response <dbl>
summary(ausmarraige2)
Divisions Cities Response_Clear_Yes
Length:158 Length:158 Min. :19026
Class :character Class :character 1st Qu.:44620
Mode :character Mode :character Median :51783
Mean :52115
3rd Qu.:59879
Max. :89590
NA's :8
Response_Clear_No Response_Not_Clear Non_Response
Min. :14860 Min. :106.0 Min. :13092
1st Qu.:28452 1st Qu.:207.2 1st Qu.:18951
Median :31654 Median :240.0 Median :21417
Mean :32493 Mean :244.6 Mean :21855
3rd Qu.:36727 3rd Qu.:276.0 3rd Qu.:24513
Max. :57926 Max. :377.0 Max. :35841
NA's :8 NA's :8 NA's :8
# A tibble: 158 x 6
Divisions Cities Response_Clear_~ Response_Clear_~ Response_Not_Cl~
<chr> <chr> <dbl> <dbl> <dbl>
1 <NA> Canberra(d) 89590 31361 281
2 <NA> Fenner(e) 85869 30159 253
3 <NA> Melbourne 81287 15839 182
4 <NA> Sydney 76144 14860 146
5 <NA> McEwen 73705 39007 377
6 <NA> Grayndler 73208 18429 136
7 <NA> Brisbane 72812 18762 159
8 <NA> Newcastle 71158 23999 232
9 <NA> Melbourne Ports 70589 15523 198
10 <NA> Higgins 70059 19375 180
# ... with 148 more rows, and 1 more variable: Non_Response <dbl>
Text and figures are licensed under Creative Commons Attribution CC BY-NC 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".
For attribution, please cite this work as
Georges (2021, Oct. 5). DACSS 601 Fall 2021: Homework 3: Basic Data Wrangling. Retrieved from https://mrolfe.github.io/DACSS601Fall21/posts/2021-10-05-hw3-blog-georges/
BibTeX citation
@misc{georges2021homework, author = {Georges, Megan}, title = {DACSS 601 Fall 2021: Homework 3: Basic Data Wrangling}, url = {https://mrolfe.github.io/DACSS601Fall21/posts/2021-10-05-hw3-blog-georges/}, year = {2021} }