DACSS 601 HW2

Reading in Data

Snehal Prabhu
2/2/2022

Railroad Data

library(readr)
library(tidyverse)
# Load the state-level railroad employment CSV and list its column names.
data <- read_csv(file = "601/railroad_2012_clean_state.csv")
names(data)
[1] "state"           "total_employees"
# Preview the first six rows (the default for head()).
head(data, n = 6L)
# A tibble: 6 x 2
  state total_employees
  <chr>           <dbl>
1 AE                  2
2 AK                103
3 AL               4257
4 AP                  1
5 AR               3871
6 AZ               3153

Data Wrangling Operation

# Keep the states with more than 10,000 railroad employees, largest first.
data %>%
  filter(total_employees > 10000) %>%
  arrange(desc(total_employees))
# A tibble: 6 x 2
  state total_employees
  <chr>           <dbl>
1 TX              19839
2 IL              19131
3 NY              17050
4 NE              13176
5 CA              13137
6 PA              12769

Reading Unclean Excel Data

library(readxl)
# Read the county-level workbook, skipping the three rows above the header
# row, then list the resulting column names.
exceldata <- read_excel(path = "601/StateCounty2012.xls", skip = 3)
names(exceldata)
[1] "STATE"  "...2"   "COUNTY" "...4"   "TOTAL" 
# Preview the first six rows (the default for head()) of the raw sheet.
head(exceldata, n = 6L)
# A tibble: 6 x 5
  STATE     ...2  COUNTY               ...4  TOTAL
  <chr>     <lgl> <chr>                <lgl> <dbl>
1 AE        NA    APO                  NA        2
2 AE Total1 NA    <NA>                 NA        2
3 AK        NA    ANCHORAGE            NA        7
4 AK        NA    FAIRBANKS NORTH STAR NA        2
5 AK        NA    JUNEAU               NA        3
6 AK        NA    MATANUSKA-SUSITNA    NA        2

Data Wrangling Operation

library(tidyr)
# Keep only the three populated columns, then discard every row that has no
# county (for example the "AE Total1" row visible in the raw preview).
exceldata <- drop_na(
  select(exceldata, STATE, COUNTY, TOTAL),
  COUNTY
)
head(exceldata, n = 6L)
# A tibble: 6 x 3
  STATE COUNTY               TOTAL
  <chr> <chr>                <dbl>
1 AE    APO                      2
2 AK    ANCHORAGE                7
3 AK    FAIRBANKS NORTH STAR     2
4 AK    JUNEAU                   3
5 AK    MATANUSKA-SUSITNA        2
6 AK    SITKA                    1
# Sum the county-level counts within each state; the result has the default
# column names Group.1 (state) and x (summed total).
with(exceldata, aggregate(x = TOTAL, by = list(STATE), FUN = sum))
   Group.1     x
1       AE     2
2       AK   103
3       AL  4257
4       AP     1
5       AR  3871
6       AZ  3153
7       CA 13137
8       CO  3650
9       CT  2592
10      DC   279
11      DE  1495
12      FL  7419
13      GA  8605
14      HI     4
15      IA  4019
16      ID  1563
17      IL 19131
18      IN  8537
19      KS  6092
20      KY  4811
21      LA  3915
22      MA  3379
23      MD  4709
24      ME   654
25      MI  3932
26      MN  5467
27      MO  8419
28      MS  2111
29      MT  3327
30      NC  3143
31      ND  2204
32      NE 13176
33      NH   393
34      NJ  8329
35      NM  1958
36      NV   746
37      NY 17050
38      OH  9056
39      OK  2318
40      OR  2322
41      PA 12769
42      RI   487
43      SC  2296
44      SD   949
45      TN  4952
46      TX 19839
47      UT  1917
48      VA  7551
49      VT   259
50      WA  5222
51      WI  3773
52      WV  3213
53      WY  2876

Reuse

Text and figures are licensed under Creative Commons Attribution CC BY-NC 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".

Citation

For attribution, please cite this work as

Prabhu (2022, Feb. 9). Data Analytics and Computational Social Science: DACSS 601 HW2. Retrieved from https://github.com/DACSS/dacss_course_website/posts/httprpubscomsnehalhw2/

BibTeX citation

@misc{prabhu2022dacss,
  author = {Prabhu, Snehal},
  title = {Data Analytics and Computational Social Science: DACSS 601 HW2},
  url = {https://github.com/DACSS/dacss_course_website/posts/httprpubscomsnehalhw2/},
  year = {2022}
}