library(tidyverse)
library(ggplot2)
library(lubridate)
::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE) knitr
Challenge 8
Joining Data
Read in data
<-read_csv("_data/birds.csv", col_names = c("Domain_Code", "Domain", "Area_Code", "Area", "Element_Code","Element", "Item_Code","Item", "Year_Code", "Year","Unit", "Value", "Flag", "Flag_Description" ), skip=1)%>%
birdsselect(-c("Year_Code"))
birds
Domain_Code <chr> | Domain <chr> | Area_Code <dbl> | Area <chr> | Element_Code <dbl> | Element <chr> | Item_Code <dbl> | Item <chr> | Year <dbl> | Unit <chr> | |
---|---|---|---|---|---|---|---|---|---|---|
QA | Live Animals | 2 | Afghanistan | 5112 | Stocks | 1057 | Chickens | 1961 | 1000 Head | |
QA | Live Animals | 2 | Afghanistan | 5112 | Stocks | 1057 | Chickens | 1962 | 1000 Head | |
QA | Live Animals | 2 | Afghanistan | 5112 | Stocks | 1057 | Chickens | 1963 | 1000 Head | |
QA | Live Animals | 2 | Afghanistan | 5112 | Stocks | 1057 | Chickens | 1964 | 1000 Head | |
QA | Live Animals | 2 | Afghanistan | 5112 | Stocks | 1057 | Chickens | 1965 | 1000 Head | |
QA | Live Animals | 2 | Afghanistan | 5112 | Stocks | 1057 | Chickens | 1966 | 1000 Head | |
QA | Live Animals | 2 | Afghanistan | 5112 | Stocks | 1057 | Chickens | 1967 | 1000 Head | |
QA | Live Animals | 2 | Afghanistan | 5112 | Stocks | 1057 | Chickens | 1968 | 1000 Head | |
QA | Live Animals | 2 | Afghanistan | 5112 | Stocks | 1057 | Chickens | 1969 | 1000 Head | |
QA | Live Animals | 2 | Afghanistan | 5112 | Stocks | 1057 | Chickens | 1970 | 1000 Head |
<- read_csv("_data/FAOSTAT_livestock.csv", col_names = c("Domain_Code", "Domain", "Area_Code", "Area", "Element_Code","Element", "Item_Code","Item", "Year_Code", "Year","Unit", "Value", "Flag", "Flag_Description" ), skip=1) %>%
livestock select(-c("Year_Code"))
livestock
Domain_Code <chr> | Domain <chr> | Area_Code <dbl> | Area <chr> | Element_Code <dbl> | Element <chr> | Item_Code <dbl> | Item <chr> | Year <dbl> | Unit <chr> | |
---|---|---|---|---|---|---|---|---|---|---|
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1961 | Head | |
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1962 | Head | |
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1963 | Head | |
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1964 | Head | |
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1965 | Head | |
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1966 | Head | |
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1967 | Head | |
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1968 | Head | |
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1969 | Head | |
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1970 | Head |
<- read_csv("_data/FAOSTAT_country_groups.csv",col_names = c("Country_Group_Code","Country_Group","Country_Code", "Country", "M49_Code", "ISO2_Code", "ISO3_Code"), skip=1)
c_groups c_groups
Country_Group_Code <dbl> | Country_Group <chr> | Country_Code <dbl> | Country <chr> | M49_Code <chr> | ISO2_Code <chr> | ISO3_Code <chr> |
---|---|---|---|---|---|---|
5100 | Africa | 4 | Algeria | 012 | DZ | DZA |
5100 | Africa | 7 | Angola | 024 | AO | AGO |
5100 | Africa | 53 | Benin | 204 | BJ | BEN |
5100 | Africa | 20 | Botswana | 072 | BW | BWA |
5100 | Africa | 233 | Burkina Faso | 854 | BF | BFA |
5100 | Africa | 29 | Burundi | 108 | BI | BDI |
5100 | Africa | 35 | Cabo Verde | 132 | CV | CPV |
5100 | Africa | 32 | Cameroon | 120 | CM | CMR |
5100 | Africa | 37 | Central African Republic | 140 | CF | CAF |
5100 | Africa | 39 | Chad | 148 | TD | TCD |
%>%
c_groups count(Country_Group_Code,Country_Code) %>%
filter(n > 1)
Country_Group_Code <dbl> | Country_Code <dbl> | n <int> |
---|
My final filter is what I think the Primary key is for c_groups.
Join pt1
<-birds %>%
b_sum group_by(Item)%>%
summarise(avg_stocks = mean(Value, na.rm=TRUE),
med_stocks = median(Value, na.rm=TRUE),
n_missing = sum(is.na(Value)))
<-livestock %>%
l_sum group_by(Item)%>%
summarise(avg_stocks = mean(Value, na.rm=TRUE),
med_stocks = median(Value, na.rm=TRUE),
n_missing = sum(is.na(Value)))
b_sum
Item <chr> | avg_stocks <dbl> | med_stocks <dbl> | n_missing <int> | |
---|---|---|---|---|
Chickens | 207930.808 | 10783.5 | 104 | |
Ducks | 23071.673 | 510.0 | 417 | |
Geese and guinea fowls | 10291.937 | 258.0 | 139 | |
Pigeons, other birds | 6163.375 | 2800.0 | 58 | |
Turkeys | 15227.919 | 528.0 | 318 |
l_sum
Item <chr> | avg_stocks <dbl> | med_stocks <dbl> | n_missing <int> | |
---|---|---|---|---|
Asses | 1142260 | 30000 | 139 | |
Buffaloes | 10189405 | 260000 | 185 | |
Camels | 1566357 | 231000 | 96 | |
Cattle | 22621285 | 1393394 | 166 | |
Goats | 11799739 | 435080 | 227 | |
Horses | 1264376 | 57100 | 208 | |
Mules | 468767 | 14000 | 80 | |
Pigs | 15423572 | 349039 | 112 | |
Sheep | 21454179 | 709257 | 88 |
<- full_join(b_sum,l_sum)
item_sum
item_sum
Item <chr> | avg_stocks <dbl> | med_stocks <dbl> | n_missing <int> | |
---|---|---|---|---|
Chickens | 207930.808 | 10783.5 | 104 | |
Ducks | 23071.673 | 510.0 | 417 | |
Geese and guinea fowls | 10291.937 | 258.0 | 139 | |
Pigeons, other birds | 6163.375 | 2800.0 | 58 | |
Turkeys | 15227.919 | 528.0 | 318 | |
Asses | 1142260.006 | 30000.0 | 139 | |
Buffaloes | 10189405.200 | 260000.0 | 185 | |
Camels | 1566356.858 | 231000.0 | 96 | |
Cattle | 22621285.128 | 1393393.5 | 166 | |
Goats | 11799738.637 | 435080.0 | 227 |
ggplot(item_sum, aes(x=Item, y=avg_stocks))+
geom_bar(stat="identity") +
coord_flip() +
geom_text(aes(label = round(avg_stocks), color = "red"), size=3, hjust=.6) +
theme(legend.position = "none")
Here I summarized the Birds and Livestock stocks and joined them into one table. I then created a Bar graph showing the average amount of stock for all animals.
%>%
livestock count(Area_Code,Item, Year) %>%
filter(n > 1)
Area_Code <dbl> | Item <chr> | Year <dbl> | n <int> |
---|
<- livestock %>%
livestock_new mutate(index = row_number()) %>%
select(index, everything())
<- c_groups %>%
c_groups_new mutate(index = row_number()) %>%
select(index, everything())
livestock_new
index <int> | Domain_Code <chr> | Domain <chr> | Area_Code <dbl> | Area <chr> | Element_Code <dbl> | Element <chr> | Item_Code <dbl> | Item <chr> | Year <dbl> | |
---|---|---|---|---|---|---|---|---|---|---|
1 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1961 | |
2 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1962 | |
3 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1963 | |
4 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1964 | |
5 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1965 | |
6 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1966 | |
7 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1967 | |
8 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1968 | |
9 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1969 | |
10 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1970 |
c_groups_new
index <int> | Country_Group_Code <dbl> | Country_Group <chr> | Country_Code <dbl> | Country <chr> | M49_Code <chr> | ISO2_Code <chr> | ISO3_Code <chr> |
---|---|---|---|---|---|---|---|
1 | 5100 | Africa | 4 | Algeria | 012 | DZ | DZA |
2 | 5100 | Africa | 7 | Angola | 024 | AO | AGO |
3 | 5100 | Africa | 53 | Benin | 204 | BJ | BEN |
4 | 5100 | Africa | 20 | Botswana | 072 | BW | BWA |
5 | 5100 | Africa | 233 | Burkina Faso | 854 | BF | BFA |
6 | 5100 | Africa | 29 | Burundi | 108 | BI | BDI |
7 | 5100 | Africa | 35 | Cabo Verde | 132 | CV | CPV |
8 | 5100 | Africa | 32 | Cameroon | 120 | CM | CMR |
9 | 5100 | Africa | 37 | Central African Republic | 140 | CF | CAF |
10 | 5100 | Africa | 39 | Chad | 148 | TD | TCD |
The First filter represents the possible primary key for livestock. I created a Index for livestock and c_groups so that I could join them together.
Join pt2
%>%
livestock full_join(birds)
Domain_Code <chr> | Domain <chr> | Area_Code <dbl> | Area <chr> | Element_Code <dbl> | Element <chr> | Item_Code <dbl> | Item <chr> | Year <dbl> | Unit <chr> | |
---|---|---|---|---|---|---|---|---|---|---|
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1961 | Head | |
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1962 | Head | |
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1963 | Head | |
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1964 | Head | |
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1965 | Head | |
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1966 | Head | |
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1967 | Head | |
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1968 | Head | |
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1969 | Head | |
QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1970 | Head |
%>%
livestock_new inner_join(c_groups_new)
index <int> | Domain_Code <chr> | Domain <chr> | Area_Code <dbl> | Area <chr> | Element_Code <dbl> | Element <chr> | Item_Code <dbl> | Item <chr> | Year <dbl> | |
---|---|---|---|---|---|---|---|---|---|---|
1 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1961 | |
2 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1962 | |
3 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1963 | |
4 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1964 | |
5 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1965 | |
6 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1966 | |
7 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1967 | |
8 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1968 | |
9 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1969 | |
10 | QA | Live Animals | 2 | Afghanistan | 5111 | Stocks | 1107 | Asses | 1970 |