2004-2013 USDA Certified Organic Egg and Chicken Prices

challenge_3

Michele Carlin

organiceggpoultry.xls

Author

Michele Carlin

Published

March 4, 2023

Code

library(tidyverse)
library(summarytools)
library(dbplyr)
library(readxl)

knitr::opts_chunk$set(echo = TRUE)

Read in and view summary of ‘organic egg poultry’ dataset

Code

sheet_names <- excel_sheets("_data/organiceggpoultry.xls")
sheet_names

[1] "Data"                            "Organic egg prices, 2004-13"    
[3] "Organic poultry prices, 2004-13"

Code

organic_data<-read_xls("_data/organiceggpoultry.xls", skip = 5,            col_names= c("date", "xl_dozen", "xl_half", "lrg_dozen", "lrg_half", "delete", "chkn_whole", "chkn_boneless", "chkn_bonein", "chkn_legs", "chkn_thighs"))%>%
  select(!contains("delete"))
View(organic_data)
view(dfSummary(organic_data))

Switching method to 'browser'

Output file written: C:\Users\CarlinML\AppData\Local\Temp\RtmpieAYA6\file74785eb01180.html

This dataset contains 120 rows and 10 columns. Each row is for a particular month and year, and contains average prices by carton for 4 different ‘egg’ categories (e.g., XL dozen, XL half dozen, etc.) as well as the average price per pound for 5 different ‘chicken’ categories (e.g., whole, boneless, thighs, etc.). In order to have each row be one observation, we will need to use ‘pivot long’ however before doing so, we need to tidy up some of the variables based on findings in Data Frame Summary (e.g., remove extra characters in ‘date’ field).

Number of unique values in the variable ‘date’

Code

table(select(organic_data, date))

date
      April      August    December    February February /1    Jan 2004 
         10          10          10           8           2           1 
   Jan 2005    Jan 2006    Jan 2007    Jan 2008    Jan 2009    Jan 2010 
          1           1           1           1           1           1 
   Jan 2011    Jan 2012    Jan 2013        July        June       March 
          1           1           1          10          10          10 
        May    November     October   September 
         10          10          10          10

Separate date variable into month and year, remove ‘/1’ from date variable and then fill ‘year’ variable.

Code

organic_data <- organic_data %>% 
  mutate(date = str_remove(date, " /1")) %>%
  separate(col=date, into=c('month', 'year'), sep=' ') %>%
    fill(year)

Warning: Expected 2 pieces. Missing pieces filled with `NA` in 110 rows [2, 3, 4, 5, 6,
7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, ...].

Code

 select(organic_data, month, year)

# A tibble: 120 × 2
   month     year 
   <chr>     <chr>
 1 Jan       2004 
 2 February  2004 
 3 March     2004 
 4 April     2004 
 5 May       2004 
 6 June      2004 
 7 July      2004 
 8 August    2004 
 9 September 2004 
10 October   2004 
# … with 110 more rows

Pivot longer so that each row contains one observation.

Code

organic_data_long<-pivot_longer(organic_data, col = c(xl_dozen, xl_half, lrg_dozen, lrg_half, chkn_whole, chkn_boneless, chkn_bonein, chkn_legs, chkn_thighs),
                 names_to="item",
                 values_to = "value")

Error in `pivot_longer()`:
! Can't combine `xl_dozen` <double> and `chkn_bonein` <character>.

Couldn’t pivot because some variables are numeric and others are string; filter out rows that have a value of ‘too few’ in chkn_bonein and chkn_thighs; check number of rows before and after.

Code

nrow(organic_data)

[1] 120

Code

organic_data <- organic_data %>%
  filter(chkn_bonein != 'too few' & chkn_thighs != 'too few')
nrow(organic_data)

[1] 114

Code

organic_data <- organic_data %>%
  mutate(chkn_bonein = as.numeric(chkn_bonein)) %>%
  mutate(chkn_thighs = as.numeric(chkn_thighs))
str(organic_data)

tibble [114 × 11] (S3: tbl_df/tbl/data.frame)
 $ month        : chr [1:114] "July" "August" "September" "October" ...
 $ year         : chr [1:114] "2004" "2004" "2004" "2004" ...
 $ xl_dozen     : num [1:114] 241 241 241 241 241 241 241 241 241 241 ...
 $ xl_half      : num [1:114] 137 137 136 136 136 ...
 $ lrg_dozen    : num [1:114] 234 234 234 234 234 ...
 $ lrg_half     : num [1:114] 134 134 130 128 128 ...
 $ chkn_whole   : num [1:114] 217 217 217 217 217 217 217 217 217 217 ...
 $ chkn_boneless: num [1:114] 642 642 642 642 642 ...
 $ chkn_bonein  : num [1:114] 390 390 390 390 390 ...
 $ chkn_legs    : num [1:114] 204 204 204 204 204 ...
 $ chkn_thighs  : num [1:114] 200 200 200 200 200 ...

Try ‘pivot longer’ now that all variables are numeric.

Code

organic_data_long <- pivot_longer(organic_data, col = c(xl_dozen, xl_half, lrg_dozen, lrg_half, chkn_whole, chkn_boneless, chkn_bonein, chkn_legs, chkn_thighs),
                 names_to="item",
                 values_to = "value")
organic_data_long

# A tibble: 1,026 × 4
   month  year  item          value
   <chr>  <chr> <chr>         <dbl>
 1 July   2004  xl_dozen       241 
 2 July   2004  xl_half        137 
 3 July   2004  lrg_dozen      234.
 4 July   2004  lrg_half       134.
 5 July   2004  chkn_whole     217 
 6 July   2004  chkn_boneless  642.
 7 July   2004  chkn_bonein    390.
 8 July   2004  chkn_legs      204.
 9 July   2004  chkn_thighs    200.
10 August 2004  xl_dozen       241 
# … with 1,016 more rows

Code

view(dfSummary(organic_data_long))

Switching method to 'browser'

Output file written: C:\Users\CarlinML\AppData\Local\Temp\RtmpieAYA6\file74786f4f5841.html

Summarize data - create new ‘item_type’ variable to group by.

Code

organic_data_long <- organic_data_long %>%
 mutate(item_type = case_when(startsWith(item, "chkn") ~ "Chicken", TRUE ~ "Eggs"))
view(dfSummary(organic_data_long))

Switching method to 'browser'

Output file written: C:\Users\CarlinML\AppData\Local\Temp\RtmpieAYA6\file74782bef5507.html

Code

organic_data_long %>%
group_by(item_type, item) %>%
  summarise(Avg_Cost = mean(value),
            Min_Cost = min(value),
            Max_Cost = max(value))

`summarise()` has grouped output by 'item_type'. You can override using the
`.groups` argument.

# A tibble: 9 × 5
# Groups:   item_type [2]
  item_type item          Avg_Cost Min_Cost Max_Cost
  <chr>     <chr>            <dbl>    <dbl>    <dbl>
1 Chicken   chkn_bonein       390.     390.     390.
2 Chicken   chkn_boneless     656.     638.     704.
3 Chicken   chkn_legs         204.     204.     204.
4 Chicken   chkn_thighs       218.     200.     222 
5 Chicken   chkn_whole        232.     217      248 
6 Eggs      lrg_dozen         256.     234.     278.
7 Eggs      lrg_half          156.     128.     178 
8 Eggs      xl_dozen          269.     241      290 
9 Eggs      xl_half           166.     136.     188.