Sasi Tansaraviput HW2

Read in my own dataset, explain the variables and perform 2 data-wrangling operations

Sasi Tansaraviput
2022-05-10

Read in my own dataset and clean it

# Load the raw texture-profile-analysis workbook from the working directory.
TPA <- read_excel("TPA Fall 2021.xlsx")
# View() opens a spreadsheet-style viewer window, so only call it in an
# interactive session; skipping it keeps knitting / Rscript runs working.
if (interactive()) {
  View(TPA)
}


# Discard every row that has no Batch value.
TPA <- drop_na(TPA, Batch)
# Keep columns 1-5, 10-16 and 19 (selected by position) as a data.table.
TPA <- data.table(TPA[, c(1:5, 10:16, 19)])
# Assign descriptive names to the 13 retained columns, in order.
colnames(TPA) <- c(
  "Product", "Brand", "Type", "Protein", "Component",
  "Hardness", "Adhesiveness", "Resilence", "Cohesion",
  "Springiness", "Gumminess", "Chewiness", "Diameter"
)

# Fill missing values with LOCF() (presumably last observation carried
# forward -- confirm which package supplies it).
TPA <- LOCF(TPA)

# Remove rows whose Diameter is marked "THROW OUT", then remove the one
# product that the original notes flag as wrong data.
TPA <- TPA %>%
  subset(Diameter != "THROW OUT") %>%
  subset(Product != "FIELD ROAST GARLIC & FENNEL")

# Number the measurements within each product: Sampling runs 1, 2, ... for
# every Product. The result stays grouped by Product.
TPA <- dplyr::mutate(group_by(TPA, Product), Sampling = 1:n())

# Put Sampling right after the descriptive columns; Diameter is not kept.
TPA <- TPA[, c(
  "Product", "Brand", "Type", "Protein", "Component", "Sampling",
  "Hardness", "Adhesiveness", "Resilence", "Cohesion",
  "Springiness", "Gumminess", "Chewiness"
)]

Explain the variables

# Print a compact summary of TPA: each column's type and its first values.
str(TPA)
grouped_df [165 x 13] (S3: grouped_df/tbl_df/tbl/data.frame)
 $ Product     : chr [1:165] "BIGY SWEET ITALIAN SAUSAGE" "BIGY SWEET ITALIAN SAUSAGE" "BIGY SWEET ITALIAN SAUSAGE" "BIGY SWEET ITALIAN SAUSAGE" ...
 $ Brand       : chr [1:165] "BIGY" "BIGY" "BIGY" "BIGY" ...
 $ Type        : chr [1:165] "IT" "IT" "IT" "IT" ...
 $ Protein     : chr [1:165] "M" "M" "M" "M" ...
 $ Component   : chr [1:165] "P" "P" "P" "P" ...
 $ Sampling    : int [1:165] 1 2 3 4 5 6 1 2 3 4 ...
 $ Hardness    : num [1:165] 12035.8 12318.1 11075.6 26.5 9164.7 ...
 $ Adhesiveness: num [1:165] -11.035 -10.095 -8.275 -1.74 -0.159 ...
 $ Resilence   : num [1:165] 30.8 22.3 21.7 23.7 32.8 ...
 $ Cohesion    : num [1:165] 0.588 0.469 0.464 0.503 0.624 0.494 0.399 0.439 0.422 0.291 ...
 $ Springiness : num [1:165] 88.6 81.1 84.1 85.8 88.1 ...
 $ Gumminess   : num [1:165] 7083.1 5782.7 5141 13.3 5721.1 ...
 $ Chewiness   : num [1:165] 6277.3 4689.9 4322.6 11.4 5042 ...
 - attr(*, "groups")= tibble [27 x 2] (S3: tbl_df/tbl/data.frame)
  ..$ Product: chr [1:27] "ALL NATULRAL SWEET ITALIAN SAUSAGE" "BallPark Beef Franks" "BAR S FRANKS" "BEYOND MEAT HOPT ITALIAN" ...
  ..$ .rows  : list<int> [1:27] 
  .. ..$ : int [1:5] 7 8 9 10 11
  .. ..$ : int [1:9] 48 49 50 51 52 53 54 55 56
  .. ..$ : int [1:6] 88 89 90 91 92 93
  .. ..$ : int [1:6] 154 155 156 157 158 159
  .. ..$ : int [1:6] 18 19 20 21 22 23
  .. ..$ : int [1:6] 1 2 3 4 5 6
  .. ..$ : int [1:6] 30 31 32 33 34 35
  .. ..$ : int [1:6] 142 143 144 145 146 147
  .. ..$ : int [1:6] 24 25 26 27 28 29
  .. ..$ : int [1:6] 76 77 78 79 80 81
  .. ..$ : int [1:6] 94 95 96 97 98 99
  .. ..$ : int [1:6] 100 101 102 103 104 105
  .. ..$ : int [1:6] 42 43 44 45 46 47
  .. ..$ : int [1:6] 136 137 138 139 140 141
  .. ..$ : int [1:6] 160 161 162 163 164 165
  .. ..$ : int [1:6] 36 37 38 39 40 41
  .. ..$ : int [1:6] 124 125 126 127 128 129
  .. ..$ : int [1:6] 82 83 84 85 86 87
  .. ..$ : int [1:6] 148 149 150 151 152 153
  .. ..$ : int [1:6] 106 107 108 109 110 111
  .. ..$ : int [1:6] 70 71 72 73 74 75
  .. ..$ : int [1:6] 130 131 132 133 134 135
  .. ..$ : int [1:6] 57 58 59 60 61 62
  .. ..$ : int [1:6] 118 119 120 121 122 123
  .. ..$ : int [1:6] 112 113 114 115 116 117
  .. ..$ : int [1:7] 63 64 65 66 67 68 69
  .. ..$ : int [1:6] 12 13 14 15 16 17
  .. ..@ ptype: int(0) 
  ..- attr(*, ".drop")= logi TRUE

The TPA data has 13 variables. Product (Variable 1), Brand (Variable 2), Type (Variable 3), Protein (Variable 4), and Component (Variable 5) are character variables, while Sampling (Variable 6) is an integer. The remaining variables, i.e., Hardness (Variable 7), Adhesiveness (Variable 8), Resilence (Variable 9), Cohesion (Variable 10), Springiness (Variable 11), Gumminess (Variable 12), and Chewiness (Variable 13), are numeric.

Data-wrangling operations

# Show only the rows with Type "HD", sorted alphabetically by Product.
arrange(filter(TPA, Type == "HD"), Product)
# A tibble: 76 x 13
# Groups:   Product [12]
   Product             Brand Type  Protein Component Sampling Hardness
   <chr>               <chr> <chr> <chr>   <chr>        <int>    <dbl>
 1 BallPark Beef Fran~ Ball~ HD    M       B                1     29.5
 2 BallPark Beef Fran~ Ball~ HD    M       B                2   3820. 
 3 BallPark Beef Fran~ Ball~ HD    M       B                3     27.5
 4 BallPark Beef Fran~ Ball~ HD    M       B                4     28.4
 5 BallPark Beef Fran~ Ball~ HD    M       B                5   4522. 
 6 BallPark Beef Fran~ Ball~ HD    M       B                6   3653. 
 7 BallPark Beef Fran~ Ball~ HD    M       B                7   3594. 
 8 BallPark Beef Fran~ Ball~ HD    M       B                8     27.7
 9 BallPark Beef Fran~ Ball~ HD    M       B                9     28.0
10 BAR S FRANKS        Bar S HD    M       CP               1   2886. 
# ... with 66 more rows, and 6 more variables: Adhesiveness <dbl>,
#   Resilence <dbl>, Cohesion <dbl>, Springiness <dbl>,
#   Gumminess <dbl>, Chewiness <dbl>

Reuse

Text and figures are licensed under Creative Commons Attribution CC BY-NC 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".

Citation

For attribution, please cite this work as

Tansaraviput (2022, May 11). Data Analytics and Computational Social Science: Sasi Tansaraviput HW2. Retrieved from https://github.com/DACSS/dacss_course_website/posts/httprpubscomsnoutsnake900806/

BibTeX citation

@misc{tansaraviput2022sasi,
  author = {Tansaraviput, Sasi},
  title = {Data Analytics and Computational Social Science: Sasi Tansaraviput HW2},
  url = {https://github.com/DACSS/dacss_course_website/posts/httprpubscomsnoutsnake900806/},
  year = {2022}
}