Sasi Tansaraviput HW5

Modifying previous homework and answering questions

Sasi Tansaraviput
2022-05-10

Research questions

Read in dataset

library(readxl)
# dplyr supplies the %>% pipe and the verbs (group_by, filter, arrange, ...)
# that the rest of this analysis calls without a namespace prefix.
library(dplyr)

# Read the raw texture measurements from the workbook.
TPA <- read_excel("TPA_all.xlsx")
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(TPA)
}

# Keep the identifier columns and the seven texture attributes; every other
# column (the original note mentions diameter) is dropped.
TPA <- TPA[, c(
  "Product", "Brand", "Type", "Protein", "Component", "Sampling",
  "Hardness", "Adhesiveness", "Resilence", "Cohesion",
  "Springiness", "Gumminess", "Chewiness"
)]

# Recode Type: "HD" is kept as is, every other value becomes "IT".
TPA <- TPA %>%
  dplyr::mutate(
    Type = ifelse(as.character(Type) != "HD", "IT", as.character(Type))
  )

Compute descriptive statistics

Mean, median, and standard deviations of each hotdog and sausage

# Summarise every texture attribute (Hardness through Chewiness) for each
# Product/Type/Protein/Component combination with a single statistic.
#
# data:   data frame holding the four grouping columns and the texture columns
# stat:   summary function applied to each texture column (e.g. mean)
# suffix: text appended to each column name, giving e.g. "Hardness_mean"
# digits: number of decimal places kept in the summarised values
#
# Returns an ungrouped tibble: the four grouping columns followed by one
# rounded "<attribute>_<suffix>" column per texture attribute.
summarise_texture <- function(data, stat, suffix, digits = 3) {
  grouped <- dplyr::group_by(data, Product, Type, Protein, Component)
  dplyr::summarise(
    grouped,
    # across() replaces the superseded summarise_at() / mutate_if() pair and
    # rounds only the summarised texture columns.
    dplyr::across(
      Hardness:Chewiness,
      function(values) round(stat(values), digits),
      .names = paste0("{.col}_", suffix)
    ),
    # Drop the grouping so later filter()/mutate() calls are not silently
    # evaluated group by group.
    .groups = "drop"
  )
}

TPA_mean <- summarise_texture(TPA, mean, "mean")
TPA_med <- summarise_texture(TPA, median, "med")
TPA_sd <- summarise_texture(TPA, sd, "sd")

# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(TPA_med)
  View(TPA_sd)
}

Statistical Analysis of the data and visualizations

ANOVA and Tukey’s post hoc test

# Hotdogs: keep the rows of the mean table whose Type is "HD", then take the
# subset of those rows whose Protein is "M".
HD <- filter(TPA_mean, Type == "HD")
HD_M <- filter(HD, Protein == "M")
  
  
# Hardness: one-way ANOVA of Hardness_mean by Component (meat hotdogs, HD_M)
A_M_Hardness_HD <- aov(Hardness_mean ~ Component, data = HD_M)
summary(A_M_Hardness_HD)
            Df  Sum Sq Mean Sq F value Pr(>F)
Component    3 3747142 1249047   0.483  0.728
Residuals    2 5168328 2584164               
# Adhesiveness: one-way ANOVA of Adhesiveness_mean by Component (meat hotdogs, HD_M)
A_M_Adhesiveness_HD <- aov(Adhesiveness_mean ~ Component, data = HD_M)
summary(A_M_Adhesiveness_HD)
            Df Sum Sq Mean Sq F value Pr(>F)
Component    3  570.4   190.1   1.375  0.447
Residuals    2  276.5   138.3               
# Resilience (column spelled "Resilence" in the data): one-way ANOVA of
# Resilence_mean by Component (meat hotdogs, HD_M)
A_M_Resilence_HD <- aov(Resilence_mean ~ Component, data = HD_M)
summary(A_M_Resilence_HD)
            Df Sum Sq Mean Sq F value Pr(>F)
Component    3  35.03   11.68   1.154  0.495
Residuals    2  20.24   10.12               
# Cohesion: one-way ANOVA of Cohesion_mean by Component (meat hotdogs, HD_M)
A_M_Cohesion_HD <- aov(Cohesion_mean ~ Component, data = HD_M)
summary(A_M_Cohesion_HD)
            Df   Sum Sq  Mean Sq F value Pr(>F)
Component    3 0.016121 0.005374   4.376  0.192
Residuals    2 0.002456 0.001228               
# Springiness: one-way ANOVA of Springiness_mean by Component (meat hotdogs, HD_M).
# This fit is reused by the TukeyHSD() calls further down.
A_M_Springiness_HD <- aov(Springiness_mean ~ Component, data = HD_M)
summary(A_M_Springiness_HD)
            Df Sum Sq Mean Sq F value Pr(>F)  
Component    3  37.68  12.561   23.96 0.0403 *
Residuals    2   1.05   0.524                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Gumminess: one-way ANOVA of Gumminess_mean by Component (meat hotdogs, HD_M)
A_M_Gumminess_HD <- aov(Gumminess_mean ~ Component, data = HD_M)
summary(A_M_Gumminess_HD)
            Df  Sum Sq Mean Sq F value Pr(>F)
Component    3  705663  235221   0.173  0.906
Residuals    2 2716263 1358132               
# Chewiness: one-way ANOVA of Chewiness_mean by Component (meat hotdogs, HD_M)
A_M_Chewiness_HD <- aov(Chewiness_mean ~ Component, data = HD_M)
summary(A_M_Chewiness_HD)
            Df  Sum Sq Mean Sq F value Pr(>F)
Component    3  383833  127944   0.126  0.937
Residuals    2 2032661 1016331               
# Tukey HSD pairwise comparisons between Component levels for the
# springiness model; the result is printed, not stored.
TukeyHSD(A_M_Springiness_HD)
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = Springiness_mean ~ Component, data = HD_M)

$Component
           diff         lwr       upr     p adj
BP-B   2.804667 -2.98819995  8.597533 0.1871227
CP-B   5.373667 -0.41919995 11.166533 0.0577043
TCP-B  5.780667 -0.01219995 11.573533 0.0502022
CP-BP  2.569000 -4.52578368  9.663784 0.2966390
TCP-BP 2.976000 -4.11878368 10.070784 0.2365905
TCP-CP 0.407000 -6.68778368  7.501784 0.9739568
# Store the same Tukey HSD result and plot its confidence intervals;
# las = 1 draws the axis labels horizontally.
TK_M_Springiness_HD <- TukeyHSD(A_M_Springiness_HD)
plot(TK_M_Springiness_HD, las = 1)

After separating the hotdogs from the Italian sausages, analysis of variance (ANOVA) was used to determine whether the plant-based hotdogs should be compared with the individual components of meat-based hotdogs. From the ANOVA results, the only attribute that differs significantly at the 95% confidence level among meat-based hotdogs is springiness (p = 0.0403). Tukey’s post hoc test was then conducted to determine how the components differ from each other. No pairwise comparison reaches significance at the 5% level: the Turkey-Chicken-Pork blend versus Beef comes closest but narrowly misses it (adjusted p = 0.0502, with a confidence interval that just includes zero). Thus, it was determined that plant-based hotdogs should be compared with overall meat-based hotdogs.

# Italian sausages: keep the rows of the mean table whose Type is "IT", then
# take the subset of those rows whose Protein is "M".
IT <- filter(TPA_mean, Type == "IT")
IT_M <- filter(IT, Protein == "M")
  
  
# Hardness: one-way ANOVA of Hardness_mean by Component (meat sausages, IT_M)
A_M_Hardness_IT <- aov(Hardness_mean ~ Component, data = IT_M)
summary(A_M_Hardness_IT)
            Df   Sum Sq Mean Sq F value Pr(>F)
Component    1   792104  792104    0.29   0.61
Residuals    6 16409919 2734986               
# Adhesiveness: one-way ANOVA of Adhesiveness_mean by Component (meat sausages, IT_M)
A_M_Adhesiveness_IT <- aov(Adhesiveness_mean ~ Component, data = IT_M)
summary(A_M_Adhesiveness_IT)
            Df Sum Sq Mean Sq F value Pr(>F)
Component    1     12    12.0   0.026  0.877
Residuals    6   2750   458.4               
# Resilience (column spelled "Resilence" in the data): one-way ANOVA of
# Resilence_mean by Component (meat sausages, IT_M)
A_M_Resilence_IT <- aov(Resilence_mean ~ Component, data = IT_M)
summary(A_M_Resilence_IT)
            Df Sum Sq Mean Sq F value Pr(>F)
Component    1  49.31   49.31   1.493  0.268
Residuals    6 198.14   33.02               
# Cohesion: one-way ANOVA of Cohesion_mean by Component (meat sausages, IT_M)
A_M_Cohesion_IT <- aov(Cohesion_mean ~ Component, data = IT_M)
summary(A_M_Cohesion_IT)
            Df  Sum Sq  Mean Sq F value Pr(>F)
Component    1 0.01594 0.015939   2.045  0.203
Residuals    6 0.04677 0.007795               
# Springiness: one-way ANOVA of Springiness_mean by Component (meat sausages, IT_M)
A_M_Springiness_IT <- aov(Springiness_mean ~ Component, data = IT_M)
summary(A_M_Springiness_IT)
            Df Sum Sq Mean Sq F value Pr(>F)  
Component    1  8.314   8.314   4.674 0.0739 .
Residuals    6 10.672   1.779                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Gumminess: one-way ANOVA of Gumminess_mean by Component (meat sausages, IT_M)
A_M_Gumminess_IT <- aov(Gumminess_mean ~ Component, data = IT_M)
summary(A_M_Gumminess_IT)
            Df   Sum Sq Mean Sq F value Pr(>F)
Component    1  2870804 2870804   1.396  0.282
Residuals    6 12335681 2055947               
# Chewiness: one-way ANOVA of Chewiness_mean by Component (meat sausages, IT_M)
A_M_Chewiness_IT <- aov(Chewiness_mean ~ Component, data = IT_M)
summary(A_M_Chewiness_IT)
            Df  Sum Sq Mean Sq F value Pr(>F)
Component    1 1685890 1685890   1.186  0.318
Residuals    6 8529505 1421584               

As for the Italian sausages, there is no significant difference at the 95% confidence level in any of the attributes among the meat-based sausages, which means that plant-based sausages should also be compared with overall meat-based sausages.

# Pool all meat components into one "M" level and relabel the products.
#
# data:             table with Component, Protein and Product columns
# plant_components: Component codes that are kept as they are; every other
#                   non-missing code is replaced by "M"
#
# Returns `data` sorted by Protein, with Component recoded and Product
# replaced by sequential labels ("M1", "M2", ... and "PB1", "PB2", ...).
pool_meat_components <- function(data,
                                 plant_components = c("PP", "SP", "VWG")) {
  component <- as.character(data$Component)
  # Missing components stay missing, as they did with the previous chained
  # `!=` comparisons inside ifelse().
  is_meat <- !is.na(component) & !(component %in% plant_components)
  component[is_meat] <- "M"
  data$Component <- component

  data <- dplyr::arrange(data, Protein)

  # Number the products within each protein class instead of hard-coding the
  # label vector, so the labels stay valid when the number of rows changes.
  # NOTE(review): assumes Protein is "M" for meat and that every other value
  # denotes a plant-based product - confirm against the workbook.
  prefix <- ifelse(data$Protein == "M", "M", "PB")
  position <- stats::ave(seq_along(prefix), prefix, FUN = seq_along)
  data$Product <- paste0(prefix, position)
  data
}

#For hotdogs
HD <- pool_meat_components(HD)

#For Italian sausages
IT <- pool_meat_components(IT)

Analysis of variance between individual plant-based hotdogs’ components and overall meat hotdog

# ANOVA for HD: each texture attribute is modelled against Component for all hotdogs
# Hardness: one-way ANOVA of Hardness_mean by Component (all hotdogs, HD)
A_HD_hardness <- aov(Hardness_mean ~ Component, data = HD)
summary(A_HD_hardness)
            Df   Sum Sq Mean Sq F value Pr(>F)
Component    3  7946174 2648725   1.593  0.266
Residuals    8 13301120 1662640               
# Adhesiveness: one-way ANOVA of Adhesiveness_mean by Component (all hotdogs, HD).
# This fit is reused by the Tukey HSD step further down.
A_HD_Adhesiveness <- aov(Adhesiveness_mean ~ Component, data = HD)
summary(A_HD_Adhesiveness)
            Df Sum Sq Mean Sq F value Pr(>F)  
Component    3   4931  1643.5   5.351 0.0258 *
Residuals    8   2457   307.1                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Resilience (column spelled "Resilence" in the data): one-way ANOVA of
# Resilence_mean by Component (all hotdogs, HD)
A_HD_Resilence <- aov(Resilence_mean ~ Component, data = HD)
summary(A_HD_Resilence)
            Df Sum Sq Mean Sq F value Pr(>F)  
Component    3  174.4   58.13    3.18 0.0848 .
Residuals    8  146.2   18.28                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Cohesion: one-way ANOVA of Cohesion_mean by Component (all hotdogs, HD)
A_HD_Cohesion <- aov(Cohesion_mean ~ Component, data = HD)
summary(A_HD_Cohesion)
            Df  Sum Sq  Mean Sq F value Pr(>F)
Component    3 0.01177 0.003922    1.16  0.383
Residuals    8 0.02704 0.003380               
# Springiness: one-way ANOVA of Springiness_mean by Component (all hotdogs, HD)
A_HD_Springiness <- aov(Springiness_mean ~ Component, data = HD)
summary(A_HD_Springiness)
            Df Sum Sq Mean Sq F value Pr(>F)  
Component    3 137.72   45.91   3.961  0.053 .
Residuals    8  92.71   11.59                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Gumminess: one-way ANOVA of Gumminess_mean by Component (all hotdogs, HD)
A_HD_Gumminess <- aov(Gumminess_mean ~ Component, data = HD)
summary(A_HD_Gumminess)
            Df  Sum Sq Mean Sq F value Pr(>F)
Component    3 2866681  955560   1.273  0.348
Residuals    8 6006044  750755               
# Chewiness: one-way ANOVA of Chewiness_mean by Component (all hotdogs, HD)
A_HD_Chewiness <- aov(Chewiness_mean ~ Component, data = HD)
summary(A_HD_Chewiness)
            Df  Sum Sq Mean Sq F value Pr(>F)
Component    3 3001488 1000496    1.68  0.248
Residuals    8 4765487  595686               
# Tukey for HD: pairwise Component comparisons for the adhesiveness model,
# plotted as confidence intervals (las = 1 draws axis labels horizontally)
TK_HD_Adhesiveness <- TukeyHSD(A_HD_Adhesiveness)
plot(TK_HD_Adhesiveness, las = 1)

ANOVA was conducted to compare each attribute between the individual plant-based hotdog components and the overall meat hotdogs. Only the adhesiveness of the hotdogs is significantly different at the 95% confidence level. Tukey’s post hoc test was then conducted to determine where the difference lies. From the Tukey plot of adhesiveness, it can be determined that only the soy protein hotdogs are significantly different from the meat hotdogs. This means that pea protein and vital wheat gluten are better than soy protein at imitating the adhesiveness of meat hotdogs.

Analysis of variance between individual plant-based sausages’ components and overall meat sausage

# ANOVA for IT: each texture attribute is modelled against Component for all Italian sausages
# Hardness: one-way ANOVA of Hardness_mean by Component (all sausages, IT)
A_IT_hardness <- aov(Hardness_mean ~ Component, data = IT)
summary(A_IT_hardness)
            Df    Sum Sq  Mean Sq F value   Pr(>F)    
Component    3 123755140 41251713   14.62 0.000374 ***
Residuals   11  31045055  2822278                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Adhesiveness: one-way ANOVA of Adhesiveness_mean by Component (all sausages, IT)
A_IT_Adhesiveness <- aov(Adhesiveness_mean ~ Component, data = IT)
summary(A_IT_Adhesiveness)
            Df Sum Sq Mean Sq F value Pr(>F)
Component    3   1887   628.9   2.048  0.166
Residuals   11   3378   307.1               
# Resilience (column spelled "Resilence" in the data): one-way ANOVA of
# Resilence_mean by Component (all sausages, IT)
A_IT_Resilence <- aov(Resilence_mean ~ Component, data = IT)
summary(A_IT_Resilence)
            Df Sum Sq Mean Sq F value Pr(>F)  
Component    3  487.3  162.45   5.241 0.0173 *
Residuals   11  340.9   30.99                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Cohesion: one-way ANOVA of Cohesion_mean by Component (all sausages, IT)
A_IT_Cohesion <- aov(Cohesion_mean ~ Component, data = IT)
summary(A_IT_Cohesion)
            Df Sum Sq Mean Sq F value Pr(>F)  
Component    3 0.1265 0.04217   6.104 0.0106 *
Residuals   11 0.0760 0.00691                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Springiness: one-way ANOVA of Springiness_mean by Component (all sausages, IT)
A_IT_Springiness <- aov(Springiness_mean ~ Component, data = IT)
summary(A_IT_Springiness)
            Df Sum Sq Mean Sq F value   Pr(>F)    
Component    3  835.6  278.52   17.32 0.000177 ***
Residuals   11  176.8   16.08                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Gumminess: one-way ANOVA of Gumminess_mean by Component (all sausages, IT)
A_IT_Gumminess <- aov(Gumminess_mean ~ Component, data = IT)
summary(A_IT_Gumminess)
            Df   Sum Sq  Mean Sq F value Pr(>F)  
Component    3 30116483 10038828   4.441 0.0282 *
Residuals   11 24865014  2260456                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Chewiness: one-way ANOVA of Chewiness_mean by Component (all sausages, IT)
A_IT_Chewiness <- aov(Chewiness_mean ~ Component, data = IT)
summary(A_IT_Chewiness)
            Df   Sum Sq Mean Sq F value Pr(>F)  
Component    3 24502666 8167555   4.693  0.024 *
Residuals   11 19144592 1740417                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey for IT: compute the pairwise Component comparisons for the six
# sausage models first, then draw their interval plots in the same order.
TK_IT_hardness <- TukeyHSD(A_IT_hardness)
TK_IT_Resilence <- TukeyHSD(A_IT_Resilence)
TK_IT_Cohesion <- TukeyHSD(A_IT_Cohesion)
TK_IT_Springiness <- TukeyHSD(A_IT_Springiness)
TK_IT_Gumminess <- TukeyHSD(A_IT_Gumminess)
TK_IT_Chewiness <- TukeyHSD(A_IT_Chewiness)

# las = 1 draws the axis labels horizontally on every plot.
invisible(lapply(
  list(
    TK_IT_hardness,
    TK_IT_Resilence,
    TK_IT_Cohesion,
    TK_IT_Springiness,
    TK_IT_Gumminess,
    TK_IT_Chewiness
  ),
  plot,
  las = 1
))

As for the Italian sausages, the only attribute that is not significantly different is adhesiveness.

* Hardness: The hardness of the soy protein-based sausages is not significantly different from that of the meat sausages, while the vital wheat gluten-based sausages are not significantly different from the pea protein and soy protein sausages. Additionally, while the soy protein and pea protein sausages differ significantly from each other in hardness, the result is borderline. It can be inferred from this post hoc test that soy protein is appropriate for accurately imitating the hardness of meat sausages.

In conclusion, based on the ANOVA tests, the best protein to use as the main component of Italian sausages in order to accurately imitate conventional meat sausages is soy protein, as it can accurately imitate all texture attributes of meat-based sausages, while vital wheat gluten can only imitate the resilience, springiness, gumminess, and chewiness of conventional sausages. Additionally, the only texture attributes of soy protein that are similar to those of meat sausages are resilience and cohesiveness.

Principal component analysis

These are visualizations of all the texture attributes (hardness, adhesiveness, resilience, cohesiveness, springiness, gumminess, and chewiness), showing how each hotdog and sausage is influenced by those attributes and how similar or different the products are to each other. They can be used to determine the best commercially used plant protein for imitating meat hotdogs and sausages.

Principal component analysis of hotdogs

# PCA of the hotdog texture means. The "_mean" columns are selected by name
# rather than by position (5:11) so a change in column order cannot silently
# feed the wrong columns to prcomp(); variables are centred and scaled.
PCA_HD <- prcomp(
  HD[, grepl("_mean$", names(HD))],
  center = TRUE,
  scale. = TRUE
)

# Biplot of the first two components, labelled by product and coloured by
# protein component. Calls are namespace-qualified because neither ggbiplot
# nor ggplot2 is attached in this script.
ggbiplot::ggbiplot(
  PCA_HD,
  choices = c(1, 2),
  obs.scale = 1,
  var.scale = 1,
  ellipse = TRUE,
  labels = HD$Product,
  groups = HD$Component,
  varname.adjust = 1,
  varname.size = 2.5
) +
  ggplot2::scale_color_discrete(name = 'Protein Component') +
  ggplot2::theme_classic() +
  ggplot2::theme(legend.direction = 'horizontal', legend.position = 'bottom')

Principal component analysis of Italian sausages

# PCA of the Italian sausage texture means. The "_mean" columns are selected
# by name rather than by position (5:11) so a change in column order cannot
# silently feed the wrong columns to prcomp(); variables are centred and scaled.
PCA_IT <- prcomp(
  IT[, grepl("_mean$", names(IT))],
  center = TRUE,
  scale. = TRUE
)

# Biplot labelled by product and coloured by protein component. Calls are
# namespace-qualified because neither ggbiplot nor ggplot2 is attached in
# this script.
# NOTE(review): var.scale is 1.8 here but 1 in the hotdog biplot - confirm
# the difference is intentional.
ggbiplot::ggbiplot(
  PCA_IT,
  obs.scale = 1,
  var.scale = 1.8,
  ellipse = TRUE,
  labels = IT$Product,
  groups = IT$Component,
  varname.adjust = 1,
  varname.size = 2.5
) +
  ggplot2::scale_color_discrete(name = 'Protein Component') +
  ggplot2::theme_classic() +
  ggplot2::theme(legend.direction = 'horizontal', legend.position = 'bottom')

Answering questions

Reuse

Text and figures are licensed under Creative Commons Attribution CC BY-NC 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".

Citation

For attribution, please cite this work as

Tansaraviput (2022, May 11). Data Analytics and Computational Social Science: Sasi Tansaraviput HW5. Retrieved from https://github.com/DACSS/dacss_course_website/posts/httpsrpubscomsnoutsnake900936/

BibTeX citation

@misc{tansaraviput2022sasi,
  author = {Tansaraviput, Sasi},
  title = {Data Analytics and Computational Social Science: Sasi Tansaraviput HW5},
  url = {https://github.com/DACSS/dacss_course_website/posts/httpsrpubscomsnoutsnake900936/},
  year = {2022}
}