This is HW5 using sleep data to show the effects of daily habits on sleep duration, sleep quality and heart rate during sleep
<Sleep project: how is sleep related to our daily lives?>
The data is from Kaggle (https://www.kaggle.com/danagerous/sleep-data). Sleep was recorded from a Swedish application (iOS) from 180 subjects. 18 people are excluded due to missing information for some categories (i.e., total participants = 162).
variables:
1. What daily habits (e.g., drinking coffee) affect sleep features (quality, duration and heart rate during sleep) and sleep onset/offset time.
1.1 daily habits -> sleep features?
# Bring needed information columns.
# Columns are picked by position from sleep_data: the first three (3, 4, 7) are
# used below as sleep quality, total sleep time and heart rate; 8:11 are the
# four "Yes"/other daily-habit flags.
sleep_data1_1 <- sleep_data[, c(3, 4, 7:11)]
###############################sleep quality#################################
# Count, for every daily habit, how many "Yes" records fall into each
# sleep-quality band. Result: new_sleep, a 4 x 4 matrix (habits x bands).
set.seed(112) # nothing random happens here; kept so later RNG state is unchanged
# As before, only the first 162 records are tabulated; head() also tolerates
# data with fewer rows instead of failing.
quality_score <- head(sleep_data1_1[[1]], 162)
# One logical mask per band. The bands are mutually exclusive, and scores
# above 100 (or missing) belong to no band.
quality_bands <- list(
  "90~100" = quality_score > 90 & quality_score <= 100,
  "80~90" = quality_score > 80 & quality_score <= 90,
  "70~80" = quality_score > 70 & quality_score <= 80,
  "<70" = quality_score <= 70
)
new_sleep <- matrix(0, 4, 4)
colnames(new_sleep) <- names(quality_bands)
rownames(new_sleep) <- c("drinking coffee", "drinking tea", "stress", "working out")
for (habit_row in seq_len(nrow(new_sleep))) {
  # Habit flags live in columns 4:7 of sleep_data1_1, in row-name order.
  said_yes <- head(sleep_data1_1[[habit_row + 3]], 162) == "Yes"
  # na.rm = TRUE: a record with a missing score or flag is simply not counted.
  new_sleep[habit_row, ] <- vapply(
    quality_bands,
    function(in_band) sum(said_yes & in_band, na.rm = TRUE),
    numeric(1)
  )
}
###############################Total sleep amount###############################
# Count, for every daily habit, how many "Yes" records fall into each
# total-sleep-time band. Result: new_sleep1, a 4 x 4 matrix (habits x bands).
set.seed(112) # nothing random happens here; kept so later RNG state is unchanged
# As before, only the first 162 records are tabulated; head() also tolerates
# data with fewer rows instead of failing.
sleep_amount <- head(sleep_data1_1[[2]], 162)
# One logical mask per band. The cut-offs 300/420/480 match the 5/7/8-hour
# labels if the column is in minutes -- NOTE(review): confirm the unit.
# NOTE(review): a value of exactly 300 is counted under "5~7" although the
# last label reads "<=5"; the original cut-offs are kept unchanged here.
amount_bands <- list(
  ">8" = sleep_amount > 480,
  "7~8(typical)" = sleep_amount >= 420 & sleep_amount <= 480,
  "5~7" = sleep_amount >= 300 & sleep_amount < 420,
  "<=5" = sleep_amount < 300
)
new_sleep1 <- matrix(0, 4, 4)
colnames(new_sleep1) <- names(amount_bands)
rownames(new_sleep1) <- c("drinking coffee", "drinking tea", "stress", "working out")
for (habit_row in seq_len(nrow(new_sleep1))) {
  # Habit flags live in columns 4:7 of sleep_data1_1, in row-name order.
  said_yes <- head(sleep_data1_1[[habit_row + 3]], 162) == "Yes"
  # na.rm = TRUE: a record with a missing value or flag is simply not counted.
  new_sleep1[habit_row, ] <- vapply(
    amount_bands,
    function(in_band) sum(said_yes & in_band, na.rm = TRUE),
    numeric(1)
  )
}
###############################Heart rate######################################
# Count, for every daily habit, how many "Yes" records fall into each
# heart-rate band. Result: new_sleep2, a 4 x 4 matrix (habits x bands).
set.seed(112) # nothing random happens here; kept so later RNG state is unchanged
# As before, only the first 162 records are tabulated; head() also tolerates
# data with fewer rows instead of failing.
heart_rate <- head(sleep_data1_1[[3]], 162)
# One logical mask per band. The "40~50(typical)" band starts just above 40 so
# that it matches its label and the four bands cover every value; a lower
# bound of 45 would leave readings in (40, 45] counted in no column at all.
# NOTE(review): exactly 40 is counted under "<40" (the test is <= 40).
heart_rate_bands <- list(
  ">80" = heart_rate > 80,
  "50~80" = heart_rate > 50 & heart_rate <= 80,
  "40~50(typical)" = heart_rate > 40 & heart_rate <= 50,
  "<40" = heart_rate <= 40
)
new_sleep2 <- matrix(0, 4, 4)
colnames(new_sleep2) <- names(heart_rate_bands)
rownames(new_sleep2) <- c("drinking coffee", "drinking tea", "stress", "working out")
for (habit_row in seq_len(nrow(new_sleep2))) {
  # Habit flags live in columns 4:7 of sleep_data1_1, in row-name order.
  said_yes <- head(sleep_data1_1[[habit_row + 3]], 162) == "Yes"
  # na.rm = TRUE: a record with a missing reading or flag is simply not counted.
  new_sleep2[habit_row, ] <- vapply(
    heart_rate_bands,
    function(in_band) sum(said_yes & in_band, na.rm = TRUE),
    numeric(1)
  )
}
#######################################plot#####################################
# Draw one grouped bar chart from a habits-by-bands count matrix: one cluster
# per column (band), one bar per row (habit), legend taken from the row names.
draw_habit_counts <- function(count_matrix, x_title) {
  invisible(barplot(
    count_matrix,
    beside = TRUE,
    border = "white",
    col = seq_len(nrow(count_matrix)),
    font.axis = 2,
    font.lab = 2,
    xlab = x_title,
    ylab = "Numbers",
    legend.text = TRUE,
    args.legend = list(x = "topright", inset = c(-0.001, 0))
  ))
}
# Sleep quality bands
draw_habit_counts(new_sleep, "Sleep quality(%)")
# Total sleep time bands
draw_habit_counts(new_sleep1, "Total sleep time(Hrs)")
# Heart rate bands
draw_habit_counts(new_sleep2, "Heart rate(BPM)")
1.1 Overall, the number of people who work out is low compared to the other daily habits in all three categories. 1) Sleep quality: It seems that less stress leads to better sleep quality. 2) Total sleep time: The effect looks small, but working out appears to play a slight role in longer sleep time. 3) Heart rate: Notably, having coffee, tea, stress, or a workout on the day of sleep is associated with a higher heart rate during sleep than the range known to be normal during sleep (40~50).
1.2 daily habits -> sleep onset/offset time?
# Start/End (columns 1, 2) plus the four habit flags (columns 8:11).
sleep_data1_2 <- sleep_data[, c(1, 2, 8:11)]

# Parse a "YYYY-MM-DD HH:MM:SS" stamp (read as EST5EDT) into plain seconds.
# NOTE(review): this is an absolute timestamp, so the plots below compare
# calendar date and time together, not time of day.
stamp_to_seconds <- function(stamp) {
  parsed <- strptime(stamp, "%Y-%m-%d %H:%M:%S", tz = "EST5EDT")
  as.numeric(parsed)
}
sleep_data1_2$Start <- stamp_to_seconds(sleep_data1_2$Start)
sleep_data1_2$End <- stamp_to_seconds(sleep_data1_2$End)

# Boxplot of one time column of sleep_data1_2 split by one habit column.
habit_boxplot <- function(habit_col, time_col, x_title, y_title) {
  ggplot(sleep_data1_2, aes(x = .data[[habit_col]], y = .data[[time_col]])) +
    geom_boxplot(fill = "slateblue", alpha = 0.2) +
    xlab(x_title) +
    ylab(y_title)
}

# Relationship between sleep onset and each habit
p1 <- habit_boxplot("coffee_state", "Start", "coffee_state", "sleep onset")
p2 <- habit_boxplot("tea_state", "Start", "tea_state", "sleep onset")
p3 <- habit_boxplot("working_out_state", "Start", "workingout_state", "sleep onset")
p4 <- habit_boxplot("stress_state", "Start", "stress_state", "sleep onset")
# Show the four charts together (the `+` on plots comes from patchwork)
p1 + p2 + p3 + p4

# Relationship between sleep offset and each habit
p1 <- habit_boxplot("coffee_state", "End", "coffee_state", "sleep offset")
p2 <- habit_boxplot("tea_state", "End", "tea_state", "sleep offset")
p3 <- habit_boxplot("working_out_state", "End", "workingout_state", "sleep offset")
p4 <- habit_boxplot("stress_state", "End", "stress_state", "sleep offset")
# Show the four charts together (the `+` on plots comes from patchwork)
p1 + p2 + p3 + p4

# Sleep onset against sleep offset.
# NOTE(review): this reads the unconverted sleep_data, not sleep_data1_2 --
# confirm that is intended.
onset_offset_scatter <- ggplot(sleep_data, aes(x = Start, y = End))
onset_offset_scatter +
  geom_point(size = 2) +
  theme_ipsum()
1.2
1) Except for working out, which delays both the time of falling asleep and the time of waking up, the other factors don’t affect sleep onset/offset time. Considering the error bars, even the effect of working out on the day of sleep is not significant for sleep onset/offset time.
2) Sleep onset and offset times are proportional (based on the scatter plot), meaning that the participants sleep more when they go to bed late.
1.3 sleep features depending on coffee and/or tea?
# Bar chart of one sleep feature by coffee state, one facet per tea state.
# NOTE(review): stat = "identity" draws one bar per record at the same x
# position, so bars overplot and the visible height is the tallest record
# rather than a mean -- confirm this is the intended summary.
# (Faceting by coffee_state instead of tea_state was tried and left disabled.)
bars_by_coffee_and_tea <- function(feature_col, plot_title, y_title) {
  ggplot(
    sleep_data,
    aes(x = coffee_state, y = .data[[feature_col]], fill = coffee_state)
  ) +
    geom_bar(stat = "identity", position = "dodge") +
    scale_fill_viridis(discrete = TRUE, option = "E") +
    ggtitle(plot_title) +
    facet_wrap(~tea_state) +
    theme_ipsum() +
    theme(legend.position = "none") +
    xlab("Coffee state") +
    ylab(y_title)
}
bars_by_coffee_and_tea(
  "Sleep.quality",
  " Sleep quality depending on coffee and tea intake",
  "Sleep quality"
)
bars_by_coffee_and_tea(
  "Total.sleep.time",
  " Total sleep time depending on coffee and tea intake",
  "Total sleep time"
)
bars_by_coffee_and_tea(
  "Heart.rate",
  " Heart rate depending on coffee and tea intake",
  "Heart rate"
)
1.3 Since the effects of coffee and tea are similarly observed in each category of each sleep characteristic, further analysis of coffee and tea is made using facet_wrap() function.
Sleep quality: This doesn’t show any meaningful result. Sleep quality seems to be not affected by tea or coffee.
Total sleep time: This shows that participants slept the most when they had tea but no coffee. Conversely, total sleep time was the lowest when they had neither coffee nor tea.
Heart rate: Heart rate during sleep appears to be far outside the normal range on days when the participants drank coffee, regardless of tea intake.
# Pairwise correlations among the three sleep features (columns 3, 4, 7)
feature_cols <- c(3, 4, 7)
sleep_data2 <- sleep_data[, feature_cols]
ggpairs(sleep_data2, title = " 3.Corelationship among sleep features")
# Total sleep time against sleep quality, with a smoothed trend line
tst_vs_quality <- ggplot(sleep_data, aes(x = Total.sleep.time, y = Sleep.quality))
tst_vs_quality +
  geom_point(aes(color = Sleep.quality)) +
  geom_smooth()
2.1 There is a strong correlation between total sleep time and sleep quality (corr: 0.722) .
3. sleep features and sleep onset/offset time affect mood at awake
3.1 sleep features affect mood at awake?
# Start, End, the sleep features and mood at awake (columns 1:5 and 7).
sleep_data3_1 <- sleep_data[, c(1, 2, 3, 4, 5, 7)]
# Convert the Start/End stamps (read as EST5EDT) to plain seconds.
# The violin plots below do not use these two columns.
for (time_col in c("Start", "End")) {
  parsed <- strptime(sleep_data3_1[[time_col]], "%Y-%m-%d %H:%M:%S", tz = "EST5EDT")
  sleep_data3_1[[time_col]] <- as.numeric(parsed)
}

# Violin plot of one sleep feature per mood; fill gives each mood its own colour.
mood_violin <- function(feature_col, plot_title) {
  ggplot(
    sleep_data3_1,
    aes(x = Mood.at.awake, y = .data[[feature_col]], fill = Mood.at.awake)
  ) +
    geom_violin() +
    ggtitle(plot_title) +
    theme_ipsum()
}
mood_violin("Sleep.quality", "Quality")
mood_violin("Total.sleep.time", "Total sleep time")
mood_violin("Heart.rate", "Heart rate")
# The three charts are printed one after another; combining them side by side
# with patchwork was left disabled.
3.1 No apparently significant results were found in the relationship between sleep features and mood at awake.
3.2 Sleep onset/offset affects mood at awake?
# Start, End and mood at awake (columns 1, 2, 5).
sleep_data3_2 <- sleep_data[, c(1, 2, 5)]

# Binned ridgeline (20 bins) of one time column, one ridge per mood.
# NOTE(review): the reordered factor is stored in `text`, which the plot never
# reads, so the ridge order is not actually changed by fct_reorder().
mood_ridgeline <- function(ridge_data, time_col, x_title) {
  ridge_data %>%
    mutate(text = fct_reorder(Mood.at.awake, .data[[time_col]])) %>%
    ggplot(aes(x = .data[[time_col]], y = Mood.at.awake, fill = Mood.at.awake)) +
    geom_density_ridges(alpha = 0.6, stat = "binline", bins = 20) +
    theme_ridges() +
    theme(
      legend.position = "none",
      panel.spacing = unit(0.1, "lines"),
      strip.text.x = element_text(size = 8)
    ) +
    xlab(x_title) +
    ylab("Mood at awake")
}

###############################Sleep Start######################################
# Mood at awake depending on sleep onset (sleep starts)
onset_parsed <- strptime(sleep_data3_2$Start, "%Y-%m-%d %H:%M:%S", tz = "EST5EDT")
sleep_data3_2$Start <- as.numeric(onset_parsed)
mood_ridgeline(sleep_data3_2, "Start", "Sleep Start Time")

###############################Sleep End######################################
# Mood at awake depending on sleep offset (sleep ends)
offset_parsed <- strptime(sleep_data3_2$End, "%Y-%m-%d %H:%M:%S", tz = "EST5EDT")
sleep_data3_2$End <- as.numeric(offset_parsed)
mood_ridgeline(sleep_data3_2, "End", "Sleep End Time")
3.2 A positive mood at awake (e.g., good or great) doesn’t depend on the time of falling asleep or waking up. However, very early or very late sleep onset and offset times appear to be associated with a negative mood (e.g., bad). Further, more people feel bad at awake when they go to bed and wake up early than when they have a later sleep onset/offset.
All in all, this project shows how daily habits can affect sleep features. I was able to observe the following:
Total sleep amount is likely to be affected by working-out state on the day to have sleep.
Having coffee, tea and stress is shown to affect increased heart rate during sleep (beyond normal range: 40-50). Working out may affect this as well, but its effect looks trivial.
Total sleep time and sleep quality showed a correlation (r=0.722). Additionally, their relationship showed proportional relationship, indicating that those who sleep early have shorter sleep compared to people who have sleep later.
The most total sleep amount was shown with tea but without coffee. Increased heart rate was observed with coffee intake regardless of having tea.
Mood at awake: a bad mood at awake seems to be associated with a very early or very late time of falling asleep or waking up.
– What is missing (if anything) in your analysis process so far? X axis of time series should be revised in a better way.
– What conclusions can you make about your research questions at this point? Daily habits affect sleep features (such as effects of stress or coffee in sleep quality, sleep duration and heart rate during sleep).
– What do you think a naive reader would need to fully understand your graphs? – Is there anything you want to answer with your dataset, but can’t? I guess all things were clearly explained above.
Text and figures are licensed under Creative Commons Attribution CC BY-NC 4.0. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".
For attribution, please cite this work as
Noh (2022, May 4). Data Analytics and Computational Social Science: HW 5. Retrieved from https://github.com/DACSS/dacss_course_website/posts/httpsrpubscomenoh897837/
BibTeX citation
@misc{noh2022hw, author = {Noh, Eunsol}, title = {Data Analytics and Computational Social Science: HW 5}, url = {https://github.com/DACSS/dacss_course_website/posts/httpsrpubscomenoh897837/}, year = {2022} }