library(tidyverse)
library(ggplot2)
library(readr)
library(stringr)
library(tm)
library(wordcloud)
options(dplyr.summarise.inform = FALSE)
# Chunk defaults for the rendered document: show the code, but hide warnings
# and package start-up messages.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
HW3 - Darron Bunt
Loading Packages into R Environment
The emphasis in this homework is on exploratory data analysis using both graphics and statistics. You should build on your prior homework - incorporating any feedback and adjusting the code and text as needed. These homeworks are intended to be cumulative.
Loading the Dataset
# Import the flagship Twitter dataset.
FlagshipTwitter <- read_csv("_data/FlagshipTwitterUpdated.csv")
Descriptive Statistics - What is being produced by each college’s account?
The first question that I want to answer is how frequently flagship college Twitter accounts are posting, and what type of post those posts are. Accordingly, I’m going to explore data related to the overall number of posts, and then break this down further into four post types: original posts, retweets, quote tweets, and replies.
Number of posts made by each college in the month of November
First, I’m going to look at how many posts each college made overall during the month of November.
# Remove rows where the page type isn't Twitter.
FlagshipTwitter2 <- subset(FlagshipTwitter, PageType == "twitter")

# Rename the columns I intend to use for analysis.
FlagshipTwitter3 <- rename(
  FlagshipTwitter2,
  c(
    Tweet = "Full Text",
    MentionedAuthors = "Mentioned Authors",
    TWFollowers = "Twitter Followers",
    TWReply = "Twitter Reply Count",
    TWRetweets = "Twitter Retweets",
    TWLikes = "Twitter Likes",
    Reach = "Reach (new)",
    EngType = "Engagement Type",
    URL = Url
  )
)

# Put the columns I plan to use first; everything() keeps the rest after them.
FlagshipTwitterUse <- select(
  FlagshipTwitter3,
  Author, Date, Impressions, Reach, TWLikes, TWRetweets, TWReply, EngType,
  Sentiment, Hashtags, MentionedAuthors, Tweet, TWFollowers, URL,
  everything()
)

# Split the combined "date time" value into separate Date and Time columns,
# then parse each into a proper date / time type.
TwitterUse2 <- separate(
  FlagshipTwitterUse, Date,
  into = c("Date", "Time"), sep = " "
)
TwitterUse2$Date <- parse_date(TwitterUse2$Date, format = "%m/%d/%Y")
TwitterUse2$Time <- parse_time(TwitterUse2$Time, format = "%H:%M")
# Count posts made by each college, plus each college's share (in percent) of
# all posts in the dataset, sorted from most to least active.
by_college <- TwitterUse2 %>%
  count(Author, name = "Total_Posts") %>%
  mutate(Total_PostsPerc = Total_Posts / sum(Total_Posts) * 100) %>%
  arrange(desc(Total_PostsPerc))
by_college
# A tibble: 50 × 3
Author Total_Posts Total_PostsPerc
<chr> <int> <dbl>
1 RutgersU 337 5.96
2 UUtah 311 5.50
3 UMich 281 4.97
4 UofAlabama 242 4.28
5 uhmanoa 238 4.21
6 CUBoulder 220 3.89
7 penn_state 210 3.71
8 UNC 203 3.59
9 UofOklahoma 203 3.59
10 UBuffalo 198 3.50
# … with 40 more rows
What we see right off the bat is that post volume varies significantly depending on the flagship college in question - from 337 (Rutgers) to only two (University of South Dakota).
I’m curious about the summary statistics for post volume overall, as this should provide us with greater insight into how the numbers vary across the 50 colleges.
# Summary statistics for the per-college post counts and post percentages.
summary(by_college)
Author Total_Posts Total_PostsPerc
Length:50 Min. : 2.0 Min. :0.03535
Class :character 1st Qu.: 50.5 1st Qu.:0.89254
Mode :character Median : 93.5 Median :1.65253
Mean :113.2 Mean :2.00000
3rd Qu.:155.2 3rd Qu.:2.74390
Max. :337.0 Max. :5.95617
In addition to the gap between the minimum number of posts (two) and the maximum (337), the mean (113) sits well above the median (93.5), and the IQR (105) is wide relative to both.
This leads me to believe that different posting strategies are being implemented at these different schools.
Number of OG posts, retweets, quote tweet, and comments by each college
To further contextualize this information, I now want to break down each account’s post volume by type of post - original posts, quote tweets, retweets, or comments/replies. This will help us look into not just who posts the most, but what types of posts they are making (and whether different schools are employing strategies that lean more heavily into particular types of posts).
# Number of original (OG) posts, quote tweets, replies and retweets made by
# each college.

# Posts with a missing engagement type are labelled "OG".
TwitterUse2 <- TwitterUse2 %>%
  replace_na(list(EngType = "OG"))

# One row per college, one column per engagement type. A college that never
# made a given type of post gets NA in that column.
post_type_by_college <- TwitterUse2 %>%
  group_by(Author, EngType) %>%
  summarize(Count = n()) %>%
  pivot_wider(names_from = EngType, values_from = Count)

# Attach the per-type counts to the overall post counts.
post_types_by_college <- merge(by_college, post_type_by_college, by = "Author")
post_types_by_college
Author Total_Posts Total_PostsPerc OG QUOTE REPLY RETWEET
1 CUBoulder 220 3.88829975 63 11 30 116
2 IndianaUniv 121 2.13856486 37 2 5 77
3 LSU 147 2.59809120 58 4 3 82
4 Mizzou 188 3.32272888 90 2 10 86
5 OhioState 69 1.21951220 30 1 NA 38
6 OleMiss 27 0.47720042 18 NA 3 6
7 penn_state 210 3.71155885 74 13 4 119
8 RutgersU 337 5.95616826 39 5 1 292
9 uafairbanks 14 0.24743726 3 NA NA 11
10 uarizona 105 1.85577943 23 1 12 69
11 UArkansas 68 1.20183811 33 4 11 20
12 UBuffalo 198 3.49946978 25 5 11 157
13 UCBerkeley 46 0.81300813 27 NA 7 12
14 UConn 105 1.85577943 83 7 1 14
15 UDelaware 41 0.72463768 9 NA NA 32
16 UF 107 1.89112761 49 4 7 47
17 uhmanoa 238 4.20643337 154 NA NA 84
18 uidaho 59 1.04277130 31 1 22 5
19 uiowa 75 1.32555673 74 NA 1 NA
20 UMaine 37 0.65394132 19 2 4 12
21 UMassAmherst 55 0.97207494 42 10 1 2
22 UMich 281 4.96641923 181 7 2 91
23 UMNews 29 0.51254860 25 1 NA 3
24 umontana 34 0.60091905 16 2 2 14
25 UNC 203 3.58784023 103 12 33 55
26 unevadareno 80 1.41392718 62 7 1 10
27 universityofga 189 3.34040297 75 4 4 106
28 universityofky 50 0.88370449 18 2 16 14
29 universityofri 44 0.77765995 18 4 NA 22
30 UnivOfKansas 13 0.22976317 13 NA NA NA
31 UNLincoln 131 2.31530576 49 NA 5 77
32 UNM 38 0.67161541 23 3 2 10
33 UofAlabama 242 4.27712973 74 1 NA 167
34 UofIllinois 108 1.90880170 61 NA NA 47
35 UofMaryland 60 1.06044539 44 1 1 14
36 UofNH 41 0.72463768 38 NA 1 2
37 UofNorthDakota 128 2.26228349 40 11 8 69
38 UofOklahoma 203 3.58784023 65 8 6 124
39 UofSC 74 1.30788264 25 3 8 38
40 uoregon 136 2.40367621 38 7 18 73
41 usd 2 0.03534818 2 NA NA NA
42 UTAustin 158 2.79250619 32 9 9 108
43 UTKnoxville 91 1.60834217 32 21 8 30
44 UUtah 311 5.49664192 80 13 30 188
45 UVA 139 2.45669848 53 3 9 74
46 uvmvermont 52 0.91905267 18 16 3 15
47 UW 96 1.69671262 47 4 10 35
48 UWMadison 91 1.60834217 25 2 NA 64
49 UWyonews 66 1.16648993 66 NA NA NA
50 WestVirginiaU 101 1.78508307 55 3 8 35
# Express each post type as a percentage of the college's total posts.
# Columns that are NA in post_types_by_college stay NA here.
post_prop_college <- post_types_by_college %>%
  mutate(
    OGProp = OG / Total_Posts * 100,
    QuoteProp = QUOTE / Total_Posts * 100,
    ReplyProp = REPLY / Total_Posts * 100,
    RetweetProp = RETWEET / Total_Posts * 100
  ) %>%
  select(Author, OGProp, QuoteProp, RetweetProp, ReplyProp)
post_prop_college
Author OGProp QuoteProp RetweetProp ReplyProp
1 CUBoulder 28.63636 5.0000000 52.727273 13.6363636
2 IndianaUniv 30.57851 1.6528926 63.636364 4.1322314
3 LSU 39.45578 2.7210884 55.782313 2.0408163
4 Mizzou 47.87234 1.0638298 45.744681 5.3191489
5 OhioState 43.47826 1.4492754 55.072464 NA
6 OleMiss 66.66667 NA 22.222222 11.1111111
7 penn_state 35.23810 6.1904762 56.666667 1.9047619
8 RutgersU 11.57270 1.4836795 86.646884 0.2967359
9 uafairbanks 21.42857 NA 78.571429 NA
10 uarizona 21.90476 0.9523810 65.714286 11.4285714
11 UArkansas 48.52941 5.8823529 29.411765 16.1764706
12 UBuffalo 12.62626 2.5252525 79.292929 5.5555556
13 UCBerkeley 58.69565 NA 26.086957 15.2173913
14 UConn 79.04762 6.6666667 13.333333 0.9523810
15 UDelaware 21.95122 NA 78.048780 NA
16 UF 45.79439 3.7383178 43.925234 6.5420561
17 uhmanoa 64.70588 NA 35.294118 NA
18 uidaho 52.54237 1.6949153 8.474576 37.2881356
19 uiowa 98.66667 NA NA 1.3333333
20 UMaine 51.35135 5.4054054 32.432432 10.8108108
21 UMassAmherst 76.36364 18.1818182 3.636364 1.8181818
22 UMich 64.41281 2.4911032 32.384342 0.7117438
23 UMNews 86.20690 3.4482759 10.344828 NA
24 umontana 47.05882 5.8823529 41.176471 5.8823529
25 UNC 50.73892 5.9113300 27.093596 16.2561576
26 unevadareno 77.50000 8.7500000 12.500000 1.2500000
27 universityofga 39.68254 2.1164021 56.084656 2.1164021
28 universityofky 36.00000 4.0000000 28.000000 32.0000000
29 universityofri 40.90909 9.0909091 50.000000 NA
30 UnivOfKansas 100.00000 NA NA NA
31 UNLincoln 37.40458 NA 58.778626 3.8167939
32 UNM 60.52632 7.8947368 26.315789 5.2631579
33 UofAlabama 30.57851 0.4132231 69.008264 NA
34 UofIllinois 56.48148 NA 43.518519 NA
35 UofMaryland 73.33333 1.6666667 23.333333 1.6666667
36 UofNH 92.68293 NA 4.878049 2.4390244
37 UofNorthDakota 31.25000 8.5937500 53.906250 6.2500000
38 UofOklahoma 32.01970 3.9408867 61.083744 2.9556650
39 UofSC 33.78378 4.0540541 51.351351 10.8108108
40 uoregon 27.94118 5.1470588 53.676471 13.2352941
41 usd 100.00000 NA NA NA
42 UTAustin 20.25316 5.6962025 68.354430 5.6962025
43 UTKnoxville 35.16484 23.0769231 32.967033 8.7912088
44 UUtah 25.72347 4.1800643 60.450161 9.6463023
45 UVA 38.12950 2.1582734 53.237410 6.4748201
46 uvmvermont 34.61538 30.7692308 28.846154 5.7692308
47 UW 48.95833 4.1666667 36.458333 10.4166667
48 UWMadison 27.47253 2.1978022 70.329670 NA
49 UWyonews 100.00000 NA NA NA
50 WestVirginiaU 54.45545 2.9702970 34.653465 7.9207921
I can also pull the summary statistics for each type of post. This will help to further contextualize the data regarding the types of posts made by each college.
# Number of OG posts, quote tweets, replies and retweets made by each college
# (missing engagement types are counted as "OG").
post_type_by_college <- TwitterUse2 %>%
  replace_na(list(EngType = "OG")) %>%
  group_by(Author, EngType) %>%
  summarize(Count = n()) %>%
  pivot_wider(names_from = EngType, values_from = Count)
post_types_by_college <- merge(by_college, post_type_by_college, by = "Author")

# Summary statistics for post type by college. summary() reports NA counts
# separately, so each type's statistics cover only the colleges that made at
# least one post of that type.
summary(post_types_by_college)
Author Total_Posts Total_PostsPerc OG
Length:50 Min. : 2.0 Min. :0.03535 Min. : 2.00
Class :character 1st Qu.: 50.5 1st Qu.:0.89254 1st Qu.: 25.00
Mode :character Median : 93.5 Median :1.65253 Median : 38.50
Mean :113.2 Mean :2.00000 Mean : 47.18
3rd Qu.:155.2 3rd Qu.:2.74390 3rd Qu.: 62.75
Max. :337.0 Max. :5.95617 Max. :181.00
QUOTE REPLY RETWEET
Min. : 1.000 Min. : 1.000 Min. : 2.00
1st Qu.: 2.000 1st Qu.: 2.250 1st Qu.: 14.00
Median : 4.000 Median : 6.500 Median : 42.50
Mean : 5.684 Mean : 8.342 Mean : 60.13
3rd Qu.: 7.750 3rd Qu.:10.000 3rd Qu.: 83.50
Max. :21.000 Max. :33.000 Max. :292.00
NA's :12 NA's :12 NA's :4
Interestingly, retweets are the most common type of post, averaging 60 per author. The IQR for retweets (about 70) provides an idea of the level of variability in the post volume of this type by college. The number of original posts by college is more consistent, with a mean of 47 and an IQR of about 38. Quote tweets are the least utilized type of post (mean of 5.7), while replies are also relatively infrequent (mean of 8.3). Note that these averages are calculated only across the colleges that made at least one post of the given type, since colleges with none appear as NA.
Most common day of the week to post
I want to do two things at this step: ensure that I have the day of the week each post was made as a column of data, and then to use that data to break down the number of posts that were made on each day of the week.
# Number of posts made on each day of the week.

# Derive the weekday name from each post's date.
TwitterUse2$Weekday <- weekdays(TwitterUse2$Date)

# Tally posts per weekday, busiest day first.
day_of_week <- TwitterUse2 %>%
  count(Weekday, sort = TRUE) %>%
  rename("WD_Posts" = "n")
day_of_week
# A tibble: 7 × 2
Weekday WD_Posts
<chr> <int>
1 Tuesday 1167
2 Wednesday 1043
3 Friday 875
4 Monday 838
5 Thursday 746
6 Saturday 557
7 Sunday 432
The most common day of the week to post is Tuesday, with Wednesday close behind. Posts on Fridays and Mondays were next most common, with Thursdays not far behind. Weekends had the fewest posts.
Most common time of day to post
The time of day that posts are made is also a variable worth considering. I don’t, however, want to look at post time down to the second; instead, I want to break the day up into four equal time periods: overnight (00:00:00 to 05:59:59), morning (06:00:00 to 11:59:59), afternoon (12:00:00 to 17:59:59), and evening (18:00:00 to 23:59:59).
# Assign a time period label to each post based on the hour it was made:
#   overnight 00:00-05:59, morning 06:00-11:59,
#   afternoon 12:00-17:59, evening 18:00-23:59.
# The period is derived from the numeric hour rather than by comparing
# "HH:MM:SS" strings: the string version left one-second gaps at xx:59:59 and
# relied on the labels sorting after digit strings. Posts with a missing Time
# get NA.
time_of_day <- TwitterUse2 %>%
  mutate(
    # Time is stored as seconds since midnight; integer-divide to get the hour.
    TimePeriod = case_when(
      as.numeric(Time) %/% 3600 < 6 ~ "overnight",
      as.numeric(Time) %/% 3600 < 12 ~ "morning",
      as.numeric(Time) %/% 3600 < 18 ~ "afternoon",
      as.numeric(Time) %/% 3600 < 24 ~ "evening"
    )
  ) %>%
  select(Author, Date, TimePeriod, Tweet)
time_of_day
# A tibble: 5,658 × 4
Author Date TimePeriod Tweet
<chr> <date> <chr> <chr>
1 UofAlabama 2022-11-30 evening "Juggling, snakes and cooking - oh my! ICY…
2 uhmanoa 2022-11-30 evening "RT @ManoaGrad Summer and Fall 2022 candid…
3 CUBoulder 2022-11-30 evening "RT @CU_PTS We are thrilled to welcome two…
4 UNC 2022-11-30 evening "RT @ChapelHillFD #CHtraffic Alert: Countr…
5 unevadareno 2022-11-30 evening "Save the date! @unevadareno will hold the…
6 uhmanoa 2022-11-30 evening "$3.4 million to grow Native Hawaiian phys…
7 uarizona 2022-11-30 evening "University of Arizona researchers are tea…
8 UArkansas 2022-11-30 evening "RT @ArkansasPBS LIVESTREAMING: Join @Hill…
9 UArkansas 2022-11-30 evening "@prehistormic Your No. 1 song of the year…
10 UUtah 2022-11-30 evening "RT @UofUIT If you haven’t already, check …
# … with 5,648 more rows
Can I pivot this so that I have a count of how many times each account posted in each time frame?
# Count how many times each account posted in each time period: one row per
# college, one column per period. values_fill = 0L records a zero (instead of
# NA) when a college never posted in a period.
time_of_day_by_college <- time_of_day %>%
  group_by(Author, TimePeriod) %>%
  summarize(Count = n()) %>%
  pivot_wider(
    names_from = TimePeriod,
    values_from = Count,
    values_fill = 0L
  )
time_of_day_by_college
# A tibble: 50 × 5
# Groups: Author [50]
Author afternoon evening morning overnight
<chr> <int> <int> <int> <int>
1 CUBoulder 86 114 2 18
2 IndianaUniv 53 62 0 6
3 LSU 80 58 1 8
4 Mizzou 100 75 4 9
5 OhioState 35 31 0 3
6 OleMiss 16 11 0 0
7 penn_state 119 77 4 10
8 RutgersU 140 145 9 43
9 uafairbanks 2 12 0 0
10 uarizona 48 57 0 0
# … with 40 more rows
Number of posts during each time frame
With this information, we can now do a broad summary of posts made during each time frame by all of the colleges in the dataset.
# Number of posts made during each time period across all colleges, with each
# period's share (in percent) of all posts; busiest period first.
time_of_day_count <- time_of_day %>%
  count(TimePeriod, sort = TRUE) %>%
  rename("TimeOfDay" = "n") %>%
  mutate(TimeOfDayPerc = TimeOfDay / sum(TimeOfDay) * 100)
time_of_day_count
# A tibble: 4 × 3
TimePeriod TimeOfDay TimeOfDayPerc
<chr> <int> <dbl>
1 evening 2604 46.0
2 afternoon 2287 40.4
3 overnight 676 11.9
4 morning 91 1.61
A relatively equal proportion of posts are made during the evening (46%) and afternoon (40%), with a small portion of posts made overnight (12%).
I am somewhat surprised by the very small proportion of morning posts, which is making me believe that the timestamps in the dataset (i) do not reflect local time when the post was made; (ii) were also likely given in UTC and not in one of the more common time zones in the US. Given this, I will likely not dive too deeply into any exploration of the impact that time of day has on engagement metrics.
Descriptive statistics for the response to posts made by the college accounts
Making posts is certainly an important part of having a social media presence, but how your audience responds to those posts is crucial data that helps to guide social media strategy. Shouting into a vacuum is both inefficient and poor strategy; if no one is consuming and engaging with your content, why invest in creating it to begin with?
While there are several metrics that relate to post engagement – including likes, retweets, and replies by one’s Twitter audience – the metric I want to focus on for this analysis is reach. Reach is an estimate of the number of people that have actually seen/read a given post, and the reach listed in this dataset has been calculated using Brandwatch’s proprietary algorithm.
Average Reach by School
These are the high level statistics for key metrics overall across the entire college dataset.
# Summary statistics for the five key engagement metrics across all posts.
summary(TwitterUse2[c("Impressions", "Reach", "TWLikes", "TWRetweets", "TWReply")])
Impressions Reach TWLikes TWRetweets
Min. : 4670 Min. : 2882 Min. : 0.00 Min. : 0.000
1st Qu.: 89993 1st Qu.: 15477 1st Qu.: 0.00 1st Qu.: 0.000
Median : 141784 Median : 19519 Median : 0.00 Median : 0.000
Mean : 155459 Mean : 21024 Mean : 34.39 Mean : 5.301
3rd Qu.: 193824 3rd Qu.: 23174 3rd Qu.: 10.75 3rd Qu.: 2.000
Max. :4047025 Max. :1573085 Max. :7244.00 Max. :2259.000
TWReply
Min. : 0.000
1st Qu.: 0.000
Median : 0.000
Mean : 1.136
3rd Qu.: 0.000
Max. :986.000
Given the large difference between the mean reach (21,024) and the max/min (1,573,085 and 2,882 respectively), this suggests that there are outliers within the dataset that potentially skew overall results. This trend is similarly seen in the other four metrics as well.
While there is a high degree of variability between the minimum and maximum for each major metric, the IQR is more consistent.
Notably, reach seems to be far more consistent across the flagship college Twitter posts when compared to impressions.
Impressions: Median = 141,784, IQR = 103,831 Reach: Median = 19,519, IQR = 7,697
This gives a quartile based coefficient of variation of 0.73 for impressions, but only 0.39 for reach.
This would seem to suggest that while some accounts have a greater opportunity for their posts to be seen (ie. impressions), that does not necessarily translate into more people actually reading any given post (ie. reach).
Summary statistics by college
Next, I want to find the high level summary statistics (max, min, mean, median) for the posts that were made by each college.
# Engagement summary statistics by college: max, min, median and mean of each
# key metric, ignoring missing values. Retweet columns use the "TWRT_" prefix.
eng_met_by_college <- TwitterUse2 %>%
  group_by(Author) %>%
  summarize(
    Impressions_Max = max(Impressions, na.rm = TRUE),
    Impressions_Min = min(Impressions, na.rm = TRUE),
    Impressions_Median = median(Impressions, na.rm = TRUE),
    Impressions_Mean = mean(Impressions, na.rm = TRUE),
    Reach_Max = max(Reach, na.rm = TRUE),
    Reach_Min = min(Reach, na.rm = TRUE),
    Reach_Median = median(Reach, na.rm = TRUE),
    Reach_Mean = mean(Reach, na.rm = TRUE),
    TWLikes_Max = max(TWLikes, na.rm = TRUE),
    TWLikes_Min = min(TWLikes, na.rm = TRUE),
    TWLikes_Median = median(TWLikes, na.rm = TRUE),
    TWLikes_Mean = mean(TWLikes, na.rm = TRUE),
    TWRT_Max = max(TWRetweets, na.rm = TRUE),
    TWRT_Min = min(TWRetweets, na.rm = TRUE),
    TWRT_Median = median(TWRetweets, na.rm = TRUE),
    TWRT_Mean = mean(TWRetweets, na.rm = TRUE),
    TWReply_Max = max(TWReply, na.rm = TRUE),
    TWReply_Min = min(TWReply, na.rm = TRUE),
    TWReply_Median = median(TWReply, na.rm = TRUE),
    TWReply_Mean = mean(TWReply, na.rm = TRUE)
  )
eng_met_by_college
# A tibble: 50 × 21
Author Impress…¹ Impre…² Impre…³ Impre…⁴ Reach…⁵ Reach…⁶ Reach…⁷ Reach…⁸
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 CUBoulder 194433 89977 90304 94814. 33342 15450 15477 16634.
2 IndianaUniv 417922 87263 87438 109424. 26413 15236 15250 16215.
3 LSU 4047025 359995 362754 425548. 612271 28147 28238 35589.
4 Mizzou 577996 137774 138206. 151897. 91012 18681 18709 21230.
5 OhioState 1380976 381184 381417 463142. 102221 28824 28831 35813.
6 OleMiss 286030 111851 114761 129935. 33628 17036 19983 21809.
7 penn_state 292698 226550 226899 229665. 31571 23161 23179 24187.
8 RutgersU 290287 141602 141797 144131. 49452 18907 18919 19443.
9 uafairbanks 8989 8927 8960 8965 6002 4591 4602 4704.
10 uarizona 288508 168142 168483 176141. 36915 20377 20402 21612.
# … with 40 more rows, 12 more variables: TWLikes_Max <dbl>, TWLikes_Min <dbl>,
# TWLikes_Median <dbl>, TWLikes_Mean <dbl>, TWRT_Max <dbl>, TWRT_Min <dbl>,
# TWRT_Median <dbl>, TWRT_Mean <dbl>, TWReply_Max <dbl>, TWReply_Min <dbl>,
# TWReply_Median <dbl>, TWReply_Mean <dbl>, and abbreviated variable names
# ¹Impressions_Max, ²Impressions_Min, ³Impressions_Median, ⁴Impressions_Mean,
# ⁵Reach_Max, ⁶Reach_Min, ⁷Reach_Median, ⁸Reach_Mean
Relationship between reach and number of followers
The key metric that I’m most interested in is reach, as it is more of a reflection of how many people actually saw a post (as opposed to impressions, which shows the theoretical potential number of people who could have seen a post).
What I’d like to layer onto reach is the number of followers that each account has in order to examine the relationship between reach and number of followers.
The number of followers any Twitter account has can and does vary day-by-day. For simplicity’s sake, I am going to use the follower count for the day in November when they had the greatest number of followers to do my subsequent analysis.
# Number of followers for each college, taken as the largest follower count
# recorded on any of that college's posts.
followers_by_college <- TwitterUse2 %>%
  group_by(Author) %>%
  summarize(Followers = max(TWFollowers, na.rm = TRUE))
followers_by_college
# A tibble: 50 × 2
Author Followers
<chr> <dbl>
1 CUBoulder 90492
2 IndianaUniv 87544
3 LSU 363002
4 Mizzou 138271
5 OhioState 381557
6 OleMiss 112905
7 penn_state 227153
8 RutgersU 142012
9 uafairbanks 8989
10 uarizona 168691
# … with 40 more rows
I can now also pull the summary statistics comparing the number of followers at each flagship institution.
# Summary statistics for the follower counts across colleges.
summary(followers_by_college)
Author Followers
Length:50 Min. : 4685
Class :character 1st Qu.: 38340
Mode :character Median :110340
Mean :120452
3rd Qu.:175626
Max. :381557
These statistics support the assertion that the number of followers that each account has differs greatly - there is a difference of 376,872 followers between the smallest account (University of Wyoming) and the largest (Ohio State). At 137,286, the IQR is also reflective of a great degree of variability in followers by account.
# Pull only the mean of each key metric for each college.
mean_data <- select(
  eng_met_by_college,
  Author, Impressions_Mean, Reach_Mean, TWLikes_Mean, TWRT_Mean, TWReply_Mean
)

# Merge followers_by_college and mean_data.
merged_college <- merge(followers_by_college, mean_data, by = "Author")

# Add posts by college as a data point to what I created above, keeping the
# columns in a fixed order.
merged_college2 <- merge(by_college, merged_college, by = "Author") %>%
  select(
    Author, Total_Posts, Followers, Impressions_Mean, Reach_Mean,
    TWLikes_Mean, TWRT_Mean, TWReply_Mean
  )
merged_college2
Author Total_Posts Followers Impressions_Mean Reach_Mean
1 CUBoulder 220 90492 94814.136 16634.355
2 IndianaUniv 121 87544 109423.727 16215.041
3 LSU 147 363002 425548.429 35588.626
4 Mizzou 188 138271 151896.798 21230.415
5 OhioState 69 381557 463142.203 35812.884
6 OleMiss 27 112905 129935.444 21808.815
7 penn_state 210 227153 229664.771 24186.681
8 RutgersU 337 142012 144131.291 19443.335
9 uafairbanks 14 8989 8965.000 4703.857
10 uarizona 105 168691 176141.267 21611.886
11 UArkansas 68 72687 88440.971 17313.471
12 UBuffalo 198 17009 18247.980 7169.449
13 UCBerkeley 46 234307 249732.174 28040.761
14 UConn 105 107774 120437.343 18834.410
15 UDelaware 41 61415 63680.683 13675.561
16 UF 107 221502 298172.084 40745.673
17 uhmanoa 238 33998 36382.466 10435.895
18 uidaho 59 18157 46936.153 18987.627
19 uiowa 75 132458 142429.987 23278.413
20 UMaine 37 12383 13145.486 6463.865
21 UMassAmherst 55 58311 62245.582 14565.055
22 UMich 281 271119 289980.399 28418.367
23 UMNews 29 116540 132893.828 22054.724
24 umontana 34 15601 30274.647 8382.118
25 UNC 203 143929 148978.256 20448.064
26 unevadareno 80 31603 31857.537 9824.462
27 universityofga 189 137413 151401.614 21126.243
28 universityofky 50 117599 247562.820 86806.720
29 universityofri 44 33890 37335.477 10604.409
30 UnivOfKansas 13 88211 104688.846 19714.615
31 UNLincoln 131 50801 61169.656 13142.099
32 UNM 38 82113 86093.816 16837.342
33 UofAlabama 242 189192 191945.236 22342.430
34 UofIllinois 108 100611 113251.917 18570.306
35 UofMaryland 60 105226 125443.450 19486.200
36 UofNH 41 30984 33012.244 10663.707
37 UofNorthDakota 128 24919 25550.398 8718.867
38 UofOklahoma 203 179627 186626.680 22140.616
39 UofSC 74 210961 251021.068 38443.068
40 uoregon 136 152594 155687.662 21071.382
41 usd 2 13706 16229.500 6765.500
42 UTAustin 158 240438 254754.222 25476.082
43 UTKnoxville 91 155131 176762.396 32204.901
44 UUtah 311 126528 132345.994 19486.926
45 UVA 139 104374 195146.353 25680.345
46 uvmvermont 52 34186 36391.019 10890.558
47 UW 96 177938 188240.583 23090.594
48 UWMadison 91 194519 205753.516 22952.275
49 UWyonews 66 4685 4703.924 3073.136
50 WestVirginiaU 101 197557 224080.109 26044.950
TWLikes_Mean TWRT_Mean TWReply_Mean
1 4.91363636 1.4545455 0.26818182
2 2.90909091 0.7355372 0.10743802
3 79.63265306 21.7142857 0.99319728
4 34.30851064 5.3670213 0.46808511
5 108.89855072 12.0144928 2.04347826
6 82.77777778 9.9629630 1.00000000
7 5.36190476 0.9666667 0.25238095
8 2.27299703 0.6617211 0.13946588
9 0.07142857 0.0000000 0.07142857
10 6.49523810 1.7619048 0.21904762
11 41.67647059 6.8235294 0.85294118
12 3.06565657 0.6868687 0.05555556
13 21.80434783 5.1956522 1.63043478
14 21.43809524 2.8761905 0.31428571
15 8.02439024 1.1219512 0.14634146
16 80.79439252 11.8504673 9.82242991
17 3.81092437 0.9033613 0.07563025
18 64.86440678 16.2542373 5.40677966
19 141.84000000 10.4800000 0.86666667
20 3.29729730 0.8378378 0.24324324
21 9.25454545 2.0545455 0.43636364
22 35.12811388 4.9964413 0.83629893
23 22.72413793 6.5862069 1.20689655
24 23.17647059 3.3529412 0.29411765
25 8.80788177 1.7635468 0.26600985
26 1.57500000 0.4250000 0.12500000
27 41.17989418 4.8994709 0.55555556
28 314.50000000 34.0200000 43.04000000
29 6.52272727 1.1136364 0.15909091
30 70.53846154 5.7692308 1.15384615
31 6.81679389 1.3511450 0.10687023
32 10.10526316 1.7894737 0.68421053
33 12.21487603 1.5454545 0.16528926
34 20.34259259 2.9814815 0.38888889
35 15.98333333 4.6000000 0.35000000
36 6.75609756 1.7560976 0.24390244
37 2.96093750 0.4531250 0.07031250
38 14.91133005 1.7241379 0.22167488
39 277.44594595 59.1081081 2.17567568
40 17.68382353 1.8382353 0.48529412
41 8.50000000 0.5000000 0.00000000
42 17.68987342 2.7721519 0.34177215
43 215.50549451 22.6263736 4.97802198
44 17.24115756 1.7845659 0.45337621
45 161.27338129 26.0215827 1.92086331
46 9.96153846 1.3846154 0.26923077
47 19.38541667 2.9895833 0.50000000
48 6.90109890 2.1098901 0.09890110
49 0.34848485 0.1060606 0.09090909
50 57.63366337 7.0990099 1.09900990
Compare followers and average reach by college
# Divide mean impressions and mean reach by number of followers. Columns are
# referenced by name rather than by position, so the ratios stay correct if
# the column order of merged_college2 ever changes.
merged_college2$ImpPerFollower <-
  merged_college2$Impressions_Mean / merged_college2$Followers
merged_college2$ReachPerFollower <-
  merged_college2$Reach_Mean / merged_college2$Followers
merged_college2
Author Total_Posts Followers Impressions_Mean Reach_Mean
1 CUBoulder 220 90492 94814.136 16634.355
2 IndianaUniv 121 87544 109423.727 16215.041
3 LSU 147 363002 425548.429 35588.626
4 Mizzou 188 138271 151896.798 21230.415
5 OhioState 69 381557 463142.203 35812.884
6 OleMiss 27 112905 129935.444 21808.815
7 penn_state 210 227153 229664.771 24186.681
8 RutgersU 337 142012 144131.291 19443.335
9 uafairbanks 14 8989 8965.000 4703.857
10 uarizona 105 168691 176141.267 21611.886
11 UArkansas 68 72687 88440.971 17313.471
12 UBuffalo 198 17009 18247.980 7169.449
13 UCBerkeley 46 234307 249732.174 28040.761
14 UConn 105 107774 120437.343 18834.410
15 UDelaware 41 61415 63680.683 13675.561
16 UF 107 221502 298172.084 40745.673
17 uhmanoa 238 33998 36382.466 10435.895
18 uidaho 59 18157 46936.153 18987.627
19 uiowa 75 132458 142429.987 23278.413
20 UMaine 37 12383 13145.486 6463.865
21 UMassAmherst 55 58311 62245.582 14565.055
22 UMich 281 271119 289980.399 28418.367
23 UMNews 29 116540 132893.828 22054.724
24 umontana 34 15601 30274.647 8382.118
25 UNC 203 143929 148978.256 20448.064
26 unevadareno 80 31603 31857.537 9824.462
27 universityofga 189 137413 151401.614 21126.243
28 universityofky 50 117599 247562.820 86806.720
29 universityofri 44 33890 37335.477 10604.409
30 UnivOfKansas 13 88211 104688.846 19714.615
31 UNLincoln 131 50801 61169.656 13142.099
32 UNM 38 82113 86093.816 16837.342
33 UofAlabama 242 189192 191945.236 22342.430
34 UofIllinois 108 100611 113251.917 18570.306
35 UofMaryland 60 105226 125443.450 19486.200
36 UofNH 41 30984 33012.244 10663.707
37 UofNorthDakota 128 24919 25550.398 8718.867
38 UofOklahoma 203 179627 186626.680 22140.616
39 UofSC 74 210961 251021.068 38443.068
40 uoregon 136 152594 155687.662 21071.382
41 usd 2 13706 16229.500 6765.500
42 UTAustin 158 240438 254754.222 25476.082
43 UTKnoxville 91 155131 176762.396 32204.901
44 UUtah 311 126528 132345.994 19486.926
45 UVA 139 104374 195146.353 25680.345
46 uvmvermont 52 34186 36391.019 10890.558
47 UW 96 177938 188240.583 23090.594
48 UWMadison 91 194519 205753.516 22952.275
49 UWyonews 66 4685 4703.924 3073.136
50 WestVirginiaU 101 197557 224080.109 26044.950
TWLikes_Mean TWRT_Mean TWReply_Mean ImpPerFollower ReachPerFollower
1 4.91363636 1.4545455 0.26818182 1.0477626 0.18382127
2 2.90909091 0.7355372 0.10743802 1.2499283 0.18522162
3 79.63265306 21.7142857 0.99319728 1.1723033 0.09803975
4 34.30851064 5.3670213 0.46808511 1.0985441 0.15354207
5 108.89855072 12.0144928 2.04347826 1.2138218 0.09385985
6 82.77777778 9.9629630 1.00000000 1.1508387 0.19316075
7 5.36190476 0.9666667 0.25238095 1.0110576 0.10647749
8 2.27299703 0.6617211 0.13946588 1.0149233 0.13691333
9 0.07142857 0.0000000 0.07142857 0.9973301 0.52329037
10 6.49523810 1.7619048 0.21904762 1.0441652 0.12811523
11 41.67647059 6.8235294 0.85294118 1.2167371 0.23819212
12 3.06565657 0.6868687 0.05555556 1.0728426 0.42150917
13 21.80434783 5.1956522 1.63043478 1.0658332 0.11967530
14 21.43809524 2.8761905 0.31428571 1.1174991 0.17475838
15 8.02439024 1.1219512 0.14634146 1.0368914 0.22267461
16 80.79439252 11.8504673 9.82242991 1.3461372 0.18395172
17 3.81092437 0.9033613 0.07563025 1.0701355 0.30695614
18 64.86440678 16.2542373 5.40677966 2.5850169 1.04574694
19 141.84000000 10.4800000 0.86666667 1.0752841 0.17574185
20 3.29729730 0.8378378 0.24324324 1.0615753 0.52199506
21 9.25454545 2.0545455 0.43636364 1.0674758 0.24978228
22 35.12811388 4.9964413 0.83629893 1.0695687 0.10481879
23 22.72413793 6.5862069 1.20689655 1.1403280 0.18924596
24 23.17647059 3.3529412 0.29411765 1.9405581 0.53728079
25 8.80788177 1.7635468 0.26600985 1.0350816 0.14207049
26 1.57500000 0.4250000 0.12500000 1.0080542 0.31087120
27 41.17989418 4.8994709 0.55555556 1.1017998 0.15374268
28 314.50000000 34.0200000 43.04000000 2.1051439 0.73815866
29 6.52272727 1.1136364 0.15909091 1.1016665 0.31290673
30 70.53846154 5.7692308 1.15384615 1.1868004 0.22349384
31 6.81679389 1.3511450 0.10687023 1.2041034 0.25869765
32 10.10526316 1.7894737 0.68421053 1.0484797 0.20505087
33 12.21487603 1.5454545 0.16528926 1.0145526 0.11809395
34 20.34259259 2.9814815 0.38888889 1.1256415 0.18457530
35 15.98333333 4.6000000 0.35000000 1.1921336 0.18518427
36 6.75609756 1.7560976 0.24390244 1.0654610 0.34416819
37 2.96093750 0.4531250 0.07031250 1.0253380 0.34988833
38 14.91133005 1.7241379 0.22167488 1.0389679 0.12325884
39 277.44594595 59.1081081 2.17567568 1.1898932 0.18222832
40 17.68382353 1.8382353 0.48529412 1.0202738 0.13808788
41 8.50000000 0.5000000 0.00000000 1.1841164 0.49361593
42 17.68987342 2.7721519 0.34177215 1.0595423 0.10595697
43 215.50549451 22.6263736 4.97802198 1.1394395 0.20759810
44 17.24115756 1.7845659 0.45337621 1.0459819 0.15401276
45 161.27338129 26.0215827 1.92086331 1.8696836 0.24604159
46 9.96153846 1.3846154 0.26923077 1.0645007 0.31856777
47 19.38541667 2.9895833 0.50000000 1.0578998 0.12976764
48 6.90109890 2.1098901 0.09890110 1.0577554 0.11799503
49 0.34848485 0.1060606 0.09090909 1.0040393 0.65595227
50 57.63366337 7.0990099 1.09900990 1.1342555 0.13183512
Adding in enrollment data
The Carnegie Foundation for the Advancement of Teaching and the American Council on Education collaborate to provide the Carnegie Classifications. These classifications provide data related to every institution of higher education in the US. Read in data from Carnegie.
# Import the lookup table that maps each Twitter author to a school name.
TWAuthor2School <- read_csv("_data/Author_SchoolName.csv")

# Import enrollment data from CCIHE.
EnrollmentData <- read_csv("_data/CCIHE2021PublicData.csv")

# Combine author > school and merged_college2, then attach the enrollment
# data by school name and keep only the columns needed for analysis.
twitter_enrollment <- merged_college2 %>%
  left_join(TWAuthor2School, by = "Author") %>%
  left_join(EnrollmentData, by = "SchoolName") %>%
  select(
    Author, SchoolName, F20Enrollment, Followers, Total_Posts, Reach_Mean,
    SizeSetting
  )

# Shorten the size/setting labels.
# NOTE(review): the original mapping listed
# "Four-year, medium, primarily nonresidential" twice (the second time mapped
# to "MedPriRez"). That second entry could never match, because the first had
# already rewritten the text, so it is dropped here. Confirm whether a
# different category was intended.
twitter_enrollment$SizeSetting <- str_replace_all(
  twitter_enrollment$SizeSetting,
  c(
    "Four-year, large, primarily residential" = "LargePriRez",
    "Four-year, large, highly residential" = "LargeHighRez",
    "Four-year, large, primarily nonresidential" = "LargeNonRez",
    "Four-year, medium, primarily nonresidential" = "MedNonRez",
    "Four-year, small, highly residential" = "SmallHighRez",
    "Four-year, medium, primarily residential" = "MedPriRez"
  )
)
twitter_enrollment
Author
1 CUBoulder
2 IndianaUniv
3 LSU
4 Mizzou
5 OhioState
6 OleMiss
7 penn_state
8 RutgersU
9 uafairbanks
10 uarizona
11 UArkansas
12 UBuffalo
13 UCBerkeley
14 UConn
15 UDelaware
16 UF
17 uhmanoa
18 uidaho
19 uiowa
20 UMaine
21 UMassAmherst
22 UMich
23 UMNews
24 umontana
25 UNC
26 unevadareno
27 universityofga
28 universityofky
29 universityofri
30 UnivOfKansas
31 UNLincoln
32 UNM
33 UofAlabama
34 UofIllinois
35 UofMaryland
36 UofNH
37 UofNorthDakota
38 UofOklahoma
39 UofSC
40 uoregon
41 usd
42 UTAustin
43 UTKnoxville
44 UUtah
45 UVA
46 uvmvermont
47 UW
48 UWMadison
49 UWyonews
50 WestVirginiaU
SchoolName
1 University of Colorado Boulder
2 Indiana University-Bloomington
3 Louisiana State University and Agricultural & Mechanical College
4 University of Missouri-Columbia
5 Ohio State University-Main Campus
6 University of Mississippi
7 The Pennsylvania State University
8 Rutgers University-New Brunswick
9 University of Alaska Fairbanks
10 University of Arizona Global Campus
11 University of Arkansas at Little Rock
12 University at Buffalo
13 University of California-Berkeley
14 University of Connecticut
15 University of Delaware
16 University of Florida
17 University of Hawaii at Manoa
18 University of Idaho
19 University of Iowa
20 University of Maine
21 University of Massachusetts-Amherst
22 University of Michigan-Ann Arbor
23 University of Minnesota-Twin Cities
24 The University of Montana
25 University of North Carolina at Chapel Hill
26 University of Nevada-Reno
27 University of Georgia
28 University of Kentucky
29 University of Rhode Island
30 University of Kansas
31 University of Nebraska-Lincoln
32 University of New Mexico-Main Campus
33 The University of Alabama
34 University of Illinois Urbana-Champaign
35 University of Maryland Global Campus
36 University of New Hampshire-Main Campus
37 University of North Dakota
38 University of Oklahoma-Norman Campus
39 University of South Carolina-Columbia
40 University of Oregon
41 University of South Dakota
42 The University of Texas at Austin
43 The University of Tennessee-Knoxville
44 University of Utah
45 University of Virginia-Main Campus
46 University of Vermont
47 University of Washington-Seattle Campus
48 University of Wisconsin-Madison
49 University of Wyoming
50 West Virginia University
F20Enrollment Followers Total_Posts Reach_Mean SizeSetting
1 37437 90492 220 16634.355 LargePriRez
2 43064 87544 121 16215.041 LargePriRez
3 34285 363002 147 35588.626 LargePriRez
4 31089 138271 188 21230.415 LargePriRez
5 61369 381557 69 35812.884 LargePriRez
6 21014 112905 27 21808.815 LargePriRez
7 89816 227153 210 24186.681 LargePriRez
8 50411 142012 337 19443.335 LargePriRez
9 6813 8989 14 4703.857 MedNonRez
10 31115 168691 105 21611.886 LargeNonRez
11 8899 72687 68 17313.471 MedNonRez
12 32347 17009 198 7169.449 LargePriRez
13 42327 234307 46 28040.761 LargePriRez
14 27215 107774 105 18834.410 LargeHighRez
15 23613 61415 41 13675.561 LargePriRez
16 53372 221502 107 40745.673 LargePriRez
17 18025 33998 238 10435.895 LargePriRez
18 10791 18157 59 18987.627 MedPriRez
19 30318 132458 75 23278.413 LargePriRez
20 11741 12383 37 6463.865 MedPriRez
21 31642 58311 55 14565.055 LargeHighRez
22 47907 271119 281 28418.367 LargePriRez
23 52017 116540 29 22054.724 LargeNonRez
24 9808 15601 34 8382.118 MedPriRez
25 30092 143929 203 20448.064 LargeHighRez
26 20722 31603 80 9824.462 LargeNonRez
27 39147 137413 189 21126.243 LargePriRez
28 29986 117599 50 86806.720 LargePriRez
29 17649 33890 44 10604.409 LargePriRez
30 26744 88211 13 19714.615 LargePriRez
31 25108 50801 131 13142.099 LargePriRez
32 22311 82113 38 16837.342 LargeNonRez
33 37840 189192 242 22342.430 LargePriRez
34 52679 100611 108 18570.306 LargePriRez
35 58526 105226 60 19486.200 LargeNonRez
36 14348 30984 41 10663.707 LargeHighRez
37 13615 24919 128 8718.867 LargePriRez
38 27772 179627 203 22140.616 LargePriRez
39 35470 210961 74 38443.068 LargePriRez
40 21752 152594 136 21071.382 LargePriRez
41 9459 13706 2 6765.500 MedPriRez
42 50476 240438 158 25476.082 LargeNonRez
43 30559 155131 91 32204.901 LargePriRez
44 33081 126528 311 19486.926 LargePriRez
45 25628 104374 139 25680.345 LargePriRez
46 13292 34186 52 10890.558 LargeHighRez
47 48149 177938 96 23090.594 LargePriRez
48 44640 194519 91 22952.275 LargePriRez
49 11829 4685 66 3073.136 LargePriRez
50 26269 197557 101 26044.950 LargePriRez
Comparing followers to enrollment
#followers per enrolled student (Fall 2020 enrollment)
twitter_enrollment$TWFolEnr <-
  twitter_enrollment$Followers / twitter_enrollment$F20Enrollment
#mean reach per enrolled student
# The column is named Reach_Mean; the original `$Reach` only resolved through
# `$` partial matching, which works on a data.frame but not on a tibble.
twitter_enrollment$TWReachEnr <-
  twitter_enrollment$Reach_Mean / twitter_enrollment$F20Enrollment
#keep just the school name and the two per-enrollment ratios
twitter_enrollment_prop <- twitter_enrollment %>%
  select(SchoolName, TWFolEnr, TWReachEnr)
twitter_enrollment_prop
SchoolName TWFolEnr
1 University of Colorado Boulder 2.4171809
2 Indiana University-Bloomington 2.0328813
3 Louisiana State University and Agricultural & Mechanical College 10.5877789
4 University of Missouri-Columbia 4.4475860
5 Ohio State University-Main Campus 6.2174225
6 University of Mississippi 5.3728467
7 The Pennsylvania State University 2.5290928
8 Rutgers University-New Brunswick 2.8170836
9 University of Alaska Fairbanks 1.3193894
10 University of Arizona Global Campus 5.4215330
11 University of Arkansas at Little Rock 8.1679964
12 University at Buffalo 0.5258293
13 University of California-Berkeley 5.5356392
14 University of Connecticut 3.9600955
15 University of Delaware 2.6008978
16 University of Florida 4.1501536
17 University of Hawaii at Manoa 1.8861581
18 University of Idaho 1.6826059
19 University of Iowa 4.3689557
20 University of Maine 1.0546802
21 University of Massachusetts-Amherst 1.8428355
22 University of Michigan-Ann Arbor 5.6592773
23 University of Minnesota-Twin Cities 2.2404214
24 The University of Montana 1.5906403
25 University of North Carolina at Chapel Hill 4.7829656
26 University of Nevada-Reno 1.5250941
27 University of Georgia 3.5101796
28 University of Kentucky 3.9217968
29 University of Rhode Island 1.9202221
30 University of Kansas 3.2983473
31 University of Nebraska-Lincoln 2.0232993
32 University of New Mexico-Main Campus 3.6803819
33 The University of Alabama 4.9997886
34 University of Illinois Urbana-Champaign 1.9098882
35 University of Maryland Global Campus 1.7979360
36 University of New Hampshire-Main Campus 2.1594647
37 University of North Dakota 1.8302607
38 University of Oklahoma-Norman Campus 6.4679173
39 University of South Carolina-Columbia 5.9475895
40 University of Oregon 7.0151710
41 University of South Dakota 1.4489904
42 The University of Texas at Austin 4.7634123
43 The University of Tennessee-Knoxville 5.0764423
44 University of Utah 3.8247937
45 University of Virginia-Main Campus 4.0726549
46 University of Vermont 2.5719230
47 University of Washington-Seattle Campus 3.6955700
48 University of Wisconsin-Madison 4.3575045
49 University of Wyoming 0.3960605
50 West Virginia University 7.5205375
TWReachEnr
1 0.4443293
2 0.3765336
3 1.0380232
4 0.6828915
5 0.5835664
6 1.0378231
7 0.2692915
8 0.3856963
9 0.6904238
10 0.6945809
11 1.9455524
12 0.2216419
13 0.6624793
14 0.6920599
15 0.5791539
16 0.7634279
17 0.5789678
18 1.7595799
19 0.7678083
20 0.5505378
21 0.4603076
22 0.5931986
23 0.4239907
24 0.8546205
25 0.6795183
26 0.4741078
27 0.5396644
28 2.8949083
29 0.6008504
30 0.7371603
31 0.5234228
32 0.7546655
33 0.5904448
34 0.3525182
35 0.3329495
36 0.7432191
37 0.6403869
38 0.7972280
39 1.0838192
40 0.9687101
41 0.7152447
42 0.5047167
43 1.0538598
44 0.5890670
45 1.0020425
46 0.8193318
47 0.4795654
48 0.5141639
49 0.2597968
50 0.9914710