Above I attempted to get additional information on the Reddit comments (user, date, the post being responded to, upvotes, downvotes). Below I read the comments in as .rds files, as Saaradhaa has done, because my other approach to collecting comments with RedditExtractoR has not worked.
saveRDS(url_content_info, "url_content_info.rds")
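A minimal sketch of reading the saved comment data back in; the .rds file names below are hypothetical placeholders for whatever files were actually saved earlier:

```r
# Hypothetical file names - substitute the .rds files saved in the collection step
blue_comments <- readRDS("blue_comments.rds")  # /r/democrats comments
red_comments <- readRDS("red_comments.rds")    # /r/republicans comments
```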
Next I want to remove any comments that are [deleted] or [removed], since a user’s comment could have been deleted by the OP or removed by a moderator. I also still need to remove the AutoModerator messages from both subreddits, since almost every post will have an automod comment.
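A sketch of that cleaning step with dplyr, assuming the comment text lives in a `comment` column and the author name in a `user` column (the column names are assumptions):

```r
library(dplyr)

red_comments <- red_comments %>%
  # drop comments deleted by the OP or removed by a moderator
  filter(!(comment %in% c("[removed]", "[deleted]"))) %>%
  # drop AutoModerator messages
  filter(user != "AutoModerator")
```

The same two filters would be applied to the blue comments as well.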
Yay, finally I have my data! I now have all the comments I wanted. Next comes preprocessing: below I turn the blue comments into a corpus, then tokenize it and strip out the excess junk.
Preprocessing /r/democrats
blue_corpus <- corpus(blue_comments$comment)
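The tokenization code itself did not render; it presumably looked something like this quanteda pipeline (a sketch, not the exact original):

```r
library(quanteda)

blue_tokens <- tokens(blue_corpus,
                      remove_punct = TRUE,    # strip punctuation
                      remove_numbers = TRUE,  # strip digits
                      remove_symbols = TRUE)  # strip symbols/emoji
# drop English stopwords
blue_tokens <- tokens_select(blue_tokens, selection = "remove",
                             pattern = stopwords("en"))
```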
# I remove words that don't have any meaning to me that were in the network cloud.
blue_tokens <- tokens_remove(blue_tokens, c("back", "really", "less", "saying", "look", "like", "get", "every", "said", "anything", "s", "right", "now", "see"))
There are still some words I want to get rid of based on the network plot. Some words on the outside of the network I would expect to be closer to the center, but that could just be because those words are not actually relevant.
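The small fcm behind that network plot was not shown in the rendered code; one common way to build it, assuming a `blue_dfm` has already been created from the tokens, is to keep only the top features:

```r
library(quanteda)
library(quanteda.textplots)

blue_fcm <- fcm(blue_dfm)
# keep only the 30 most frequent features so the network stays readable
top_feats <- names(topfeatures(blue_fcm, 30))
small_fcm_blue <- fcm_select(blue_fcm, pattern = top_feats, selection = "keep")
textplot_network(small_fcm_blue, min_freq = 0.5, omit_isolated = TRUE)
```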
Preprocessing the /r/republicans corpus
Below I repeat the steps from the blue comments on the red comments.
red_corpus <- corpus(red_comments$comment)
# I remove words that don't have any meaning to me that were in the network cloud.
red_tokens <- tokens_remove(red_tokens, c("back", "really", "less", "saying", "look", "like", "get", "every", "said", "anything", "s", "right", "now", "see", "anyone", "one", "say", "take", "much", "last", "never", "changed", "just", "questions", "r", "please", "note"))
Republicans DFM
red_dfm <- red_tokens %>% tokens_tolower() %>% dfm()
# assign the result so the trimmed dfm is actually kept
red_dfm <- dfm_trim(red_dfm, min_termfreq = 3)
red_fcm <- fcm(red_dfm)
This is just a simple wordcloud to visually get a gist of some of the most popular words in the subreddit.
textplot_wordcloud(red_dfm, min_count = 10, max_words = 100, color = "red")
Again, let’s see the top terms in the republican subreddit DFM. I’m unsure what “t” is; stemming may take care of it, or it could have some significant meaning within the subreddit (an inside joke, perhaps).
topfeatures(red_dfm, 20)
This network plot seems closer to what I am looking for with the /r/democrats network plot. In both network plots, “people” is at the center of the network. The only problem is that I don’t know how the word is being used, or in reference to what. I can investigate this with the kwic function, using “people” as a keyword.
Dictionary Methods
I want to use word graphs in the next blog post or the final project.
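The NRC scoring step did not render; it presumably used `liwcalike()` from quanteda.dictionaries with the NRC word-emotion lexicon, roughly:

```r
library(quanteda.dictionaries)

# score each comment against the NRC word-emotion lexicon;
# the result has one column per NRC category (positive, negative, anger, ...)
blue_nrc_sentiment <- liwcalike(blue_corpus, dictionary = data_dictionary_NRC)
```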
ggplot(blue_nrc_sentiment) + geom_histogram(aes(positive), fill = "blue")
The graphs are similar in structure, but the democrats subreddit has far more positive posts than the republicans subreddit.
It appears these two dictionaries in particular are quite similar; I’ll have to check this against a third dictionary. The only difference is that NRC polarity has a higher count.
Moral Foundations Dictionary
I’ll have to find a way to measure or graph these to compare the subreddits holistically. Otherwise I could join the data frames together, but I don’t think that would benefit me.
liwcalike(blue_corpus, data_dictionary_MFD)
So I feel like I did something wrong because the graph is completely symmetrical.
Creating my own dictionary
# dictionary() needs a named list of terms (or a file); called with no arguments it errors.
# The categories and terms below are hypothetical placeholders.
my_dict <- dictionary(list(economy = c("inflation", "jobs", "taxes"),
                           election = c("vote", "midterm", "ballot")))
Keywords in Context
Here I fill in some of the top words, because I want to see exactly how they are used with the kwic function. I like this function because I can pick specific words and look at them in the context of a larger sentence. At a glance, in blue_corpus, people use “they” when talking about the President. For example, “they think that biden…” or “they knew biden…”. Both subreddits will show negative sentiment towards the President because people want to express their grievances, but do republicans tend to talk more negatively about him? I’ll check a few other keywords as well, to look at the discourse at a glance for terms like “ukraine”, “midterm”, and “Walker”.
kwic_blue_biden <- kwic(blue_corpus, "biden")
kwic_red_biden <- kwic(red_corpus, "biden")
kwic_blue_ukraine <- kwic(blue_corpus, "Ukraine")
kwic_red_ukraine <- kwic(red_corpus, "Ukraine")
kwic_blue_midterm <- kwic(blue_corpus, "midterm")
kwic_red_midterm <- kwic(red_corpus, "midterm")
kwic_blue_walker <- kwic(blue_corpus, "Walker")
kwic_red_walker <- kwic(red_corpus, "Walker")
LDA Models for /r/democrats and /r/republicans
# install.packages("seededlda") first if the package is not yet installed
library(seededlda)
dem_comments_lda <- textmodel_lda(blue_dfm, k = 10)
dem_terms <- terms(dem_comments_lda, 10)
dem_terms
gop_comments_lda <- textmodel_lda(red_dfm, k = 10)
gop_terms <- terms(gop_comments_lda, 10)
gop_terms
I think I’ll want to do LDA modelling based on what we learned in Tutorial 10 in my final project or future blog posts. That tutorial seemed more comprehensive, and I noticed the grouped words were a bit more similar when lambda was set to various values between 0.2 and 0.4.
textplot_keyness(textstat_keyness(blue_dfm))
Writing the data to CSV
write_csv(blue_comments, "blue_comments1.csv")
write_csv(red_comments, "red_comments1.csv")