---
title: "Social Media Activists"
author: "Aanchal Setia"
description: "Initial Data Cleaning"
date: "10/10/2022"
format:
html:
toc: true
code-fold: true
code-copy: true
code-tools: true
categories:
- blog post II
- Aanchal Setia
---
# Summary of My Results

So far, I have scraped tweets from three Black Lives Matter chapter accounts:
BLM NYC, BLM LA, and BLM Chicago.

# Pre-processing

I created word clouds to identify words that were redundant. I found many
mentions of other accounts that were not useful for my research question, so I
removed them. I also removed hashtags, numbers, emojis, punctuation, links, and
the retweet marker "rt", and I converted all text to lower case. (A sketch of
the word-cloud step appears at the end of this post.)

# Basic Analysis Plan

Now that I have formatted tweets from the three chapters, I will run sentiment
analysis on each chapter's tweets and compare the results (see the sketch at
the end of this post).
```{r}
#| label: setup
#| warning: false
library(tidyverse)
knitr::opts_chunk$set(echo = TRUE)
```
# Calling Necessary Libraries
```{r}
#| message: false
library(httr)
library(tm)
library(stringr)
library(rtweet)
library(twitteR)
library(purrr)
library(tidytext)
library(dplyr)
library(tidyr)
library(lubridate)
library(scales)
library(broom)
library(ggplot2)
library(quanteda)
library(quanteda.textplots)
```
# Getting Twitter Access
```{r}
# API credentials (left blank here; do not commit real keys to version control)
consumerkey <- ""
consumersecret <- ""
accesstoken <- ""
accesssecret <- ""
options(httr_oauth_cache = TRUE)
setup_twitter_oauth(consumer_key = consumerkey, consumer_secret = consumersecret,
                    access_token = accesstoken, access_secret = accesssecret)
```
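Rather than hard-coding the keys, they could be read from environment variables (for example, set in a `.Renviron` file). This is a minimal sketch; the variable names such as `TWITTER_CONSUMER_KEY` are placeholders I chose, not names required by the packages.

```{r}
#| eval: false
# Hypothetical environment variable names; set them in ~/.Renviron
consumerkey    <- Sys.getenv("TWITTER_CONSUMER_KEY")
consumersecret <- Sys.getenv("TWITTER_CONSUMER_SECRET")
accesstoken    <- Sys.getenv("TWITTER_ACCESS_TOKEN")
accesssecret   <- Sys.getenv("TWITTER_ACCESS_SECRET")

setup_twitter_oauth(consumer_key = consumerkey, consumer_secret = consumersecret,
                    access_token = accesstoken, access_secret = accesssecret)
```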
# Set up default authentication for the rtweet package
```{r}
# auth_setup_default() opens an interactive browser flow and caches a token;
# it is commented out so the document can render non-interactively.
#auth_setup_default()
```
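The `get_timeline()` call below needs a cached rtweet token. One way to create and save one non-interactively is sketched here, assuming the same hypothetical environment variable names as above; `rtweet_bot()`, `auth_save()`, and `auth_as()` are part of rtweet 1.0+.

```{r}
#| eval: false
# Sketch only: build a bot token from the (hypothetical) environment variables,
# save it, and make it the default auth that get_timeline() will use
token <- rtweet_bot(
  api_key       = Sys.getenv("TWITTER_CONSUMER_KEY"),
  api_secret    = Sys.getenv("TWITTER_CONSUMER_SECRET"),
  access_token  = Sys.getenv("TWITTER_ACCESS_TOKEN"),
  access_secret = Sys.getenv("TWITTER_ACCESS_SECRET")
)
auth_save(token, "blm-project")
auth_as("blm-project")
```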
## Creating a function to format tweets

```{r}
formatting_tweets <- function(tweets) {
  # Remove mentions (@handles)
  tweets$full_text <- str_remove_all(string = tweets$full_text,
                                     pattern = "[@][\\w_-]+")
  # Remove hashtags
  tweets$full_text <- str_remove_all(string = tweets$full_text,
                                     pattern = "[#][\\w_-]+")
  # Remove links
  tweets$full_text <- str_remove_all(string = tweets$full_text,
                                     pattern = "http\\S+\\s*")
  # Remove emojis and other non-ASCII characters
  tweets$full_text <- iconv(x = tweets$full_text, from = "latin1",
                            to = "ASCII", sub = "")
  # Remove punctuation
  tweets$full_text <- str_remove_all(string = tweets$full_text,
                                     pattern = "[[:punct:]]")
  # Change case to lower case
  tweets$full_text <- str_to_lower(string = tweets$full_text)
  # Remove numbers
  tweets$full_text <- str_remove_all(string = tweets$full_text,
                                     pattern = "[[:digit:]]")
  # Remove stopwords
  tweets$full_text <- removeWords(tweets$full_text,
                                  c(stopwords("en"), "can", "will"))
  # Remove the retweet marker "rt" at the start of a tweet
  tweets$full_text <- gsub("^rt\\b", "", tweets$full_text)
  # Remove the "amp" left over from "&amp;" once punctuation is stripped
  tweets$full_text <- gsub("\\bamp\\b", "", tweets$full_text)
  # Collapse repeated whitespace
  tweets$full_text <- str_squish(string = tweets$full_text)
  # Keep only the year from the timestamp
  tweets$created_at <- format(tweets$created_at, format = "%Y")
  return(tweets)
}
```
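Because authentication is not yet working, here is a quick check of the cleaning function on a couple of made-up tweets; the tibble below is mock data I constructed, not scraped output.

```{r}
# Mock data with the two columns that formatting_tweets() touches
mock_tweets <- tibble(
  full_text = c(
    "RT @BLMNYC: Join us &amp; 100 others at the rally! #BlackLivesMatter https://t.co/abc123",
    "We will march on October 10, 2022. Details: https://example.org @BLMLA"
  ),
  created_at = as.POSIXct(c("2022-10-10 12:00:00", "2022-10-09 09:30:00"))
)

formatting_tweets(mock_tweets)$full_text
```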
# Scraping Tweets from Black Lives Matter's Three Chapters

```{r}
BLMchapters <- c("BLMNYC", "BLMChi", "BLMLA")
for (i in BLMchapters) {
  # Build the @handle, pull the chapter's timeline, clean it, and store it
  # in a data frame named after the chapter
  handle <- paste0("@", i)
  result <- get_timeline(user = handle, n = 1)
  formatted_result <- formatting_tweets(result)
  assign(i, data.frame(formatted_result))
}
```
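# Sketches for the Planned Next Steps

For reference, the word-cloud step mentioned in the summary could be reproduced on any of the cleaned chapter data frames with quanteda along these lines. This is a sketch only and is not evaluated here, since it depends on the scraped data being available.

```{r}
#| eval: false
# Sketch: word cloud of the cleaned NYC tweets using quanteda + quanteda.textplots
nyc_dfm <- corpus(BLMNYC, text_field = "full_text") |>
  tokens() |>
  dfm()
textplot_wordcloud(nyc_dfm, max_words = 100)
```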
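The planned comparison could start from something like the following tidytext sketch. It assumes the three chapter data frames above were created successfully and uses the Bing lexicon as one possible choice, not a decision the analysis has settled on.

```{r}
#| eval: false
# Sketch: compare positive/negative word counts across the three chapters
all_chapters <- bind_rows(
  BLMNYC = BLMNYC, BLMChi = BLMChi, BLMLA = BLMLA,
  .id = "chapter"
)

chapter_sentiment <- all_chapters |>
  unnest_tokens(word, full_text) |>
  inner_join(get_sentiments("bing"), by = "word") |>
  count(chapter, sentiment)

chapter_sentiment
```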