---
title: "Social Media Activists"
author: "Aanchal Setia"
description: "Initial Data Cleaning"
date: "10/10/2022"
format:
html:
toc: true
code-fold: true
code-copy: true
code-tools: true
categories:
- blog post II
- Aanchal Setia
---
# Summary of My Results

So far, I have scraped tweets from three Black Lives Matter chapter accounts:
BLM NYC, BLM LA, and BLM Chicago.

# Pre-processing

I created word clouds to identify words that were redundant. I found many
mentions of other accounts that were not useful for my research question, so I
removed them. I also removed hashtags, numbers, emojis, punctuation, links, and
the retweet marker "rt", and I converted all text to lower case. (A sketch of
the word-cloud step appears at the end of this post.)

# Basic Analysis Plan

Now that I have formatted tweets from the three chapters, I will run sentiment
analysis on each chapter's tweets and compare the results (see the sketch at
the end of this post).
```{r}
#| label: setup
#| warning: false
library(tidyverse)
knitr::opts_chunk$set(echo = TRUE)
```
# Calling Necessary Libraries
```{r}
#| message: false
library(httr)
library(tm)
library(stringr)
library(rtweet)
library(twitteR)
library(purrr)
library(tidytext)
library(dplyr)
library(tidyr)
library(lubridate)
library(scales)
library(broom)
library(ggplot2)
library(quanteda)
library(quanteda.textplots)
```
# Getting Twitter Access
```{r}
# API credentials (left blank here; do not commit real keys to version control)
consumerkey <- ""
consumersecret <- ""
accesstoken <- ""
accesssecret <- ""
options(httr_oauth_cache = TRUE)
setup_twitter_oauth(consumer_key = consumerkey, consumer_secret = consumersecret,
                    access_token = accesstoken, access_secret = accesssecret)
```
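Rather than hard-coding the keys, they could be read from environment variables (for example, set in a `.Renviron` file). This is a minimal sketch; the variable names such as `TWITTER_CONSUMER_KEY` are placeholders I chose, not names required by the packages.

```{r}
#| eval: false
# Hypothetical environment variable names; set them in ~/.Renviron
consumerkey    <- Sys.getenv("TWITTER_CONSUMER_KEY")
consumersecret <- Sys.getenv("TWITTER_CONSUMER_SECRET")
accesstoken    <- Sys.getenv("TWITTER_ACCESS_TOKEN")
accesssecret   <- Sys.getenv("TWITTER_ACCESS_SECRET")

setup_twitter_oauth(consumer_key = consumerkey, consumer_secret = consumersecret,
                    access_token = accesstoken, access_secret = accesssecret)
```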
# Set up default authentication for the rtweet package
```{r}
# auth_setup_default() opens an interactive browser flow and caches a token;
# it is commented out so the document can render non-interactively.
#auth_setup_default()
```
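The `get_timeline()` call below needs a cached rtweet token. One way to create and save one non-interactively is sketched here, assuming the same hypothetical environment variable names as above; `rtweet_bot()`, `auth_save()`, and `auth_as()` are part of rtweet 1.0+.

```{r}
#| eval: false
# Sketch only: build a bot token from the (hypothetical) environment variables,
# save it, and make it the default auth that get_timeline() will use
token <- rtweet_bot(
  api_key       = Sys.getenv("TWITTER_CONSUMER_KEY"),
  api_secret    = Sys.getenv("TWITTER_CONSUMER_SECRET"),
  access_token  = Sys.getenv("TWITTER_ACCESS_TOKEN"),
  access_secret = Sys.getenv("TWITTER_ACCESS_SECRET")
)
auth_save(token, "blm-project")
auth_as("blm-project")
```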
## Creating a function to format tweets

```{r}
formatting_tweets <- function(tweets) {
  # Remove mentions (@handles)
  tweets$full_text <- str_remove_all(string = tweets$full_text,
                                     pattern = "[@][\\w_-]+")
  # Remove hashtags
  tweets$full_text <- str_remove_all(string = tweets$full_text,
                                     pattern = "[#][\\w_-]+")
  # Remove links
  tweets$full_text <- str_remove_all(string = tweets$full_text,
                                     pattern = "http\\S+\\s*")
  # Remove emojis and other non-ASCII characters
  tweets$full_text <- iconv(x = tweets$full_text, from = "latin1",
                            to = "ASCII", sub = "")
  # Remove punctuation
  tweets$full_text <- str_remove_all(string = tweets$full_text,
                                     pattern = "[[:punct:]]")
  # Change case to lower case
  tweets$full_text <- str_to_lower(string = tweets$full_text)
  # Remove numbers
  tweets$full_text <- str_remove_all(string = tweets$full_text,
                                     pattern = "[[:digit:]]")
  # Remove stopwords
  tweets$full_text <- removeWords(tweets$full_text,
                                  c(stopwords("en"), "can", "will"))
  # Remove the retweet marker "rt" at the start of a tweet
  tweets$full_text <- gsub("^rt\\b", "", tweets$full_text)
  # Remove the "amp" left over from "&amp;" once punctuation is stripped
  tweets$full_text <- gsub("\\bamp\\b", "", tweets$full_text)
  # Collapse repeated whitespace
  tweets$full_text <- str_squish(string = tweets$full_text)
  # Keep only the year from the timestamp
  tweets$created_at <- format(tweets$created_at, format = "%Y")
  return(tweets)
}
```
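Because authentication is not yet working, here is a quick check of the cleaning function on a couple of made-up tweets; the tibble below is mock data I constructed, not scraped output.

```{r}
# Mock data with the two columns that formatting_tweets() touches
mock_tweets <- tibble(
  full_text = c(
    "RT @BLMNYC: Join us &amp; 100 others at the rally! #BlackLivesMatter https://t.co/abc123",
    "We will march on October 10, 2022. Details: https://example.org @BLMLA"
  ),
  created_at = as.POSIXct(c("2022-10-10 12:00:00", "2022-10-09 09:30:00"))
)

formatting_tweets(mock_tweets)$full_text
```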
# Scraping Tweets from Black Lives Matter's Three Chapters

```{r}
BLMchapters <- c("BLMNYC", "BLMChi", "BLMLA")
for (i in BLMchapters) {
  # Build the @handle, pull the chapter's timeline, clean it, and store it
  # in a data frame named after the chapter
  handle <- paste0("@", i)
  result <- get_timeline(user = handle, n = 1)
  formatted_result <- formatting_tweets(result)
  assign(i, data.frame(formatted_result))
}
```
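# Sketches for the Planned Next Steps

For reference, the word-cloud step mentioned in the summary could be reproduced on any of the cleaned chapter data frames with quanteda along these lines. This is a sketch only and is not evaluated here, since it depends on the scraped data being available.

```{r}
#| eval: false
# Sketch: word cloud of the cleaned NYC tweets using quanteda + quanteda.textplots
nyc_dfm <- corpus(BLMNYC, text_field = "full_text") |>
  tokens() |>
  dfm()
textplot_wordcloud(nyc_dfm, max_words = 100)
```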
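The planned comparison could start from something like the following tidytext sketch. It assumes the three chapter data frames above were created successfully and uses the Bing lexicon as one possible choice, not a decision the analysis has settled on.

```{r}
#| eval: false
# Sketch: compare positive/negative word counts across the three chapters
all_chapters <- bind_rows(
  BLMNYC = BLMNYC, BLMChi = BLMChi, BLMLA = BLMLA,
  .id = "chapter"
)

chapter_sentiment <- all_chapters |>
  unnest_tokens(word, full_text) |>
  inner_join(get_sentiments("bing"), by = "word") |>
  count(chapter, sentiment)

chapter_sentiment
```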