Thursday, October 10, 2013

How to use the twitteR package to get Twitter data.

I did this on my Windows machine using R version 3.0.2 and RStudio 0.98.

Open RStudio and install twitteR.


#install and load the necessary packages
install.packages("twitteR")
install.packages("ROAuth")
install.packages("wordcloud")
install.packages("tm")
library("twitteR")
library("ROAuth")
library("wordcloud")
library("tm")
#necessary step for Windows
download.file(url="http://curl.haxx.se/ca/cacert.pem", destfile="cacert.pem")
#to get your consumerKey and consumerSecret, log in to your Twitter account (dev.twitter.com), create an app, open the app settings and note down the Consumer Key and Consumer Secret
cred <- OAuthFactory$new(consumerKey='XXX',
                         consumerSecret='XXX',
                         requestURL='https://api.twitter.com/oauth/request_token',
                         accessURL='https://api.twitter.com/oauth/access_token',
                         authURL='https://api.twitter.com/oauth/authorize')
#necessary step for Windows
cred$handshake(cainfo="cacert.pem")

# copy the URL shown in the console into your browser, authorize the app and enter the PIN back in R
#save the credentials so you can reuse them in later sessions
save(cred, file="twitter authentication.Rdata")
registerTwitterOAuth(cred)
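
In a later R session you don't have to repeat the handshake. Here's a minimal sketch of how I'd reload the saved credentials (assuming the same working directory and the file name used above):

#load the saved credential object and register it again
load("twitter authentication.Rdata")
registerTwitterOAuth(cred)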
##the cainfo parameter is necessary on Windows
s = searchTwitter('GOOG', cainfo="cacert.pem")
s[[1]]
me <- getUser("manjeetjakhar", cainfo="cacert.pem")
me$getId()
me$getFriends(n=6, cainfo="cacert.pem") #get a list of six friends
m_tweets = userTimeline('manjeetjakhar', n=6, cainfo="cacert.pem") #look into the user's tweets
m_tweets
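
searchTwitter and userTimeline return lists of status objects. If you prefer working with a data frame, twitteR's twListToDF can flatten the list; a quick sketch reusing the s object from the search above:

#convert the list of status objects into a data frame
s_df <- twListToDF(s)
head(s_df$text)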

#create Corpus on Windows
r_stats<- searchTwitter("#Rstats", n=1500, cainfo="cacert.pem")

#save text
r_stats_text <- sapply(r_stats, function(x) x$getText())
#create corpus
r_stats_text_corpus <- Corpus(VectorSource(r_stats_text))
#clean up
r_stats_text_corpus <- tm_map(r_stats_text_corpus, tolower)
r_stats_text_corpus <- tm_map(r_stats_text_corpus, removePunctuation)
r_stats_text_corpus <- tm_map(r_stats_text_corpus, function(x)removeWords(x,stopwords()))
wordcloud(r_stats_text_corpus)
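
The plain wordcloud() call works, but you can tune it a bit. This is just one possible variation (the word limits and the Dark2 palette are my own choices, not anything the package requires; brewer.pal comes from RColorBrewer, which wordcloud loads):

#a tweaked word cloud: drop rare words, cap the total and add some colour
wordcloud(r_stats_text_corpus, min.freq=2, max.words=100,
          random.order=FALSE, colors=brewer.pal(8, "Dark2"))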



#you can also see what's trending
trend <- availableTrendLocations(cainfo="cacert.pem")
head(trend)
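
availableTrendLocations only tells you which places Twitter reports trends for. To actually fetch the trends, pass one of the returned woeid values to getTrends. A small sketch (woeid 1 is the worldwide list; pick any woeid from the trend table instead):

#fetch the current trends for a location by its woeid (1 = worldwide)
world_trends <- getTrends(woeid=1, cainfo="cacert.pem")
head(world_trends)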

## Use tm (text mining) package
 
# build a corpus (mydata.vectors should be a character vector of tweet text,
# e.g. extracted with sapply(..., function(x) x$getText()) as above)
mydata.corpus <- Corpus(VectorSource(mydata.vectors))
 
# make each letter lowercase
mydata.corpus <- tm_map(mydata.corpus, tolower)
 
# remove punctuation
mydata.corpus <- tm_map(mydata.corpus, removePunctuation)
 
# remove generic and custom stopwords
my_stopwords <- c(stopwords('english'), 'prolife', 'prochoice')
mydata.corpus <- tm_map(mydata.corpus, removeWords, my_stopwords)
 
# build a term-document matrix
mydata.dtm <- TermDocumentMatrix(mydata.corpus)
 
# inspect the term-document matrix
mydata.dtm
 
# inspect the most frequent terms (appearing at least 30 times)

findFreqTerms(mydata.dtm, lowfreq=30)
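
Once you know the frequent terms, tm's findAssocs can show which other words tend to occur together with them. A quick sketch (the term 'data' and the 0.2 correlation cut-off are just placeholders; use a term from your own matrix):

# find terms that correlate with a given word at 0.2 or above
findAssocs(mydata.dtm, 'data', 0.2)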
