I did this on my Windows machine using R version 3.0.2 and RStudio 0.98.
Open RStudio and install the twitteR package.
# Install and load the necessary packages.
# NOTE: the CRAN package is named "twitteR" (capital R at the end only);
# the original "twitterR" spelling does not exist and both
# install.packages() and library() fail with it.
install.packages("twitteR")
install.packages("wordcloud")
install.packages("tm")
library("twitteR")
library("wordcloud")
library("tm")
# Necessary step for Windows.
# To get your consumerKey and consumerSecret, log in to your Twitter
# developer account (dev.twitter.com), create an app, open the app
# settings and note down the Consumer Key and Consumer Secret.
# NOTE(review): the original call was truncated mid-argument-list; the
# three OAuth endpoint URLs below are the standard ones required by
# ROAuth's OAuthFactory for the Twitter API -- confirm against your app.
cred <- OAuthFactory$new(
  consumerKey    = 'XXX',
  consumerSecret = 'XXX',
  requestURL     = "https://api.twitter.com/oauth/request_token",
  accessURL      = "https://api.twitter.com/oauth/access_token",
  authURL        = "https://api.twitter.com/oauth/authorize"
)
# The cainfo parameter is a necessary step on Windows (SSL certificate
# bundle for the HTTPS calls).
cred$handshake(cainfo = "cacert.pem")
# Copy the URL printed by handshake() into a browser, accept the
# authentication and enter the PIN at the prompt.
# Save the credential for later reuse on Windows.
save(cred, file = "twitter authentication.Rdata")
registerTwitterOAuth(cred)
## The cainfo parameter is necessary on Windows.
# Search recent tweets mentioning GOOG and peek at the first hit.
s <- searchTwitter("GOOG", cainfo = "cacert.pem")
s[[1]]
# Look up a user, then query the user object for its id and friends.
me <- getUser("manjeetjakhar", cainfo = "cacert.pem")
me$getId()
# Fetch a six-friend sample of the user's friend list.
me$getFriends(n = 6, cainfo = "cacert.pem")
# Look into the user's own tweets (six most recent).
m_tweets <- userTimeline("manjeetjakhar", n = 6, cainfo = "cacert.pem")
m_tweets
# Create a corpus on Windows (cainfo again required for the API call).
r_stats <- searchTwitter("#Rstats", n = 1500, cainfo = "cacert.pem")
# Save the plain text of each tweet (status objects expose getText()).
r_stats_text <- sapply(r_stats, function(x) x$getText())
# Create a tm corpus from the character vector of tweet texts.
r_stats_text_corpus <- Corpus(VectorSource(r_stats_text))
# Clean up: lowercase, strip punctuation, drop stopwords.
# Since tm 0.6, non-tm functions such as base tolower must be wrapped
# in content_transformer(); passing tolower (or an anonymous wrapper)
# directly to tm_map errors with "no applicable method".
r_stats_text_corpus <- tm_map(r_stats_text_corpus,
                              content_transformer(tolower))
r_stats_text_corpus <- tm_map(r_stats_text_corpus, removePunctuation)
# Extra arguments to the transformation are forwarded by tm_map, so the
# anonymous-function wrapper around removeWords is unnecessary.
r_stats_text_corpus <- tm_map(r_stats_text_corpus, removeWords,
                              stopwords())
# Draw a word cloud from the cleaned tweet corpus.
# (A stray "==" line followed this call in the original -- a parse
# error from the blog extraction; it has been removed.)
wordcloud(r_stats_text_corpus)
# You can also see what's trending: fetch the locations for which
# Twitter reports trend data (cainfo required on Windows) and show
# the first few rows.
trend <- availableTrendLocations(cainfo = "cacert.pem")
head(trend)
## Use the tm (text mining) package.
# NOTE(review): mydata.vectors is not defined in this script -- it is
# presumably a character vector of documents built earlier; confirm
# before running.
# Build a corpus.
mydata.corpus <- Corpus(VectorSource(mydata.vectors))
# Make each letter lowercase. Since tm 0.6, base tolower must be
# wrapped in content_transformer() or tm_map errors.
mydata.corpus <- tm_map(mydata.corpus, content_transformer(tolower))
# Remove punctuation.
mydata.corpus <- tm_map(mydata.corpus, removePunctuation)
# Remove generic English stopwords plus custom domain stopwords.
my_stopwords <- c(stopwords('english'), 'prolife', 'prochoice')
mydata.corpus <- tm_map(mydata.corpus, removeWords, my_stopwords)
# Build a term-document matrix (terms as rows, documents as columns).
mydata.dtm <- TermDocumentMatrix(mydata.corpus)
# Inspect the term-document matrix summary.
mydata.dtm
# Inspect the most popular words (those appearing at least 30 times).
findFreqTerms(mydata.dtm, lowfreq=30)
No comments:
Post a Comment