I did this on my Windows machine using R version 3.0.2 and RStudio 0.98.
Open RStudio and install the twitteR package.
# Install and load the necessary packages.
# NOTE: the CRAN package is named "twitteR" (capital R at the end only);
# the original "twitterR" spelling does not exist and both
# install.packages() and library() fail with it.
install.packages("twitteR")
install.packages("wordcloud")
install.packages("tm")
library("twitteR")
library("wordcloud")
library("tm")
# Necessary step for Windows.
# To get your consumerKey and consumerSecret, log in to your Twitter
# developer account (dev.twitter.com), create an app, open the app
# settings and note down the Consumer Key and Consumer Secret.
# NOTE(review): the original call was truncated mid-argument-list; the
# three OAuth endpoint URLs below are the standard ones required by
# ROAuth's OAuthFactory for the Twitter API -- confirm against your app.
cred <- OAuthFactory$new(
  consumerKey    = 'XXX',
  consumerSecret = 'XXX',
  requestURL     = "https://api.twitter.com/oauth/request_token",
  accessURL      = "https://api.twitter.com/oauth/access_token",
  authURL        = "https://api.twitter.com/oauth/authorize"
)
# The cainfo parameter is a necessary step on Windows (SSL certificate
# bundle for the HTTPS calls).
cred$handshake(cainfo = "cacert.pem")
# Copy the URL printed by handshake() into a browser, accept the
# authentication and enter the PIN at the prompt.
# Save the credential for later reuse on Windows.
save(cred, file = "twitter authentication.Rdata")
registerTwitterOAuth(cred)
## The cainfo parameter is necessary on Windows.
# Search recent tweets mentioning GOOG and peek at the first hit.
s <- searchTwitter("GOOG", cainfo = "cacert.pem")
s[[1]]
# Look up a user, then query the user object for its id and friends.
me <- getUser("manjeetjakhar", cainfo = "cacert.pem")
me$getId()
# Fetch a six-friend sample of the user's friend list.
me$getFriends(n = 6, cainfo = "cacert.pem")
# Look into the user's own tweets (six most recent).
m_tweets <- userTimeline("manjeetjakhar", n = 6, cainfo = "cacert.pem")
m_tweets
# Create a corpus on Windows (cainfo again required for the API call).
r_stats <- searchTwitter("#Rstats", n = 1500, cainfo = "cacert.pem")
# Save the plain text of each tweet (status objects expose getText()).
r_stats_text <- sapply(r_stats, function(x) x$getText())
# Create a tm corpus from the character vector of tweet texts.
r_stats_text_corpus <- Corpus(VectorSource(r_stats_text))
# Clean up: lowercase, strip punctuation, drop stopwords.
# Since tm 0.6, non-tm functions such as base tolower must be wrapped
# in content_transformer(); passing tolower (or an anonymous wrapper)
# directly to tm_map errors with "no applicable method".
r_stats_text_corpus <- tm_map(r_stats_text_corpus,
                              content_transformer(tolower))
r_stats_text_corpus <- tm_map(r_stats_text_corpus, removePunctuation)
# Extra arguments to the transformation are forwarded by tm_map, so the
# anonymous-function wrapper around removeWords is unnecessary.
r_stats_text_corpus <- tm_map(r_stats_text_corpus, removeWords,
                              stopwords())
# Draw a word cloud from the cleaned tweet corpus.
# (A stray "==" line followed this call in the original -- a parse
# error from the blog extraction; it has been removed.)
wordcloud(r_stats_text_corpus)
# You can also see what's trending: fetch the locations for which
# Twitter reports trend data (cainfo required on Windows) and show
# the first few rows.
trend <- availableTrendLocations(cainfo = "cacert.pem")
head(trend)
## Use the tm (text mining) package.
# NOTE(review): mydata.vectors is not defined in this script -- it is
# presumably a character vector of documents built earlier; confirm
# before running.
# Build a corpus.
mydata.corpus <- Corpus(VectorSource(mydata.vectors))
# Make each letter lowercase. Since tm 0.6, base tolower must be
# wrapped in content_transformer() or tm_map errors.
mydata.corpus <- tm_map(mydata.corpus, content_transformer(tolower))
# Remove punctuation.
mydata.corpus <- tm_map(mydata.corpus, removePunctuation)
# Remove generic English stopwords plus custom domain stopwords.
my_stopwords <- c(stopwords('english'), 'prolife', 'prochoice')
mydata.corpus <- tm_map(mydata.corpus, removeWords, my_stopwords)
# Build a term-document matrix (terms as rows, documents as columns).
mydata.dtm <- TermDocumentMatrix(mydata.corpus)
# Inspect the term-document matrix summary.
mydata.dtm
# Inspect the most popular words (those appearing at least 30 times).
findFreqTerms(mydata.dtm, lowfreq=30)
No comments:
Post a Comment