RでTwitterを解析する

http://stackoverflow.com/questions/15058485/oauth-with-twitter-streaming-api-in-r-using-rcurl

https://sites.google.com/site/miningtwitter/questions/talking-about/wordclouds/comparison-cloud

library(RCurl)
library(twitteR)
library(ROAuth)
library(tm)
library(wordcloud)

# OAuth endpoints for the Twitter API. All three use https; the access and
# authorize URLs previously used plain http, inconsistent with requestURL.
requestURL <- "https://api.twitter.com/oauth/request_token"
accessURL <- "https://api.twitter.com/oauth/access_token"
authURL <- "https://api.twitter.com/oauth/authorize"

# Application credentials. The environment variables TWITTER_CONSUMER_KEY and
# TWITTER_CONSUMER_SECRET take precedence when set; otherwise the values that
# were already embedded in this script are used so existing runs keep working.
# NOTE(review): the fallback literals are credentials committed in source --
# they should be revoked and supplied through the environment only.
consumerKey <- Sys.getenv("TWITTER_CONSUMER_KEY", unset = "wwM3rXUWd48ImpjxljgA")
consumerSecret <- Sys.getenv(
  "TWITTER_CONSUMER_SECRET",
  unset = "X22dmkq7MjWxV5PoMDs8Qwkq7MOvJjHUMbdutIaw"
)

# Build the OAuth object from the credentials and endpoints above.
Cred <- OAuthFactory$new(
  consumerKey = consumerKey,
  consumerSecret = consumerSecret,
  requestURL = requestURL,
  accessURL = accessURL,
  authURL = authURL
)

# Run the OAuth handshake, verifying TLS against the CA bundle shipped with
# RCurl, then register the credentials with twitteR for the calls below.
Cred$handshake(cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl"))
registerTwitterOAuth(Cred)
# Fetch up to 1000 tweets from the timeline of each of the four accounts.
# NOTE(review): the att/ver/mob/pcs prefixes look like leftovers from a
# mobile-carrier example; they are kept because later code uses these names.
att_tweets <- userTimeline("moyahima", n = 1000)
ver_tweets <- userTimeline("ryooopan", n = 1000)
mob_tweets <- userTimeline("satomacoto", n = 1000)
pcs_tweets <- userTimeline("ken_nishi", n = 1000)

# Extract the text of every status object. vapply() always yields a character
# vector (character(0) for an empty timeline), whereas sapply() would return
# an empty list in that case.
att_txt <- vapply(att_tweets, function(x) x$getText(), character(1))
ver_txt <- vapply(ver_tweets, function(x) x$getText(), character(1))
mob_txt <- vapply(mob_tweets, function(x) x$getText(), character(1))
pcs_txt <- vapply(pcs_tweets, function(x) x$getText(), character(1))

# Normalise raw tweet text for word-frequency analysis.
#
# Args:
#   x: character vector of tweet texts (may be empty).
#
# Returns:
#   A character vector of the same length as `x`, lower-cased, with retweet
#   markers, @mentions, punctuation, digits and links removed, whitespace runs
#   collapsed to a single space, and no leading/trailing spaces.
clean.text <- function(x) {
  x <- tolower(x)
  # Drop the retweet marker only when it is a standalone word; a bare "rt"
  # pattern would also eat the letters inside words such as "party" or "art".
  x <- gsub("\\brt\\b", "", x)
  # Drop @mentions.
  x <- gsub("@\\w+", "", x)
  # Drop punctuation. This also squashes a URL into one "http..." token,
  # which the link pattern below relies on.
  x <- gsub("[[:punct:]]", "", x)
  # Drop digits.
  x <- gsub("[[:digit:]]", "", x)
  # Drop what is left of links (the squashed "http..." tokens).
  x <- gsub("http\\w+", "", x)
  # Collapse whitespace runs into ONE space. Replacing them with the empty
  # string would glue the neighbouring words together.
  x <- gsub("[[:space:]]+", " ", x)
  # After collapsing, at most one space can remain at either end.
  x <- gsub("^ | $", "", x)
  x
}

# For each account: normalise its tweets with clean.text(), then join them
# into one space-separated string so every account becomes a single document.
att_clean <- clean.text(att_txt)
att <- paste(att_clean, collapse = " ")

ver_clean <- clean.text(ver_txt)
ver <- paste(ver_clean, collapse = " ")

mob_clean <- clean.text(mob_txt)
mob <- paste(mob_clean, collapse = " ")

pcs_clean <- clean.text(pcs_txt)
pcs <- paste(pcs_clean, collapse = " ")

# Combine the four per-account documents into one vector and strip the
# English stop-words plus the extra custom terms from each of them.
all <- removeWords(
  c(att, ver, mob, pcs),
  c(stopwords("english"), "att", "verizon", "tmobile", "metropcs")
)

# One corpus entry per account.
corpus <- Corpus(VectorSource(all))

# Term-document matrix as a plain matrix: rows are terms, columns are the
# documents, in the same order as the vector above.
tdm <- as.matrix(TermDocumentMatrix(corpus))

# Label each column with the account it came from.
colnames(tdm) <- c("moyahima", "ryooopan", "satomacoto", "ken_nishi")

# Comparison cloud: words that distinguish each account, one colour per
# column of tdm, written to an 8x8 inch PDF.
pdf("CarriersCompCloud.pdf", width = 8, height = 8)
comparison.cloud(
  tdm,
  random.order = FALSE,
  colors = c("#00B2FF", "red", "#FF0099", "#6600CC"),
  title.size = 1.5,
  max.words = 500
)
dev.off()

# Commonality cloud: words shared by all accounts, written to a second PDF.
# title.size is intentionally not passed here: commonality.cloud() draws no
# per-document titles and would forward the argument down to text(), which
# only produces "not a graphical parameter" warnings.
pdf("CarriersCommCloud.pdf", width = 8, height = 8)
commonality.cloud(
  tdm,
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2")
)
dev.off()

比較

共通