# Visualize the articles of Jens's blog with MDS (multidimensional scaling)

library(tm)
library(RMySQL)
# Load every row of the `articles` table from the `jens_blog` MySQL database.
# The driver object is created explicitly with MySQL(); the original referenced
# an undefined `md` variable.
dbconnector <- dbConnect(
  MySQL(),
  dbname = "jens_blog",
  user = "root",
  password = "******"
)
# Switch the session to UTF-8 before reading the articles. The result of this
# statement carries no data, so it is discarded.
invisible(dbGetQuery(dbconnector, "set names utf8"))
jens.table <- dbGetQuery(dbconnector, "select * from articles")
# Nothing below needs the database any more; release the connection.
invisible(dbDisconnect(dbconnector))

# Column 2 is used as the document text and column 1 as the document name.
# NOTE(review): presumably column 1 holds the article title and column 2 the
# body -- confirm against the table schema.
all.jens <- as.vector(jens.table[, 2])
names(all.jens) <- jens.table[, 1]
# Strip leftover markup whitespace from the names: first the newline + four
# tabs sequence, then the run of spaces.
names(all.jens) <- gsub("\n\t\t\t\t", "", names(all.jens))
names(all.jens) <- gsub("            ", "", names(all.jens))
# Define get.tdm(): build a term-document matrix from a vector of documents.
#
# doc.vec: a vector of texts, one element per document.
# Returns the object produced by TermDocumentMatrix() for a corpus built from
# doc.vec.
get.tdm <- function(doc.vec) {
  # Options handed to TermDocumentMatrix() as its control list.
  # NOTE(review): whether `minDocFreq` is honoured depends on the installed
  # version of tm -- confirm it still has an effect.
  tdm.options <- list(
    stopwords = TRUE,
    removePunctuation = TRUE,
    removeNumbers = TRUE,
    minDocFreq = 2
  )
  TermDocumentMatrix(Corpus(VectorSource(doc.vec)), tdm.options)
}
# Build the term-document matrix for all articles and convert it to a plain
# numeric matrix (rows = terms, columns = documents).
jens.tdm <- get.tdm(all.jens)
jens.matrix <- as.matrix(jens.tdm)
# Document-by-document cross-product matrix; crossprod(x) computes
# t(x) %*% x without materialising the transpose.
jens.mult <- crossprod(jens.matrix)
# Euclidean distances between the rows of the cross-product matrix, projected
# onto two dimensions with classical multidimensional scaling.
jens.dist <- dist(jens.mult)
jens.mds <- cmdscale(jens.dist)
# Set up an empty plot, then draw each document's name at its MDS coordinates.
# The names must be passed as `labels`; the original passed them as `type`,
# so text() fell back to its default numeric labels.
plot(jens.mds, type = "n")
text(jens.mds, labels = rownames(jens.mds))