# 사례분석: 태블릿(tablet) 데이터에 적용
# 총 정리 및 복습
# Read the 2014 mobile review data, keeping text columns as character vectors.
mobile <- read.csv(file = "mobile2014.csv", stringsAsFactors = FALSE)

library(tm)

# Wrap the Texts column in a corpus and build the training document-term
# matrix. The control list requests lower-casing, punctuation and number
# removal, SMART stop-word filtering, and TF-IDF weighting.
corpus <- Corpus(VectorSource(mobile$Texts))
dtm <- DocumentTermMatrix(
  corpus,
  control = list(
    tolower = TRUE,
    removePunctuation = TRUE,
    removeNumbers = TRUE,
    stopwords = stopwords("SMART"),
    weighting = weightTfIdf
  )
)

# Extended stop-word list: the SMART list plus "aaa". It is only printed here;
# the matrix above was built with the plain SMART list.
my.stopwords <- c(stopwords("SMART"), "aaa")
my.stopwords
library(glmnet)

# Predictors: the document-term matrix as a dense matrix.
# Response: the Sentiment column of the training data.
X <- as.matrix(dtm)
Y <- mobile$Sentiment

# Binomial glmnet fit with the penalty strength fixed at lambda = 0.
res.lm <- glmnet(x = X, y = Y, family = "binomial", lambda = 0)

# First (only) column of the coefficient matrix as a named vector.
coef.lm <- coef(res.lm)[, 1]

# Terms with positive coefficients, largest first; terms with negative
# coefficients, most negative first.
pos.lm <- sort(coef.lm[coef.lm > 0], decreasing = TRUE)
neg.lm <- sort(coef.lm[coef.lm < 0], decreasing = FALSE)
# Lasso (alpha = 1) binomial model, with lambda chosen by 4-fold
# cross-validation on misclassification error (type.measure = "class").
# The earlier stand-alone glmnet() fit was dropped: its result was overwritten
# by cv.glmnet() before being used, so it only cost an extra full-path fit.
set.seed(12345)  # make the cross-validation fold assignment reproducible
res.lasso <- cv.glmnet(X, Y, family = "binomial", alpha = 1,
                       nfolds = 4, type.measure = "class")

# Coefficients at the lambda with the lowest cross-validated error.
coef.lasso <- coef(res.lasso, s = "lambda.min")[, 1]

# Positive terms sorted largest first; negative terms most negative first.
pos.lasso <- sort(coef.lasso[coef.lasso > 0], decreasing = TRUE)
neg.lasso <- sort(coef.lasso[coef.lasso < 0], decreasing = FALSE)
# Ridge (alpha = 0) binomial model, with lambda chosen by 4-fold
# cross-validation on misclassification error.
set.seed(12345)  # same seed as the other cross-validated fits
res.ridge <- cv.glmnet(
  x = X,
  y = Y,
  family = "binomial",
  alpha = 0,
  nfolds = 4,
  type.measure = "class"
)

# Coefficients at the lambda with the lowest cross-validated error.
coef.ridge <- coef(res.ridge, s = "lambda.min")[, 1]

# Positive terms sorted largest first; negative terms most negative first.
pos.ridge <- sort(coef.ridge[coef.ridge > 0], decreasing = TRUE)
neg.ridge <- sort(coef.ridge[coef.ridge < 0], decreasing = FALSE)
# Elastic-net (alpha = 0.5) binomial model, with lambda chosen by 4-fold
# cross-validation on misclassification error.
set.seed(12345)  # same seed as the other cross-validated fits
res.elastic <- cv.glmnet(
  x = X,
  y = Y,
  family = "binomial",
  alpha = 0.5,
  nfolds = 4,
  type.measure = "class"
)

# Coefficients at the lambda with the lowest cross-validated error.
coef.elastic <- coef(res.elastic, s = "lambda.min")[, 1]

# Positive terms sorted largest first; negative terms most negative first.
pos.elastic <- sort(coef.elastic[coef.elastic > 0], decreasing = TRUE)
neg.elastic <- sort(coef.elastic[coef.elastic < 0], decreasing = FALSE)
# 태블릿 데이터에 적용
# Read the tablet test reviews, keeping text columns as character vectors.
data.test <- read.csv(file = "tablet2014_test.csv", stringsAsFactors = FALSE)

# Build the test document-term matrix with the same preprocessing options as
# the training matrix. `dictionary = Terms(dtm)` restricts its columns to the
# training vocabulary. Note that `corpus` is reassigned to the test corpus.
corpus <- Corpus(VectorSource(data.test$Texts))
dtm.test <- DocumentTermMatrix(
  corpus,
  control = list(
    tolower = TRUE,
    removePunctuation = TRUE,
    removeNumbers = TRUE,
    stopwords = stopwords("SMART"),
    weighting = weightTfIdf,
    dictionary = Terms(dtm)
  )
)
library(tm.plugin.sentiment)

# For each fitted model, score every test document with polarity(), using the
# names of the positive and negative coefficients as the two word lists. A
# score above zero is then labelled 1, anything else 0.

# Unpenalized fit (lambda = 0)
senti.lm.test <- polarity(dtm.test, names(pos.lm), names(neg.lm))
senti.lm.b.test <- ifelse(senti.lm.test > 0, 1, 0)

# Lasso
senti.lasso.test <- polarity(dtm.test, names(pos.lasso), names(neg.lasso))
senti.lasso.b.test <- ifelse(senti.lasso.test > 0, 1, 0)

# Ridge
senti.ridge.test <- polarity(dtm.test, names(pos.ridge), names(neg.ridge))
senti.ridge.b.test <- ifelse(senti.ridge.test > 0, 1, 0)

# Elastic net
senti.elastic.test <- polarity(dtm.test, names(pos.elastic),
                               names(neg.elastic))
senti.elastic.b.test <- ifelse(senti.elastic.test > 0, 1, 0)
library(caret)

# Compare each model's 0/1 predictions with the true test labels.
# Current caret versions require `data` and `reference` to be factors with
# identical levels, so both sides are converted explicitly; passing raw
# numeric vectors raises an error.
# NOTE(review): assumes data.test$Sentiment is coded 0/1, matching the
# binarized predictions -- confirm against the CSV.
actual.test <- factor(data.test$Sentiment, levels = c(0, 1))

confusionMatrix(factor(senti.lm.b.test, levels = c(0, 1)), actual.test)
confusionMatrix(factor(senti.lasso.b.test, levels = c(0, 1)), actual.test)
confusionMatrix(factor(senti.ridge.b.test, levels = c(0, 1)), actual.test)
confusionMatrix(factor(senti.elastic.b.test, levels = c(0, 1)), actual.test)