사례분석: 태블릿(tablet) 데이터에 적용 :: 감정 분석 - mindscale
Skip to content

사례분석: 태블릿(tablet) 데이터에 적용

총 정리 및 복습

# Load the mobile data set; keep text columns as character, not factor.
mobile <- read.csv("mobile2014.csv", stringsAsFactors = FALSE)

library(tm)
# One corpus document per element of the Texts column.
corpus <- Corpus(VectorSource(mobile$Texts))

# Training document-term matrix: lower-cased, punctuation and numbers
# removed, SMART stopwords dropped, cells weighted by TF-IDF.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
dtm <- DocumentTermMatrix(
  corpus,
  control = list(
    tolower = TRUE,
    removePunctuation = TRUE,
    removeNumbers = TRUE,
    stopwords = stopwords("SMART"),
    weighting = weightTfIdf
  )
)

# Example of extending the SMART list with a custom entry.
# NOTE(review): the DTM above is built with the plain SMART list, not with
# this vector -- pass `stopwords = my.stopwords` if "aaa" should be dropped.
my.stopwords <- c(stopwords("SMART"), "aaa")
my.stopwords

library(glmnet)
# Dense predictor matrix built from the training DTM, plus the response.
X <- as.matrix(dtm)
Y <- mobile$Sentiment

# Binomial (logistic) fit with the penalty switched off (lambda = 0).
res.lm <- glmnet(X, Y, family = "binomial", lambda = 0)
coef.lm <- coef(res.lm)[, 1]

# coef() also reports an "(Intercept)" entry, which is not a term of the
# DTM; leave it out so the word lists below contain only terms.
terms.lm <- coef.lm[names(coef.lm) != "(Intercept)"]

# Positive list: largest coefficient first. Negative list: most negative first.
pos.lm <- sort(terms.lm[terms.lm > 0], decreasing = TRUE)
neg.lm <- sort(terms.lm[terms.lm < 0], decreasing = FALSE)

# Lasso (alpha = 1) logistic regression with lambda chosen by 4-fold
# cross-validation on misclassification error. The seed is fixed before the
# call so that repeated runs give the same result.
# A plain glmnet() fit used to be assigned to res.lasso first; it was
# overwritten right away by the cv.glmnet() result, so it has been dropped.
set.seed(12345)
res.lasso <- cv.glmnet(X, Y,
                       family = "binomial", alpha = 1,
                       nfolds = 4, type.measure = "class")

# Coefficients at the lambda with the lowest cross-validated error.
coef.lasso <- coef(res.lasso, s = "lambda.min")[, 1]

# "(Intercept)" is not a term of the DTM; keep it out of the word lists.
terms.lasso <- coef.lasso[names(coef.lasso) != "(Intercept)"]

# Positive list: largest coefficient first. Negative list: most negative first.
pos.lasso <- sort(terms.lasso[terms.lasso > 0], decreasing = TRUE)
neg.lasso <- sort(terms.lasso[terms.lasso < 0], decreasing = FALSE)

# Ridge (alpha = 0) logistic regression with lambda chosen by 4-fold
# cross-validation on misclassification error; same seed as the other
# cross-validated fits.
set.seed(12345)
res.ridge <- cv.glmnet(X, Y,
                       family = "binomial", alpha = 0,
                       nfolds = 4, type.measure = "class")

# Coefficients at the lambda with the lowest cross-validated error.
coef.ridge <- coef(res.ridge, s = "lambda.min")[, 1]

# "(Intercept)" is not a term of the DTM; keep it out of the word lists.
terms.ridge <- coef.ridge[names(coef.ridge) != "(Intercept)"]

# Positive list: largest coefficient first. Negative list: most negative first.
pos.ridge <- sort(terms.ridge[terms.ridge > 0], decreasing = TRUE)
neg.ridge <- sort(terms.ridge[terms.ridge < 0], decreasing = FALSE)

# Elastic net (alpha = 0.5) logistic regression with lambda chosen by 4-fold
# cross-validation on misclassification error; same seed as the other
# cross-validated fits.
set.seed(12345)
res.elastic <- cv.glmnet(X, Y,
                         family = "binomial", alpha = 0.5,
                         nfolds = 4, type.measure = "class")

# Coefficients at the lambda with the lowest cross-validated error.
coef.elastic <- coef(res.elastic, s = "lambda.min")[, 1]

# "(Intercept)" is not a term of the DTM; keep it out of the word lists.
terms.elastic <- coef.elastic[names(coef.elastic) != "(Intercept)"]

# Positive list: largest coefficient first. Negative list: most negative first.
pos.elastic <- sort(terms.elastic[terms.elastic > 0], decreasing = TRUE)
neg.elastic <- sort(terms.elastic[terms.elastic < 0], decreasing = FALSE)

태블릿 데이터에 적용

# Load the tablet test set; keep text columns as character, not factor.
data.test <- read.csv("tablet2014_test.csv", stringsAsFactors = FALSE)

# NOTE: this reuses (and overwrites) the name `corpus`.
corpus <- Corpus(VectorSource(data.test$Texts))

# Test document-term matrix built with the same preprocessing options as the
# training DTM. `dictionary = Terms(dtm)` restricts the columns to the
# training vocabulary.
# NOTE(review): the TF-IDF weighting is applied to this test corpus on its
# own, so the weights presumably use test-set document frequencies -- confirm
# that this is intended.
dtm.test <- DocumentTermMatrix(
  corpus,
  control = list(
    tolower = TRUE,
    removePunctuation = TRUE,
    removeNumbers = TRUE,
    stopwords = stopwords("SMART"),
    weighting = weightTfIdf,
    dictionary = Terms(dtm)
  )
)

library(tm.plugin.sentiment)

# For each fitted model, score the test documents with polarity(), passing
# the model's positive-coefficient terms as the positive word list and its
# negative-coefficient terms as the negative word list. Each score is then
# turned into a binary label: 1 when the score is above zero, 0 otherwise.
# NOTE(review): if polarity() can return NA/NaN for a document (e.g. one
# containing no listed words), ifelse() keeps it as NA -- confirm.

# Unpenalised logistic regression
senti.lm.test <- polarity(dtm.test, names(pos.lm), names(neg.lm))
senti.lm.b.test <- ifelse(senti.lm.test > 0, 1, 0)

# Lasso
senti.lasso.test <- polarity(dtm.test, names(pos.lasso), names(neg.lasso))
senti.lasso.b.test <- ifelse(senti.lasso.test > 0, 1, 0)

# Ridge
senti.ridge.test <- polarity(dtm.test, names(pos.ridge), names(neg.ridge))
senti.ridge.b.test <- ifelse(senti.ridge.test > 0, 1, 0)

# Elastic net
senti.elastic.test <- polarity(
  dtm.test, names(pos.elastic), names(neg.elastic)
)
senti.elastic.b.test <- ifelse(senti.elastic.test > 0, 1, 0)

library(caret)

# confusionMatrix() requires `data` and `reference` to be factors with the
# same levels; bare numeric vectors are rejected by current caret versions.
# The predictions are coded 0/1 by the ifelse() step, so both sides are
# converted with the same explicit level set.
# NOTE(review): assumes data.test$Sentiment is also coded 0/1 -- any other
# value would become NA here; confirm against the CSV.
sentiment.levels <- c(0, 1)
truth.test <- factor(data.test$Sentiment, levels = sentiment.levels)

# Predicted vs. actual sentiment for each of the four models.
confusionMatrix(factor(senti.lm.b.test, levels = sentiment.levels),
                truth.test)
confusionMatrix(factor(senti.lasso.b.test, levels = sentiment.levels),
                truth.test)
confusionMatrix(factor(senti.ridge.b.test, levels = sentiment.levels),
                truth.test)
confusionMatrix(factor(senti.elastic.b.test, levels = sentiment.levels),
                truth.test)