추천 시스템에서의 모형 선택 (평점 기준)
추천 시스템에서의 모형 선택 (평점 기준)
모형 평가를 위해서 Traing Set과 Test Set 분할하기
scheme <- evaluationScheme(m, method="split",
train = .8, k = 1, given = 15)
scheme@runsTrain
set.seed(12345)
scheme <- evaluationScheme(m, method="split",
train = .8, k = 1, given = 15)
scheme@runsTrain
평가할 알고리즘 설정하기
algorithms <- list(
"random" = list(name="RANDOM"),
"popular" = list(name="POPULAR"),
"popularZ" = list(name="POPULAR", param=list(normalize = "Z-score")),
"userN10C" = list(name="UBCF", param=list(normalize = NULL, nn = 10, method = 'cosine')),
"userN10P" = list(name="UBCF", param=list(normalize = NULL, nn = 10, method = 'pearson')),
"userN50C" = list(name="UBCF", param=list(normalize = NULL, nn = 50, method = 'cosine')),
"userN50P" = list(name="UBCF", param=list(normalize = NULL, nn = 50, method = 'pearson')),
"userC50C" = list(name="UBCF", param=list(normalize = 'center', nn = 50, method = 'cosine')),
"userC50P" = list(name="UBCF", param=list(normalize = 'center', nn = 50, method = 'pearson')),
"userZ50C" = list(name="UBCF", param=list(normalize = 'Z-score', nn = 50, method = 'cosine')),
"userZ50P" = list(name="UBCF", param=list(normalize = 'Z-score', nn = 50, method = 'pearson')),
"userZ100C" = list(name="UBCF", param=list(normalize = 'Z-score', nn = 100, method = 'cosine')),
"userZ100P" = list(name="UBCF", param=list(normalize = 'Z-score', nn = 100, method = 'pearson')),
"userZ500C" = list(name="UBCF", param=list(normalize = 'Z-score', nn = 500, method = 'cosine')),
"userZ500P" = list(name="UBCF", param=list(normalize = 'Z-score', nn = 500, method = 'pearson'))
)
Training Set으로 각 알고리즘에 대해서 학습 후 Test Set을 이용하여 정확도 평가하기
results <- evaluate(scheme, algorithms, type='ratings')
각 모형에 대한 정확도 확인하기
names(results)
getConfusionMatrix(results[['random']])
getConfusionMatrix(results[['popular']])
for (i in names(results))
{
print(i)
print(getConfusionMatrix(results[[i]]))
}
각 모형에 대한 정확도를 그림으로 나타내기
plot(results)
User Based CF외에 Item Based CF와 SVD를 추가하여 모형 평가하기
algorithms <- list(
"random" = list(name="RANDOM"),
"popular" = list(name="POPULAR"),
"popularZ" = list(name="POPULAR", param=list(normalize = "Z-score")),
"userN10C" = list(name="UBCF", param=list(normalize = NULL, nn = 10, method = 'cosine')),
"userZ500C" = list(name="UBCF", param=list(normalize = 'Z-score', nn = 500, method = 'cosine')),
"itemZ100PF" = list(name="IBCF", param=list(normalize = 'Z-score', k = 100, method = 'pearson', normalize_sim_matrix = F)),
"itemZ100PT" = list(name="IBCF", param=list(normalize = 'Z-score', k = 100, method = 'pearson', normalize_sim_matrix = T)),
"itemZ100CF" = list(name="IBCF", param=list(normalize = 'Z-score', k = 100, method = 'cosine', normalize_sim_matrix = F)),
"itemZ100CT" = list(name="IBCF", param=list(normalize = 'Z-score', k = 100, method = 'cosine', normalize_sim_matrix = T)),
"itemZ500PT" = list(name="IBCF", param=list(normalize = 'Z-score', k = 500, method = 'pearson', normalize_sim_matrix = T)),
"itemZ500CT" = list(name="IBCF", param=list(normalize = 'Z-score', k = 500, method = 'cosine', normalize_sim_matrix = T)),
'SVDZ10PT' = list(name="SVD", param=list(normalize = 'Z-score', k = 10)),
'SVDZ50PT' = list(name="SVD", param=list(normalize = 'Z-score', k = 50)),
'SVDZ100PT' = list(name="SVD", param=list(normalize = 'Z-score', k = 100)),
)
results <- evaluate(scheme, algorithms, type='ratings')
for (i in names(results))
{
print(i)
print(getConfusionMatrix(results[[i]]))
}
plot(results)