# yamamoto — Feb 10, 2014, 1:38 PM
# Titanic: load the training data and build the model frame for the SVM.
# Start from an empty workspace (kept from the original script; note that this
# removes every object in the global environment, hidden ones included).
rm(list = ls(all = TRUE))

# Load the training file.
filepath <- "C:/R/data/train_mv.csv"
X0 <- read.csv(filepath)
# X0 stays attached because a later step of this script calls detach(X0).
attach(X0)

# Encode Sex as numeric level codes. Going through as.factor() is required on
# R >= 4.0.0, where read.csv() returns character columns by default and a bare
# as.numeric() would turn every value into NA (with a coercion warning).
# The global `Sex` is kept because the script later calls rm(Sex).
Sex <- as.numeric(as.factor(X0$Sex))

# Predictors (X1), response (Y1) and the combined frame used for fitting.
X1 <- data.frame(
  Sex = Sex,
  X0[, c("Age", "Pclass", "SibSp", "Parch", "Fare")]
)
Y1 <- X0$Survived
X1_ALL <- data.frame(X1, Y1)
# SVM
library(e1071)
# Console output: Loading required package: class
# Gaussian (RBF) kernel: initial fit. Its stored gamma is reused below as the
# fixed kernel width for tuning and for the final model.
x0 <- svm(
  as.factor(Y1) ~ Sex + Age + Pclass + SibSp + Parch + Fare,
  data = X1_ALL, type = "C-classification", kernel = "radial",
  cross = 10, probability = TRUE
)

# Parameter tuning: 10-fold cross-validated grid search over the cost only;
# gamma is held at the value stored in the initial fit.
costRange <- seq(0.1, 5, 0.1)
param <- tune.svm(
  as.factor(Y1) ~ Sex + Age + Pclass + SibSp + Parch + Fare,
  data = X1_ALL, gamma = x0$gamma, cost = costRange,
  tunecontrol = tune.control(sampling = "cross", cross = 10)
)

# Selected parameters: sigma (gamma) and cost. The cost is read through the
# full element name `best.parameters` and by column name, instead of relying
# on `$` partial matching and on the column position.
sigma0 <- x0$gamma
cost <- as.numeric(param$best.parameters$cost)

# Re-learning with the tuned cost. The argument must be spelled `cost`:
# svm() has no `C` argument, so `C = cost` is absorbed by `...` and the model
# is silently fitted with the default cost instead of the tuned one.
x0 <- svm(
  as.factor(Y1) ~ Sex + Age + Pclass + SibSp + Parch + Fare,
  data = X1_ALL, type = "C-classification", kernel = "radial",
  cross = 10, probability = TRUE, gamma = sigma0, cost = cost
)
# Cross-validation, ROC
library(Epi)
# Console output: Attaching package: 'Epi'
# The following object is masked from 'package:base':
#     merge.data.frame
# Manual 10-fold cross-validation: every cross-th row (starting at row i) is
# held out, a model is fitted on the remaining rows, and the held-out rows
# receive an out-of-fold probability that feeds the ROC curve.
cross <- 10 # 10-fold CV
pp <- rep(NA_real_, nrow(X1_ALL)) # out-of-fold probabilities, preallocated
AUC <- NaN # initialised as in the original script; not used below
k <- 1     # initialised as in the original script; not used below

# Probability column to extract, selected by class name. The column order of
# the "probabilities" attribute is not guaranteed to follow the factor-level
# order and can differ between folds, so picking column 2 by position can mix
# the two classes across folds.
# NOTE(review): assumes the second level of Y1 is the positive class
# (e.g. "1" for a 0/1 coding) -- confirm against the data.
pos_label <- levels(as.factor(Y1))[2]

for (i in seq_len(cross)) {
  holdout <- seq(i, nrow(X1_ALL), cross)
  # `cost = cost` (not `C = cost`): svm() has no `C` argument, so the tuned
  # cost would otherwise be ignored. The inner `cross = 10` of the original is
  # dropped here because its accuracy estimates were never read.
  x <- svm(
    as.factor(Y1) ~ Sex + Age + Pclass + SibSp + Parch + Fare,
    data = X1_ALL[-holdout, ], type = "C-classification", kernel = "radial",
    probability = TRUE, gamma = sigma0, cost = cost
  )
  z <- predict(x, X1_ALL[holdout, ], probability = TRUE)
  pp[holdout] <- attr(z, "probabilities")[, pos_label]
}
ROC(test = pp, stat = Y1)

# Clear the training-data attachment and the global Sex created from it.
detach(X0)
rm(Sex)

# Test data
filepath_test <- "C:/R/data/test_mv.csv"
X1_test <- read.csv(filepath_test)

# Encode Sex with the training set's levels so the numeric codes mean the same
# thing in both files; a value never seen in training becomes NA. Columns are
# referenced explicitly instead of via attach(), which the original never
# detached.
Sex <- as.numeric(factor(X1_test$Sex, levels = levels(as.factor(X0$Sex))))
X_test <- cbind(
  Sex,
  Age = X1_test$Age, Pclass = X1_test$Pclass, SibSp = X1_test$SibSp,
  Parch = X1_test$Parch, Fare = X1_test$Fare
)
X2_ALL <- data.frame(X_test)

# Prediction with the final model fitted on the full training set.
qqq1 <- predict(x0, X2_ALL)