
MACHINE LEARNING ALGORITHMS IN R

# Random Forest
DatosIris <- read.table("C:\\datosIris.txt", header=TRUE, sep=";")
set.seed(1234)
ind <- sample(2, nrow(DatosIris), replace=TRUE, prob=c(0.7, 0.3))
trainData <- DatosIris[ind==1,]
testData <- DatosIris[ind==2,]
library(randomForest)
myFormula <- Species ~ .
rf <- randomForest(myFormula, data=trainData, ntree=100, proximity=TRUE)
table(predict(rf), trainData$Species)
print(rf)
attributes(rf)
plot(rf)
importance(rf)
varImpPlot(rf)
irisPred <- predict(rf, newdata=testData)
table(irisPred, testData$Species)
plot(margin(rf, testData$Species))
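
A hedged follow-up (not in the original listing): overall test accuracy read off the irisPred and testData objects built above.

# sketch: proportion of correctly classified test rows
mean(irisPred == testData$Species)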

# Decision tree
DatosIris <- read.table("C:\\datosIris.txt", header=TRUE, sep=";")
set.seed(1234)
ind <- sample(2, nrow(DatosIris), replace=TRUE, prob=c(0.7, 0.3))
trainData <- DatosIris[ind==1,]
testData <- DatosIris[ind==2,]
library(party)
myFormula <- Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
iris_ctree <- ctree(myFormula, data=trainData)
# check the prediction
table(predict(iris_ctree), trainData$Species)
print(iris_ctree)
plot(iris_ctree)
plot(iris_ctree, type="simple")
# predict on test data
testPred <- predict(iris_ctree, newdata = testData)
table(testPred, testData$Species)

# Logistic Regression
DatosIris <- read.table("C:\\datosIris.txt", header=TRUE, sep=";")
newCol <- data.frame(isVirginica=(DatosIris$Species=='virginica'))
DatosIris <- cbind(DatosIris, newCol)
set.seed(88)
ind <- sample(2, nrow(DatosIris), replace=TRUE, prob=c(0.7, 0.3))
trainData <- DatosIris[ind==1,]
testData <- DatosIris[ind==2,]
library(caTools)
# logistic regression model on the four predictors
myFormula <- isVirginica ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
model <- glm(myFormula, data = trainData, family = "binomial")
summary(model)
prob <- predict(model, newdata=testData, type = 'response')
round(prob, 3)
# confusion matrix
table(testData$isVirginica, prob > 0.5)
# ROCR curve
# install.packages("ROCR")
library(ROCR)
ROCRpred <- prediction(prob, testData$isVirginica)
ROCRperf <- performance(ROCRpred, 'tpr', 'fpr')
plot(ROCRperf, colorize = TRUE, text.adj = c(-0.2, 1.7))
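
A hedged addition (not in the original): the same ROCR prediction object also yields the area under the ROC curve.

# sketch: AUC from the ROCRpred object defined above
ROCRauc <- performance(ROCRpred, measure = "auc")
ROCRauc@y.values[[1]]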

# Linear regression
prestige <- read.table("C:\\prestige.txt", header=TRUE, sep=',')
indPrest <- sample(2, nrow(prestige), replace=TRUE, prob=c(0.7, 0.3))
trainprestige <- prestige[indPrest==1,]
testprestige <- prestige[indPrest==2,]
myFormula <- prestige ~ education + income
model <- lm(myFormula, data=trainprestige)
model$coefficients
prediction <- predict(model, newdata=testprestige)
cor(prediction, testprestige$prestige)
residuals(model)
summary(model)
plot(model)
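
A hedged addition (not in the original): test-set RMSE complements the correlation check above.

# sketch: root mean squared error on the held-out rows
sqrt(mean((prediction - testprestige$prestige)^2))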

#K-means
irisData <- read.table("C:\\datosIris.txt", header=TRUE, sep=";")
newiris <- irisData
newiris$Species <- NULL
kmeans.result <- kmeans(newiris, 3)
table(irisData$Species, kmeans.result$cluster)
plot(newiris[c("Sepal.Length", "Sepal.Width")],
     col = kmeans.result$cluster)
# plot cluster centers
points(kmeans.result$centers[,c("Sepal.Length", "Sepal.Width")],
       col=1:3, pch=8, cex=2)

# Apriori
# factor columns are needed so apriori() can coerce the data frame to transactions
titanic <- read.table("C:\\titanic.txt", header=TRUE, sep=";", stringsAsFactors=TRUE)
library(arules)
# find association rules with default settings
rules.all <- apriori(titanic)
rules.all
inspect(rules.all)
# rules with rhs containing "Survived" only
rules <- apriori(titanic, control = list(verbose=F),
                 parameter = list(minlen=2, supp=0.005, conf=0.8),
                 appearance = list(rhs=c("Survived=No", "Survived=Yes"), default="lhs"))
quality(rules) <- round(quality(rules), digits=3)
rules.sorted <- sort(rules, by="lift")
inspect(rules.sorted)
# find redundant rules
subset.matrix <- is.subset(rules.sorted, rules.sorted)
subset.matrix[lower.tri(subset.matrix, diag=T)] <- NA
redundant <- colSums(subset.matrix, na.rm=T) >= 1
which(redundant)
# remove redundant rules
rules.pruned <- rules.sorted[!redundant]
inspect(rules.pruned)
library(arulesViz)
plot(rules.all)
plot(rules.all, method='grouped')
plot(rules.all, method='graph')
plot(rules.all, method='graph', control=list(type='items'))
plot(rules.all, method='paracoord', control=list(reorder=TRUE))
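
A hedged alternative (not in the original): recent arules releases ship the redundancy test used above as a single helper.

# sketch: built-in redundancy filter, equivalent in spirit to the
# subset-matrix construction above
rules.pruned2 <- rules.sorted[!is.redundant(rules.sorted)]
inspect(rules.pruned2)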

# Support vector machine
library(e1071)
testidx <- which(1:length(iris[,1]) %% 5 == 0)
iristrain <- iris[-testidx,]
iristest <- iris[testidx,]
tune <- tune.svm(Species~., data=iristrain,
                 gamma=10^(-6:-1), cost=10^(1:4))
model <- svm(Species~., data=iristrain, type='C-classification',
             kernel='radial', probability=TRUE, gamma=0.001, cost=10000)
prediction <- predict(model, iristest, probability=TRUE)
table(iristest$Species, prediction)
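
A hedged addition (not in the original): the grid search already stores its winning settings, so they need not be hard-coded.

# sketch: refit with the parameters chosen by tune.svm
summary(tune)
best <- tune$best.parameters
model2 <- svm(Species~., data=iristrain, type='C-classification',
              kernel='radial', probability=TRUE,
              gamma=best$gamma, cost=best$cost)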

# Naive Bayes
library(e1071)
# handles categorical and numeric input variables; the output must be categorical
testidx <- which(1:length(iris[,1]) %% 5 == 0)
iristrain <- iris[-testidx,]
iristest <- iris[testidx,]
model <- naiveBayes(Species~., data=iristrain)
prediction <- predict(model, iristest[,-5])
table(prediction, iristest[,5])

# neural network
library(neuralnet)
set.seed(101)
size.sample <- 50
iristrain <- iris[sample(1:nrow(iris), size.sample),]
nnet_iristrain <- iristrain
# binarize the categorical output
nnet_iristrain <- cbind(nnet_iristrain, iristrain$Species == 'setosa')
nnet_iristrain <- cbind(nnet_iristrain, iristrain$Species == 'versicolor')
nnet_iristrain <- cbind(nnet_iristrain, iristrain$Species == 'virginica')
names(nnet_iristrain)[6] <- 'Issetosa'
names(nnet_iristrain)[7] <- 'Isversicolor'
names(nnet_iristrain)[8] <- 'Isvirginica'
myFormula <- Issetosa + Isversicolor + Isvirginica ~
  Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
nn <- neuralnet(myFormula, data=nnet_iristrain, hidden=c(3))
plot(nn)
mypredict <- compute(nn, iris[-5])$net.result
maxidx <- function(arr) {
  return(which(arr == max(arr)))
}
idx <- apply(mypredict, c(1), maxidx)
prediction <- c('setosa', 'versicolor', 'virginica')[idx]
table(prediction, iris$Species)
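
A hedged variation (not in the original): the listing scores the network on all 150 rows, including the 50 training rows; restricting compute() to the unsampled rows gives an honest error estimate.

# sketch: evaluate only on rows not drawn into iristrain
testrows <- setdiff(1:nrow(iris), as.integer(rownames(iristrain)))
testpredict <- compute(nn, iris[testrows, -5])$net.result
testidx2 <- apply(testpredict, 1, maxidx)
table(c('setosa', 'versicolor', 'virginica')[testidx2], iris$Species[testrows])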

#K-Nearest Neighbor
library(class)
testidx <- which(1:length(iris[,1]) %% 5 == 0)
iristrain <- iris[-testidx,]
iristest <- iris[testidx,]
train_input <- as.matrix(iristrain[,-5])
train_output <- as.vector(iristrain[,5])
test_input <- as.matrix(iristest[,-5])
prediction <- knn(train_input, test_input, train_output, k=5)
table(prediction, iristest$Species)
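
A hedged refinement (not in the original): kNN is distance-based, so standardizing the inputs, using the training means and standard deviations for both sets, is often advisable.

# sketch: z-score features before running knn
train_scaled <- scale(train_input)
test_scaled <- scale(test_input,
                     center = attr(train_scaled, "scaled:center"),
                     scale = attr(train_scaled, "scaled:scale"))
prediction2 <- knn(train_scaled, test_scaled, train_output, k=5)
table(prediction2, iristest$Species)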

# dimensionality reduction algorithms (1)
library(stats)
# train and test are placeholders here: any numeric data frames or
# matrices with identical columns
pca <- princomp(train, cor=TRUE)
train_reduced <- predict(pca, train)
test_reduced <- predict(pca, test)
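
A hedged addition (not in the original): summary() reports the variance captured by each component, which guides how many columns of train_reduced to keep.

# sketch: proportion of variance per principal component
summary(pca)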

#Gradient Boosting
library(gbm)
iris2 <- iris
newcol <- data.frame(isVersicolor=(iris2$Species=='versicolor'))
iris2 <- cbind(iris2, newcol)
iris2[45:55,]
formula <- isVersicolor ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
model <- gbm(formula, data=iris2, n.trees=1000,
             interaction.depth=2, distribution="bernoulli")
prediction <- predict.gbm(model, iris2[45:55,], type="response", n.trees=1000)
round(prediction, 3)
summary(model)
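
A hedged addition (not in the original): gbm can estimate how many of the 1000 trees are actually worth using.

# sketch: choose the iteration count by the out-of-bag estimate
best.iter <- gbm.perf(model, method="OOB")
prediction2 <- predict.gbm(model, iris2[45:55,], type="response", n.trees=best.iter)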

# dimensionality reduction algorithms (2)
require(graphics)
library(stats)
princomp(USArrests, cor = TRUE)  # equivalent to prcomp(USArrests, scale=TRUE)
summary(pc.cr <- princomp(USArrests, cor = TRUE))
loadings(pc.cr)  # note that blank entries are small but not zero
plot(pc.cr)      # shows a screeplot
biplot(pc.cr)
## formula interface
princomp(~ ., data = USArrests, cor = TRUE)
## NA-handling
USArrests[1, 2] <- NA
pc.cr <- princomp(~ Murder + Assault + UrbanPop, data = USArrests,
                  na.action = na.exclude, cor = TRUE)
pc.cr$scores[1:5, ]
## (simple) robust PCA
## classical:
(pc.cl <- princomp(stackloss))
## robust:
(pc.rob <- princomp(stackloss, covmat = MASS::cov.rob(stackloss)))

#Time Series Analysis
f <- decompose(AirPassengers)
# seasonal figures
f$figure
plot(f$figure, type="b", xaxt="n", xlab="")
# get names of 12 months in English words
monthNames <- months(ISOdate(2011, 1:12, 1))
# label x-axis with month names
# las is set to 2 for vertical label orientation
axis(1, at=1:12, labels=monthNames, las=2)
plot(f)
fit <- arima(AirPassengers, order=c(1,0,0), list(order=c(2,1,0), period=12))
fore <- predict(fit, n.ahead=24)
# error bounds at 95% confidence level
U <- fore$pred + 2*fore$se
L <- fore$pred - 2*fore$se
ts.plot(AirPassengers, fore$pred, U, L, col=c(1,2,4,4), lty=c(1,1,2,2))
legend("topleft", c("Actual", "Forecast", "Error Bounds (95% Confidence)"),
       col=c(1,2,4), lty=c(1,1,2))
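
A hedged addition (not in the original): the fitted ARIMA model can be checked for leftover autocorrelation in its residuals.

# sketch: standard residual diagnostics for the arima fit
tsdiag(fit)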

#Hierarchical Clustering
irisData <- read.table("C:\\datosIris.txt", header=TRUE, sep=";")
idx <- sample(1:dim(irisData)[1], 40)
irisSample <- irisData[idx,]
irisSample$Species <- NULL
hc <- hclust(dist(irisSample), method="ave")
plot(hc, hang=-1, labels=irisData$Species[idx])
# cut tree into 3 clusters
rect.hclust(hc, k=3)
groups <- cutree(hc, k=3)
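
A hedged addition (not in the original): cross-tabulating the cut against the known species labels shows how well the dendrogram recovers them.

# sketch: agreement between the 3-way cut and the true labels
table(groups, irisData$Species[idx])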
