## NOTE(review): clearing the workspace inside a script is discouraged, but it
## is kept to preserve this script's original fresh-session behavior.
rm(list = ls())
source("makeBinary.r")  # makeBinary(): one-hot encodes a label vector (project helper)
source("sigmoid.r")     # sigmoid(): logistic activation (project helper)
# library(R.matlab)
ptm <- proc.time()

## =============== Load Training Data ===============
cat("\n Loading data ... \n")
numDataFiles <- 8
# Read every chunk first and bind once at the end -- the original grew X and Y
# with rbind() inside the loop, copying the full matrices on every iteration.
chunkList <- lapply(seq_len(numDataFiles), function(fileNum) {
  read.csv(paste0("kaggleDigitTrain", fileNum, ".csv"), header = TRUE)
})
dataMatrix <- do.call(rbind, chunkList)
X <- as.matrix(dataMatrix[, seq(3, 786)])  # columns 3..786: the 784 pixel values
Y <- as.matrix(dataMatrix[, 2])            # column 2: the digit label
rm(chunkList, dataMatrix)
cat("\n Data loading complete. \n\n")

## =============== Separate and Configure Data ===============
cat("\n Configuring data and setting model parameters... \n")
Y[Y == 0] <- 10   # relabel digit 0 as class 10 so labels are 1..10 (1-based one-hot)
X <- (X / 255)^2  # Standardize the square of input values
m <- 3            # N-Fold factor: the first 1/m of the rows form the CV set
# NOTE(review): rows are not shuffled before the split -- assumes the CSV rows
# are already in random order; confirm against how the chunks were produced.
nCV <- floor(length(Y) / m)
YCV <- Y[seq_len(nCV)]
XCV <- X[seq_len(nCV), ]
Y <- Y[seq(nCV + 1, length(Y))]
X <- X[seq(nCV + 1, nrow(X)), ]
yBinary <- makeBinary(Y)      # one-hot label matrix for the training set
yBinaryCV <- makeBinary(YCV)  # one-hot label matrix for the CV set
cat("\n Configuration complete. \n")

## =============== Set Parameters of Neural Network ===============
numHiddenLayer1Units <- 100
numHiddenLayer2Units <- 50
numHiddenLayer3Units <- 75
# Uniform(-1, 1) weight initialization; each weight matrix carries one extra
# input column for the bias term.
initLayerWeights <- function(numOut, numIn) {
  matrix(runif(numOut * (numIn + 1), min = -1, max = 1), ncol = numIn + 1)
}
Thetas1 <- initLayerWeights(numHiddenLayer1Units, ncol(X))
Thetas2 <- initLayerWeights(numHiddenLayer2Units, numHiddenLayer1Units)
Thetas3 <- initLayerWeights(numHiddenLayer3Units, numHiddenLayer2Units)
Thetas4 <- initLayerWeights(length(table(Y)), numHiddenLayer3Units)
lambda <- 0.05  # L2 regularization strength
alpha <- 0.65   # The greater alpha is the steeper the cost curve will be, that is, learning is faster.
## =============== Train Neural Network ===============
maxEpochs <- 1000
J <- rep(0, maxEpochs)    # training-set cost per epoch
JCV <- rep(0, maxEpochs)  # cross-validation cost per epoch

# Input layers with a leading bias column of ones.
A1 <- unname(cbind(1, X))
A1CV <- unname(cbind(1, XCV))
cat("\n Model parameters set. \n\n")
ptm <- proc.time()

# DropConnect-style masking: zero each weight independently with probability
# 1/2 for the training pass. The original computed
#   A %*% t(Th) - A %*% t(Th * mask),
# which equals A %*% t(Th * (1 - mask)) but costs one extra matrix product.
maskWeights <- function(Theta) {
  Theta * (1 - matrix(round(runif(length(Theta))), nrow = nrow(Theta)))
}

cat("\n\n Training Neural Network ... \n\n")
for (epoch in seq_len(maxEpochs)) {
  # Decay the learning rate a little more each epoch.
  alpha <- (alpha * 0.998)^((100 + epoch) / 100)

  ## Feed-forward: the training pass uses randomly masked weights, the
  ## cross-validation pass always uses the full weights.
  g1   <- sigmoid(A1 %*% t(maskWeights(Thetas1)))
  g1CV <- sigmoid(A1CV %*% t(Thetas1))
  A2   <- cbind(1, g1)
  A2CV <- cbind(1, g1CV)
  g2   <- sigmoid(A2 %*% t(maskWeights(Thetas2)))
  g2CV <- sigmoid(A2CV %*% t(Thetas2))
  A3   <- cbind(1, g2)
  A3CV <- cbind(1, g2CV)
  g3   <- sigmoid(A3 %*% t(maskWeights(Thetas3)))
  g3CV <- sigmoid(A3CV %*% t(Thetas3))
  A4   <- cbind(1, g3)
  A4CV <- cbind(1, g3CV)
  A5   <- sigmoid(A4 %*% t(Thetas4))
  A5CV <- sigmoid(A4CV %*% t(Thetas4))

  ## Back-propagation.
  # FIX: the original built the output error as
  #   matrix(rep(A5, k), ncol = k) - yBinary,
  # which has k * nrow(A5) rows and is non-conformable with the n x k yBinary
  # (matrix subtraction errors). The output-layer error is simply the
  # prediction minus the one-hot target.
  delta5 <- A5 - yBinary
  # Hidden-layer errors use the sigmoid derivative g * (1 - g); column 1 of
  # every Theta / A matrix is the bias and is excluded.
  # (t(t(Th[, -1]) %*% t(d)) in the original simplifies to d %*% Th[, -1].)
  delta4 <- (delta5 %*% Thetas4[, -1]) * (A4[, -1] * (1 - A4[, -1]))
  delta3 <- (delta4 %*% Thetas3[, -1]) * (A3[, -1] * (1 - A3[, -1]))
  delta2 <- (delta3 %*% Thetas2[, -1]) * (A2[, -1] * (1 - A2[, -1]))
  # (The original also computed an unused delta1 -- removed as dead code, along
  # with the gradient1..4 <- 0 initializations that were always overwritten.)

  gradient4 <- t(A4[, -1]) %*% delta5
  gradient3 <- t(A3[, -1]) %*% delta4
  gradient2 <- t(A2[, -1]) %*% delta3
  gradient1 <- t(A1[, -1]) %*% delta2

  ## Gradient-descent update on the non-bias weights (column 1 is the bias).
  # FIX: L2 weight decay is subtracted and scaled by alpha; the original ADDED
  # (lambda/n) * Theta, which inflates the weights instead of shrinking them.
  n <- nrow(X)
  Thetas4[, -1] <- Thetas4[, -1] - (alpha / n) * (t(gradient4) + lambda * Thetas4[, -1])
  Thetas3[, -1] <- Thetas3[, -1] - (alpha / n) * (t(gradient3) + lambda * Thetas3[, -1])
  Thetas2[, -1] <- Thetas2[, -1] - (alpha / n) * (t(gradient2) + lambda * Thetas2[, -1])
  Thetas1[, -1] <- Thetas1[, -1] - (alpha / n) * (t(gradient1) + lambda * Thetas1[, -1])

  ## Cross-entropy cost on the training set, reported every 5 epochs.
  J[epoch] <- sum((-1 / n) * colSums(yBinary * log(A5) + (1 - yBinary) * log(1 - A5)))
  if (epoch == 1 || epoch %% 5 == 0) {
    cat(paste("\n Neural Network Epoch ", epoch, " Cost | ", J[epoch], "\n"))
    show(proc.time() - ptm)
  }
  ## Cross-entropy cost on the cross-validation set.
  JCV[epoch] <- sum((-1 / nrow(XCV)) * colSums(yBinaryCV * log(A5CV) + (1 - yBinaryCV) * log(1 - A5CV)))
}
show(proc.time() - ptm)
rm(yBinary, yBinaryCV)

## =============== Save Learned Weights (layers 1-3) ===============
timeStamp <- format(Sys.time(), "%Y%m%d%H%M")
# Shared file-name suffix encoding the network architecture.
archTag <- paste0("-L1-", numHiddenLayer1Units, "u-L2-", numHiddenLayer2Units,
                  "u-L3-", numHiddenLayer3Units, "u-L4-", length(table(Y)),
                  "u-", maxEpochs, "epochs.csv")
write.csv(Thetas1, paste0("Thetas/", timeStamp, "Thetas1", archTag), row.names = FALSE)
write.csv(Thetas2, paste0("Thetas/", timeStamp, "Thetas2", archTag), row.names = FALSE)
write.csv(Thetas3, paste0("Thetas/", timeStamp, "Thetas3", archTag), row.names = FALSE)
## Save the output-layer weights with the architecture-tagged file name.
write.csv(Thetas4,
          paste0("Thetas/", timeStamp, "Thetas4-L1-", numHiddenLayer1Units,
                 "u-L2-", numHiddenLayer2Units, "u-L3-", numHiddenLayer3Units,
                 "u-L4-", length(table(Y)), "u-", maxEpochs, "epochs.csv"),
          row.names = FALSE)

## Plot the training (blue) and cross-validation (red) cost curves.
plot(J, type = "l", col = "blue",
     xlim = c(0, maxEpochs + 1), ylim = c(0, max(J, JCV)))
lines(JCV, col = "red")
## Cross-validation accuracy: the predicted class is the index of the largest
## output unit (labels are 1..10, digit 0 having been relabeled as class 10).
show(mean(YCV == max.col(A5CV)))
grid()