## ========================================================================
## processANN.r
## ========================================================================
####### Implement Artificial Neural Network Process #######

## Load the project-local feed-forward and back-propagation routines.
## Both communicate with this script through global variables
## (Theta1..ThetaN, a1..aN, delta*) created below via assign().
source(file.path(getwd(), "ANN", "ANNFeedForwardLayer.r"))
source(file.path(getwd(), "ANN", "ANNBackPropagation.r"))

ptm <- proc.time()  # wall-clock timer for the whole run

splitRatio <- 0.8  # fraction of rows used for training

## Separate validation set and training set with "splitRatio" defined above.
## The first (1 - splitRatio) fraction of rows becomes the test set, the
## remainder the training set.
## NOTE(review): rows are split in file order, not shuffled — this assumes
## X and Y (which must exist as matrices before sourcing this script) are
## already randomized; confirm upstream.
(mTest <- floor((1 - splitRatio) * dim(X)[1]))
xTest <- X[seq_len(mTest), ]   # seq_len() is safe even when mTest == 0
yTest <- Y[seq_len(mTest), ]   # <-------- Specify output variable
xTrain <- X[seq(mTest + 1, dim(X)[1]), ]
yTrain <- Y[seq(mTest + 1, dim(Y)[1]), ]  # <-------- Specify output variable

## Initialize activation function:
#source(paste(getwd(),"/ANN/ActivationFunctions/activationFunctionSigmoid.r",sep=""))
#source(paste(getwd(),"/ANN/ActivationFunctions/activationFunctionHyperbolicTangent.r",sep=""))
#source(paste(getwd(),"/ANN/ActivationFunctions/activationFunctionTLU.r",sep=""))

## Network topology: input layer, hidden layers, output layer.
numHiddenLayers <- 6
numHiddenLayerUnits <- c(223, 171, 123, 71, 23, 7)
(numLayerUnits <- as.numeric(c(dim(X)[2], numHiddenLayerUnits, dim(Y)[2])))

## Initialize theta parameters with random numbers between -1 and 1.
## ThetaK maps layer K to layer K+1: one row per unit of layer K+1 and one
## column per unit of layer K plus a bias column (hence the "+1").
for (thetaLayerNum in seq_len(numHiddenLayers + 1)) {
  assign(paste0("Theta", thetaLayerNum),
         matrix(runif((numLayerUnits[thetaLayerNum] + 1) * numLayerUnits[thetaLayerNum + 1],
                      min = -1, max = 1),
                ncol = numLayerUnits[thetaLayerNum] + 1))
}

lambda <- 0.15      # regularization strength
alpha <- 0.008      # learning rate
max_epochs <- 8     # number of full passes over the training set
mTrain <- dim(xTrain)[1]

## J holds the per-epoch training cost, one column per output unit.
J <- matrix(rep(0, max_epochs * numLayerUnits[length(numLayerUnits)]),
            ncol = numLayerUnits[length(numLayerUnits)])
#J<-array(rep(0,max_epochs*numLayerUnits[length(numLayerUnits)]*length(numLayerUnits)),dim=c(max_epochs,length(numLayerUnits),numLayerUnits[length(numLayerUnits)]))
#source("cvFeedForwardLayer.r")
#cvJ<-matrix(rep(0,max_epochs*numLayerUnits[length(numLayerUnits)]),ncol=numLayerUnits[length(numLayerUnits)])
#cvJ<-array(rep(0,max_epochs*numLayerUnits[length(numLayerUnits)]*length(numLayerUnits)),dim=c(max_epochs,length(numLayerUnits),numLayerUnits[length(numLayerUnits)]))

for (epoch in seq_len(max_epochs)) {

  ## Initialize gradient accumulators (one per theta layer) to zero; these
  ## are read/updated by the sourced back-propagation routine via get().
  for (numGradient in seq_len(numHiddenLayers + 1)) {
    assign(paste0("gradient", numGradient), 0)
  }

  ## Feed Forward Process:
  ## a1 is the training input with a leading bias column of ones; each
  ## subsequent aK holds the activations of layer K (again with a bias
  ## column prepended). ANNFeedForwardLayer(k) computes layer k+1's
  ## activations from the globals set up here.
  a1 <- matrix(as.numeric(cbind(rep(1, dim(xTrain)[1]), as.matrix(xTrain))),
               ncol = dim(xTrain)[2] + 1)
  for (layerCount in seq_len(numHiddenLayers + 1)) {
    g <- ANNFeedForwardLayer(layerCount)
    assign(paste0("a", layerCount + 1), g)
    ## The returned activations are sometimes transposed; if the column
    ## count does not match the layer size, transpose before prepending
    ## the bias column of ones.
    if (dim(get(paste0("a", layerCount + 1)))[2] != numLayerUnits[layerCount + 1]) {
      assign(paste0("a", layerCount + 1), t(get(paste0("a", layerCount + 1))))
    }
    assign(paste0("a", layerCount + 1),
           cbind(rep(1, dim(get(paste0("a", layerCount + 1)))[1]),
                 get(paste0("a", layerCount + 1))))
  }
  ## Final network output: last layer's activations without the bias column.
  a <- get(paste0("a", numHiddenLayers + 2))
  a <- as.matrix(a[, -1], ncol = 1)  # <-- The "-1" is for the bias term

  # Cross Validation (disabled):
  # b1<-matrix(as.numeric(cbind(rep(1,dim(xTest)[1]),as.matrix(xTest))),ncol=(dim(xTest)[2]+1))
  # for (layerCount in 1:(numHiddenLayers+1)){
  #   cvg<-ANNFeedForwardLayer(layerCount)
  #   assign(paste("b",layerCount+1,sep=""),cvg)
  #   if(dim(get(paste("b",layerCount+1,sep="")))[2]!=numLayerUnits[layerCount+1]){
  #     assign(paste("b",layerCount+1,sep=""),t(get(paste("b",layerCount+1,sep=""))))
  #     assign(paste("b",layerCount+1,sep=""),cbind(rep(1,dim(get(paste("b",layerCount+1,sep="")))[1]),get(paste("b",layerCount+1,sep=""))))
  #   }else{
  #     assign(paste("b",layerCount+1,sep=""),cbind(rep(1,dim(get(paste("b",layerCount+1,sep="")))[1]),get(paste("b",layerCount+1,sep=""))))
  #   }
  # }
  # b<-get(paste("b",numHiddenLayers+2,sep=""))
  # b<-as.matrix(b[,-1],ncol=1)

  ## Back Propagation:
  ## deltaN is the output-layer error; ANNBackPropagation returns all
  ## updated Theta matrices flattened into a single numeric vector, each
  ## layer encoded as [nrow, ncol, values...].
  assign(paste0("delta", numHiddenLayers + 2), a - yTrain)
  Thetas <- ANNBackPropagation(numHiddenLayers)

  ## Unroll Theta parameters from back propagation.
  numRows <- 0
  numColumns <- 0
  vectorLocation <- c(1, rep(0, numHiddenLayers + 1))
  for (layerCount in seq_len(numHiddenLayers + 1)) {
    numRows <- Thetas[vectorLocation[layerCount]]
    numColumns <- Thetas[vectorLocation[layerCount] + 1]
    ## Bias term is the first column of the existing Theta (it is left
    ## unchanged by the unroll); the remaining columns come from the
    ## flattened vector returned by back propagation.
    assign(paste0("Theta", layerCount),
           cbind(get(paste0("Theta", layerCount))[, 1],
                 matrix(Thetas[seq(vectorLocation[layerCount] + 2,
                                   vectorLocation[layerCount] + numRows * numColumns + 1)],
                        ncol = numColumns)))
    vectorLocation[layerCount + 1] <- vectorLocation[layerCount] + numRows * numColumns + 2
  }

  ## NOTE(review): the original code evaluated
  ##   Thetas[which((abs(Thetas)>10))]
  ## here, but the result was never assigned or printed (auto-printing is
  ## suppressed inside loops), so it was a no-op and has been removed.

  ## Per-output-unit cross-entropy cost for this epoch.
  ## NOTE(review): log(a) / log(1 - a) yield -Inf if any activation
  ## saturates at exactly 0 or 1 — consider clipping a if that occurs.
  J[epoch, ] <- (-1 / mTrain) * colSums(yTrain * log(a) + (1 - yTrain) * log(1 - a))
  #cvJ[epoch,]<-(-1/mTest)*(colSums(yTest*log(b)+(1-yTest)*log(1-b)))
}

library(graphics)
## Plot one cost curve per output unit, laid out in (up to) 3 columns.
## BUG FIX: the original computed the column count as dim(J)[2] %% 3, which
## is 0 whenever the number of output units is a multiple of 3 and makes
## par() error out with an invalid mfrow value; min(dim(J)[2], 3) gives the
## intended 3-wide grid.
op <- par(mfrow = c(ceiling(dim(J)[2] / 3), min(dim(J)[2], 3)),
          new = TRUE, pty = "s", mar = 0.1 + c(2, 2, 2, 1))
for (r in seq_len(dim(J)[2])) {
  plot(seq_len(max_epochs), J[seq_len(max_epochs), r],
       main = paste0("J(Theta", r, ")"),
       xlab = "Epoch", ylab = "J", col = "blue", type = "l")
  #lines(seq(1,max_epochs),cvJ[seq(1,max_epochs)], col="blue")
  grid()
}
par(op)
(min(J))

## Summarize the run on the console.
Time <- proc.time() - ptm
cat("min J: ", signif(min(J), 5),
    " | lambda: ", lambda,
    " | alpha: ", alpha,
    " | runtime: ", signif(Time[3], 5),
    " | epochs: ", max_epochs,
    " | hidden layers: ", numHiddenLayers,
    " | hidden layer units: ", numHiddenLayerUnits,
    sep = " ")  # numHiddenLayerUnits does not include the bias term

## Determine the current revision of the "submission": pick the first
## thetasYYYYMMDD<letter>.csv (letter a-e) that does not already exist.
## This replaces the original 5-deep nested-if ladder and matches its
## behavior exactly, including settling on "e" even when that file exists.
versionLetter <- "a"
fName <- paste0("thetas", format(Sys.Date(), "%Y%m%d"), versionLetter, ".csv")
for (letter in c("b", "c", "d", "e")) {
  if (!file.exists(fName)) break
  versionLetter <- letter
  fName <- paste0("thetas", format(Sys.Date(), "%Y%m%d"), versionLetter, ".csv")
}

# Override naming of file:
#fName<-"submission20140825e.csv"

## Persist the final flattened Theta vector from the last epoch.
write.csv(Thetas, file = fName, row.names = FALSE)

# The following results did not have absorbance measurements included (i.e. there were only 16 independent variables/columns):
# min J: 0.1356 | lambda: 0.21 | alpha: 0.005 | runtime: 30.89 | epochs: 700 | hidden layers: 3 | hidden layer units: 100 15 30
# min J: 0.13649 | lambda: 0.21 | alpha: 0.003 | runtime: 64.23 | epochs: 1000 | hidden layers: 5 | hidden layer units: 100 25 50 12 25
# min J: 0.3881 | lambda: 0.15 | alpha: 0.001 | runtime: 116.13 | epochs: 1200 | hidden layers: 5 | hidden layer units: 100 75 50 25 10
# min J: 0.18787 | lambda: 0.15 | alpha: 0.001 | runtime: 3812 | epochs: 2000 | hidden layers: 3 | hidden layer units: 100 75 50
# min J: 0.18603 | lambda: 0.15 | alpha: 0.001 | runtime: 3812.5 | epochs: 2000 | hidden layers: 3 | hidden layer units: 100 75 50
# min J: 0.17234 | lambda: 0.15 | alpha: 0.005 | runtime: 4770.9 | epochs: 2500 | hidden layers: 4 | hidden layer units: 100 75 50 25
# min J: -11.512 | lambda: 0.15 | alpha: 0.005 | runtime: 3939.6 | epochs: 2000 | hidden layers: 4 | hidden layer units: 100 75 50 25

#write.csv(cbind(PIDN=as.character(testPIDN), Prediction), "predictions.csv", row.names=FALSE)