## Entropy and information gain of every input column of a data frame `S`
## with respect to its class (output) column, as used for ID3-style
## attribute selection.
##
## Results are published in the workspace under these names:
##   numOutputVarValues.<out>   number of distinct values of output <out>
##   <out>Entropy               entropy of <out>, in bits
##   p.<out>.<k>                relative frequency of the k-th value of <out>
##   p.<in>.<v>.<out>.<w>       share of rows with <in> == v having <out> == w
##   Entropy.<in>.<v>.<out>     entropy of <out> among rows with <in> == v
##   informationGain.<in>       entropy of the first output column minus the
##                              weighted sum of its conditional entropies
##   informationGain            named vector holding all informationGain.<in>
##
## Notes on this revision:
##   * Zero probabilities contribute 0 to an entropy instead of
##     0 * log2(0) = NaN, so gains stay finite for sparse value combinations.
##   * Columns are read from `S` by position, not through attach()/get(), so
##     a workspace variable that shares a column's name cannot shadow it.
##   * The workspace is no longer cleared with rm(list = ls()).
##   * Counts come from table(), which leaves NA values out by default.

## Alternative example data (Play-Tennis); uncomment to use it instead of iris:
# Sunny<-"Sunny"; Overcast<-"Overcast"; Rain<-"Rain"
# Outlook<-c(Sunny,Sunny,Overcast,Rain,Rain,Rain,Overcast,Sunny,Sunny,Rain,Sunny,Overcast,Overcast,Rain)
# Hot<-"Hot"; Mild<-"Mild"; Cool<-"Cool"
# Temperature<-c(Hot,Hot,Hot,Mild,Cool,Cool,Cool,Mild,Cool,Mild,Mild,Mild,Hot,Mild)
# High<-"High"; Normal<-"Normal"
# Humidity<-c(High,High,High,High,Normal,Normal,Normal,High,Normal,Normal,Normal,High,Normal,High)
# Weak<-"Weak"; Strong<-"Strong"
# Wind<-c(Weak,Strong,Weak,Weak,Weak,Strong,Strong,Weak,Weak,Weak,Strong,Strong,Weak,Strong)
# Yes<-"Yes"; No<-"No"
# PlayTennis<-c(No,No,Yes,Yes,Yes,No,Yes,No,Yes,Yes,Yes,Yes,Yes,No)
# (S<-as.data.frame(cbind(Outlook,Temperature,Humidity,Wind,PlayTennis)))
# currentDir<-getwd()
# S<-read.table(paste(currentDir,"/DataSets/VIY1/VI1Y1.csv",sep=""),sep=",",header=TRUE)
# S<-S[,-(dim(S)[2])]
# S<-S[,-1]

# Shannon entropy, in bits, of the distribution described by a vector of
# counts. Zero counts are dropped first: their contribution is 0 by
# convention, whereas 0 * log2(0) would evaluate to NaN.
entropyBits <- function(counts) {
  counts <- counts[counts > 0]
  if (length(counts) == 0L) {
    return(0)
  }
  p <- counts / sum(counts)
  -sum(p * log2(p))
}

S <- iris

outputVarColNumbers <- c(dim(S)[2]) # c(5)
# seq_len() yields an empty vector (not c(1, 0)) when S has a single column.
inputVarColNumbers <- seq_len(dim(S)[2] - 1) # c(1, 2, 3, 4)
numOutputVar <- length(outputVarColNumbers)
numInputVar <- length(inputVarColNumbers)

## Entropy of each output variable ---------------------------------------
for (outputVarCount in seq_len(numOutputVar)) { ## ["PlayTennis"]
  outputVarName <- names(S)[outputVarColNumbers[outputVarCount]]
  outputCounts <- table(S[[outputVarColNumbers[outputVarCount]]])
  assign(paste0("numOutputVarValues.", outputVarName), length(outputCounts))
  for (valCount in seq_along(outputCounts)) { ## ["Yes", "No"]
    assign(
      paste0("p.", outputVarName, ".", valCount),
      outputCounts[[valCount]] / sum(outputCounts)
    )
  }
  assign(paste0(outputVarName, "Entropy"), entropyBits(outputCounts))
}

## Information gain of each input variable -------------------------------
# The gain is measured against the first output column for both the base
# entropy and the conditional terms.
baseEntropy <- entropyBits(table(S[[outputVarColNumbers[1]]]))

informationGain <- numeric(numInputVar)
names(informationGain) <- names(S)[inputVarColNumbers]

for (inputVarCount in seq_len(numInputVar)) { ## ["Outlook", ..., "Wind"]
  inputVarName <- names(S)[inputVarColNumbers[inputVarCount]]
  gain <- baseEntropy

  for (outputVarCount in seq_len(numOutputVar)) { ## ["PlayTennis"]
    outputVarName <- names(S)[outputVarColNumbers[outputVarCount]]

    # Contingency table: one row per input value, one column per output value.
    joint <- table(
      S[[inputVarColNumbers[inputVarCount]]],
      S[[outputVarColNumbers[outputVarCount]]]
    )
    inputVarValNames <- rownames(joint)
    outputVarValNames <- colnames(joint)
    jointTotal <- sum(joint)

    for (inputVarValCount in seq_along(inputVarValNames)) { ## ["Sunny", ...]
      inputVarValName <- inputVarValNames[inputVarValCount]
      rowCounts <- as.vector(joint[inputVarValCount, ])
      rowTotal <- sum(rowCounts)

      for (outputVarValCount in seq_along(outputVarValNames)) {
        assign(
          paste0(
            "p.", inputVarName, ".", inputVarValName, ".",
            outputVarName, ".", outputVarValNames[outputVarValCount]
          ),
          rowCounts[outputVarValCount] / rowTotal
        )
      }

      conditionalEntropy <- entropyBits(rowCounts)
      assign(
        paste0("Entropy.", inputVarName, ".", inputVarValName, ".", outputVarName),
        conditionalEntropy
      )

      # Only the first output column feeds the gain (see baseEntropy above).
      if (outputVarCount == 1L && jointTotal > 0) {
        gain <- gain - (rowTotal / jointTotal) * conditionalEntropy
      }
    }
  }

  assign(paste0("informationGain.", inputVarName), gain)
  informationGain[inputVarCount] <- gain
}