% ***** GRADIENT DESCENT LOGISTIC REGRESSION (TWO FEATURES) *****
%
% Fits a logistic (sigmoid) classifier to ten labelled 2-D points with
% batch gradient descent, animating the decision boundary while it trains
% and finally plotting the cost history on log-log axes.
%
% m:      Number of data samples
% x1, x2: The two features of every sample (row vectors of length m);
%         the implicit bias feature is x0=1
% y:      Binary class label (0 or 1) of every sample
% theta1, theta2, theta3:
%         History of the bias, x1 and x2 parameters; entry k holds the
%         value after k-1 updates
% g:      Linear score, g=theta1+theta2*x1+theta3*x2
% h:      "Hypothesis" or prediction model, h=1/(1+exp(-g))
% J:      Cross-entropy cost history, one entry per parameter set
% alpha:  Per-parameter step sizes (learning rates)

clear all

% Data points
x1=[-2.4 8.2 1.9 12.1 -2.1 9.9 -9 -1 9.2 1];
x2=[ 4.6 2.9 2.7 3.2 4.0 3.4 10 8 -3.4 2];
y=[ 0 1 0 1 0 1 0 1 1 0];

% Initialize parameters; a new entry is appended on every iteration
theta1(1)=-1;
theta2(1)=1;
theta3(1)=1;

% Step sizes, one per parameter; these do not change during iteration
alpha=[0.8 0.6 0.4];

tol=0.0001;     % Stop once the cost changes by no more than this
maxIter=10000;  % Safety cap so an oscillating cost cannot loop forever

% NOTE(review): the regularization constant is declared but is not applied
% to the cost or to the parameter updates below. Wiring it in would change
% the fitted parameters, so it is only flagged here.
lambda=1;

m=length(y); % Number of data points

g=theta1(1)+theta2(1).*x1+theta3(1).*x2;
h=1./(1+exp(-g)); % Hypothesized model

% Clamp h away from exactly 0 and 1 for the cost only: log(0) is -Inf and
% 0*(-Inf) is NaN, which would silently end the loop with an invalid cost.
hc=min(max(h,eps),1-eps);
J(1)=-(1/m)*sum(y.*log(hc)+(1-y).*log(1-hc));

e=1;     % Absolute change in cost between consecutive iterations
count=2; % Number of iterations that have occurred plus one

xb=[-10 12];  % x1 extent of the plot window, used to draw the boundary
isPos=(y==1); % Mask of the samples labelled 1

while (e>tol) && (count<=maxIter+1)
    % Gradient step. The cross-entropy gradient is (1/m)*sum((h-y).*x);
    % the extra factor of 2 kept here simply scales the step sizes.
    err=h-y;
    t1=theta1(count-1)-alpha(1).*(2/m).*sum(err);
    t2=theta2(count-1)-alpha(2).*(2/m).*sum(err.*x1);
    t3=theta3(count-1)-alpha(3).*(2/m).*sum(err.*x2);
    theta1(count)=t1;
    theta2(count)=t2;
    theta3(count)=t3;

    g=t1+t2.*x1+t3.*x2;
    h=1./(1+exp(-g));
    hc=min(max(h,eps),1-eps);
    J(count)=-(1/m)*sum(y.*log(hc)+(1-y).*log(1-hc));
    e=abs(J(count)-J(count-1));
    count=count+1;

    % Draw the decision boundary g=0, i.e. t1+t2*x1+t3*x2=0, in the x1-x2
    % plane. The previous code plotted g (in sample order) against sorted
    % x1, which mismatched the pairs and was not the class boundary.
    if t3~=0
        plot(xb,-(t1+t2.*xb)./t3)
    elseif t2~=0
        xv=-t1/t2; % Boundary is the vertical line x1=xv
        plot([xv xv],[-10 12])
    else
        cla % No boundary exists; just clear the previous frame
    end
    axis([-10 12 -10 12])
    hold on
    plot(x1(isPos),x2(isPos),'ro',x1(~isPos),x2(~isPos),'bx')
    grid on
    pause(0.1)
    hold off
end

% Cost history versus parameter-set index
figure
loglog(1:count-1,J)
grid on
hold off