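% ***** GRADIENT DESCENT LOGISTIC REGRESSION ***** %
% (The script fits a sigmoid model with a cross-entropy cost; a
%  regularization constant lambda is defined but not currently applied.)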

% m:  Number of data samples
% n:  Number of factors or independent variables (features)
% p:  Index value of a single sample within the set of m samples
% q:  Index value of an individual feature within the
%     pth sample's feature (or variable) vector
% x:  Feature vector with n+1 features, i.e. x=[x0 x1 x2 x3] & x0=1
% theta:  Parameter vector with at least n+1 parameters,
%         i.e. theta=[theta0 theta1 theta2 theta3]
% h:  "Hypothesis" or prediction model, i.e. h=1/(1+exp(-g)) where
%     g=theta*transpose(x)
% J:  Cost function dependent on the parameter values in vector theta

clear  % Clear workspace variables only ("clear all" also flushes compiled functions and breakpoints)

% Purpose: fit the logistic model h = 1/(1+exp(-(theta1 + theta2*x1 + theta3*x2)))
% to the labelled points below by batch gradient descent, animating the
% decision boundary after every update and finally plotting the cost history.

% Data Points
x1=[-2.4  8.2  1.9  12.1  -2.1  9.9  -9  -1   9.2  1];
x2=[ 4.6  2.9  2.7  3.2    4.0  3.4  10   8  -3.4  2];
y=[  0    1     0    1      0    1    0   1   1    0];


% Initialize parameters. Each theta is stored as a history vector:
% element k holds the value after k-1 updates.
theta1(1)=-1; % Bias term
theta2(1)=1;  % Weight on x1
theta3(1)=1;  % Weight on x2

% Learning rates (step sizes), one per parameter
alpha=[0.8 0.6 0.4];  % These values do not change during iteration

m=length(y);  % Number of data points

tol=0.0001;     % Stop when the cost changes by no more than this between updates
maxIter=10000;  % Safety cap so a non-converging run cannot loop forever
lambda=1;       % Regularization constant. NOTE(review): defined but not applied
                % to the cost or the updates below -- confirm whether it should be.

g=theta1(1)+theta2(1).*x1+theta3(1).*x2;  % Linear score for every sample
h=1./(1+exp(-g));  % Hypothesized model (sigmoid of the score)

% Cross-entropy cost. h is clamped away from exactly 0 and 1 inside the logs:
% a saturated sigmoid would otherwise give 0*log(0)=NaN, making e NaN and
% ending the loop as if it had converged.
hc=min(max(h,eps),1-eps);
J(1)=-(1/m)*sum(y.*log(hc)+(1-y).*log(1-hc));

e=1;      % Absolute change in cost between consecutive updates
count=2;  % count is the number of iterations that have occurred plus two
          % while inside the loop body (index of the value being computed)

axisLimits=[-10 12 -10 12];  % Fixed plot window [x1min x1max x2min x2max]
isPos=(y==1);                % Logical mask of the positive-class samples

while (e>tol) && (count-2<maxIter)

    % Gradient step.
    % NOTE(review): the factor 2/m matches a squared-error derivation; for the
    % cross-entropy cost the gradient factor is 1/m, so the effective step
    % size here is 2*alpha. Kept as-is to preserve the existing trajectory.
    theta1(count)=theta1(count-1)-alpha(1).*(2/m).*sum(h-y);
    theta2(count)=theta2(count-1)-alpha(2).*(2/m).*sum((h-y).*x1);
    theta3(count)=theta3(count-1)-alpha(3).*(2/m).*sum((h-y).*x2);

    g=theta1(count)+theta2(count).*x1+theta3(count).*x2;
    h=1./(1+exp(-g));
    hc=min(max(h,eps),1-eps);
    J(count)=-(1/m)*sum(y.*log(hc)+(1-y).*log(1-hc));
    e=abs(J(count)-J(count-1));

    % Decision boundary: the set of points where the score g is zero,
    % i.e. theta1 + theta2*x1 + theta3*x2 = 0, drawn in the (x1,x2) plane.
    if abs(theta3(count))>eps
        xb=axisLimits(1:2);
        yb=-(theta1(count)+theta2(count).*xb)./theta3(count);
    elseif abs(theta2(count))>eps
        % Boundary is vertical: x1 = -theta1/theta2
        xb=(-theta1(count)/theta2(count)).*[1 1];
        yb=axisLimits(3:4);
    else
        % Both weights are zero: no boundary exists, draw nothing
        xb=NaN;
        yb=NaN;
    end

    plot(xb,yb)
    axis(axisLimits)
    hold on
    plot(x1(isPos),x2(isPos),'ro')    % Class 1 samples
    plot(x1(~isPos),x2(~isPos),'bx')  % Class 0 samples
    grid on

    pause(0.1)  % Give the figure time to redraw so the fit is animated
    hold off

    count=count+1;

end

% ~(e<=tol) rather than e>tol so that a NaN change is also reported
if ~(e<=tol)
    warning('Gradient descent stopped after %d iterations without converging (last cost change = %g).',count-2,e);
end

% Cost history on log-log axes
figure
loglog(1:numel(J),J)
grid
hold off