function [J, gradient, parameter, center, phi, outputModelNew] = ...
    obliqueAllGlobalLossFunction(wRed, w0, xRegressor, zRegressor, output, outputModel, ...
    weightedOutputWorstLM, phiWorstLM, smoothness, optGrad, dataWeighting)
% OBLIQUEALLGLOBALLOSSFUNCTION splits the worst LM of the current net and estimates the parameters of
% the two newly generated LMs. Afterwards it evaluates the new model and calculates the global loss
% function value used by the split optimization.
%
%
% [J, gradient, parameter, center, phi, outputModelNew] = ...
%     obliqueAllGlobalLossFunction(wRed, w0, xRegressor, zRegressor, output, outputModel, ...
%     weightedOutputWorstLM, phiWorstLM, smoothness, optGrad, dataWeighting)
%
%
% INPUT
%
%   wRed:                   (nz x 1)   Weight vector for the sigmoid direction (optimized).
%   w0:                     (1 x 1)    Sigmoid offset value. Kept constant during optimization.
%   xRegressor:             (N x nx)   Regression matrix for rule consequents (LM estimation).
%   zRegressor:             (N x nz)   Regression matrix for rule premises (splitting).
%   output:                 (N x q)    Measured training data output.
%   outputModel:            (N x q)    Current model output.
%   weightedOutputWorstLM:  (N x q)    Weighted model output of the current worst LM that is split.
%   phiWorstLM:             (N x 1)    Validity function values of the worst LM.
%   smoothness:             (1 x 1)    Value for interpolation smoothness.
%   optGrad:                (1 x 1)    Flag: if true (and more than one output is requested), a
%                                      gradient is returned.
%   dataWeighting:          (N x 1)    Weighting of the data samples. Used for LM estimation and
%                                      for weighting the squared errors in J.
%
%
% OUTPUT
%
%   J:                      (1 x 1)    Global loss function value (NRMSE) after splitting.
%   gradient:               (nz x 1)   Derivatives of J with respect to wRed, approximated by
%                                      central finite differences. Empty ([]) if optGrad is false
%                                      or if only J is requested.
%   parameter:              cell(1,2)  New LM parameter matrices (nx x q) after splitting.
%   center:                 (2 x nz)   New LM center values after splitting.
%   phi:                    (N x 2)    Validity functions of the two newly generated LMs.
%   outputModelNew:         (N x q)    Model output including the two newly generated LMs.
%
%
% NOTE
%
%   The gradient is a numerical approximation (2*nz additional loss evaluations); no analytical
%   gradient is implemented in this file.

% HILOMOT Nonlinear System Identification Toolbox
% Benjamin Hartmann, 25-November-2011
% Institute of Mechanics & Automatic Control, University of Siegen, Germany
% Copyright (c) 2011 by Prof. Dr.-Ing. Oliver Nelles


% Evaluate the split for the given direction vector
[J, parameter, center, phi, outputModelNew] = evaluateSplit(wRed, w0, xRegressor, zRegressor, ...
    output, outputModel, weightedOutputWorstLM, phiWorstLM, smoothness, dataWeighting);

% Every declared output has to be assigned, otherwise any call that requests more than J fails.
gradient = [];

% The gradient is expensive, so it is only computed if it is requested AND can be returned.
if nargout > 1 && ~isempty(optGrad) && all(optGrad(:) ~= 0)
    gradient = finiteDifferenceGradient(wRed, w0, xRegressor, zRegressor, output, outputModel, ...
        weightedOutputWorstLM, phiWorstLM, smoothness, dataWeighting);
end

end


function [J, parameter, center, phi, outputModelNew] = evaluateSplit(wRed, w0, xRegressor, ...
    zRegressor, output, outputModel, weightedOutputWorstLM, phiWorstLM, smoothness, dataWeighting)
% EVALUATESPLIT performs the split for one direction vector and returns loss, LM parameters, centers,
% validity functions and the updated model output.

%% 1) Calculate the validity functions for the two newly generated LMs

% 1.1) Sharp transition: the initial distance is a small fraction of the premise input range.
%      The dimension argument keeps min/max column-wise even for a single data sample.
zLowerBound = min(zRegressor, [], 1);
zUpperBound = max(zRegressor, [], 1);
deltaCenter = 0.01*(zUpperBound - zLowerBound);
phi         = splitValidity(wRed, w0, zRegressor, phiWorstLM, deltaCenter, smoothness);

% 1.2) Centers from the (nearly crisp) validity functions and the data distribution.
%      eps avoids a division by zero if one of the new LMs has no validity at all.
center = bsxfun(@rdivide, phi.'*zRegressor, sum(phi,1).' + eps);

% 1.3) Correctly smoothed transition: repeat with the actual distance between the LM centers
deltaCenter = center(1,:) - center(2,:);
phi         = splitValidity(wRed, w0, zRegressor, phiWorstLM, deltaCenter, smoothness);


%% 2) Estimate parameters of the newly generated LMs (weighted least squares)

parameter = cell(1,2);
for lm = 1:2
    % Square root of the weights, because the weighting enters the LS problem quadratically
    r = sqrt(phi(:,lm).*dataWeighting);
    xRegressorWeighted = bsxfun(@times, xRegressor, r);     % N x nx
    outputWeighted     = bsxfun(@times, output, r);         % N x q
    % Backslash solves the LS problem and yields an nx x q parameter matrix
    parameter{1,lm} = xRegressorWeighted \ outputWeighted;
end


%% 3) Calculate weighted output for the two newly generated LMs
%
% Due to the hierarchical model structure, the weighted output of the worst LM can be subtracted
% from the overall model output and replaced by the weighted outputs of the two new LMs:
%
%   outputModel    = sum_i( phi_i*yHat_i ) + phi_worstLM*yHat_worstLM
%   outputModelNew = sum_i( phi_i*yHat_i ) + phi_newLM1*yHat_newLM1 + phi_newLM2*yHat_newLM2
%
%   => outputModelNew = outputModel - weightedOutputWorstLM + weightedOutputNewLM
%
% This avoids re-evaluating all other LMs and accelerates the split optimization.

weightedOutputNewLM = bsxfun(@times, phi(:,1), xRegressor*parameter{1,1}) + ...
                      bsxfun(@times, phi(:,2), xRegressor*parameter{1,2});

outputModelNew = outputModel - weightedOutputWorstLM + weightedOutputNewLM;


%% 4) Calculate global loss function value (NRMSE), multiple outputs possible

% Optional penalty (disabled) that would make the optimum unique by favoring short vectors:
% J = J + 0.001*sqrt(w'*w);
error2            = (output - outputModelNew).^2;                          % N x q
outputDifference2 = bsxfun(@minus, output, mean(output,1)).^2;             % N x q
weightedError2    = bsxfun(@times, error2, dataWeighting);                 % N x q
J = sqrt(sum(weightedError2(:))/sum(outputDifference2(:)));

end


function phi = splitValidity(wRed, w0, zRegressor, phiWorstLM, deltaCenter, smoothness)
% SPLITVALIDITY returns the (N x 2) validity functions obtained by multiplying the validity of the
% worst LM with a sigmoid splitting function and its complement.

% Factor that normalizes the sigmoid parameters
normalization = norm([wRed; w0])*norm(deltaCenter)*smoothness;
if normalization == 0
    % Degenerate case (zero direction, coinciding centers or zero smoothness): an infinite kappa
    % would give exp(Inf*0) = NaN for samples on the split boundary, so it is bounded here.
    normalization = eps;
end
kappa = 20/normalization;

% Splitting function and its complement
psi = 1./(1 + exp(kappa*(w0 + zRegressor*wRed)));
phi = bsxfun(@times, phiWorstLM, [psi, 1-psi]);

end


function gradient = finiteDifferenceGradient(wRed, w0, xRegressor, zRegressor, output, ...
    outputModel, weightedOutputWorstLM, phiWorstLM, smoothness, dataWeighting)
% FINITEDIFFERENCEGRADIENT approximates dJ/dwRed with central differences. w0 is kept constant.

gradient = zeros(size(wRed));
for k = 1:numel(wRed)
    % Step size relative to the magnitude of the component
    h = eps^(1/3)*max(1, abs(wRed(k)));
    wPlus  = wRed;  wPlus(k)  = wPlus(k)  + h;
    wMinus = wRed;  wMinus(k) = wMinus(k) - h;
    JPlus  = evaluateSplit(wPlus,  w0, xRegressor, zRegressor, output, outputModel, ...
        weightedOutputWorstLM, phiWorstLM, smoothness, dataWeighting);
    JMinus = evaluateSplit(wMinus, w0, xRegressor, zRegressor, output, outputModel, ...
        weightedOutputWorstLM, phiWorstLM, smoothness, dataWeighting);
    % Divide by the step that was actually realized in floating point
    gradient(k) = (JPlus - JMinus)/(wPlus(k) - wMinus(k));
end

end

