% Test CWRU semi-supervised anomaly detection.
% Programs for paper "Bearing semi-supervised anomaly detection using only
% normal data", by A.Băltoiu and B.Dumitrescu.
% Training on normal data from one speed, detection on normal+faulty data
% from another speed.

% The Dictionary Learning toolbox is available at https://github.com/pirofti/dl-box
% B. Dumitrescu and P. Irofti, Dictionary Learning Algorithms and Applications, Springer, 2018

% Permission to use, copy, modify, and/or distribute this software for any
% purpose with or without fee is hereby granted, provided that the above
% copyright notice and this permission notice appear in all copies.

% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

% BD 21.08.2025
% AB 10.07.2025

dir_name = "CWRU/"; % relative path to data folder

% Normal-data file numbers: one file per motor load (0, 1, 2, 3 HP).
normal_files = [97,98,99,100];

% Fault-data file numbers.
% Inner-race and ball faults: one vector per location, one entry per fault
% size. Each entry is the file number for the first motor load; the file
% for load index ahp (1..4) is entry + ahp - 1.
loc_inner_ball = {[278, 274, 270],[282, 286, 290]};
% Outer-race faults: one matrix per position (orthogonal, centered,
% opposite). Rows are motor loads, columns are fault sizes; 0 marks a
% load/size combination for which no file is available.
loc_outter = {[298 310 0;...
    299 309 316;...
    300 311 317;...
    301 312 318],...
    [294 313 315;...
    295 0 0 ;...
    296 0 0 ;...
    297 0 0],...
    [302 0 0;...
    305 0 0 ;...
    306 0 0 ;...
    307 0 0]};

T = 100;        % sample size
Ntrain = 1220;  % number of normal signals to train on;
fss = 3;        % number of fault sizes
nloc = 5;       % fault locations: 1=inner, 2=ball, 3=ortho, 4=center, 5=opposite
nhp = 4;        % number of motor loads

use_normalization = 1;
use_features = 1;

ntests = 5;                 % number of independent tests

spars = [3, 8, 14, 20];     % DL sparsity parameter
overs = [2, 3];             % DL overcompleteness parameter
niters = [50, 200];         % DL number of iterations parameter

% ROC AUC results, indexed (location, sparsity, overcompleteness,
% iterations, motor load, fault size, test). Entries of location/size/load
% combinations without a fault file are never written and stay 0.
roc_auc_i = zeros(2, length(spars), length(overs), length(niters), nhp, fss, ntests);
roc_auc_o = zeros(3, length(spars), length(overs), length(niters), nhp, fss, ntests);

% Load every fault recording once and, if requested, extract its features.
% This does not depend on the test index or on the DL parameters, so it is
% done up front instead of inside the parameter loops.
% NOTE(review): assumes tf_features is deterministic and returns one
% feature column per input signal - confirm against its implementation.
Y_ano_all = cell(nloc, fss, nhp);
has_fault = false(nloc, fss, nhp);
for loc = 1:nloc
    for fs = 1:fss
        for ahp = 1:nhp
            [Y_ano, warn] = load_faulty_data(dir_name, loc_inner_ball, loc_outter, fs, ahp, T, loc);
            if warn == 1  % no fault file for this loc-size config
                continue
            end
            if use_features
                Y_ano = tf_features(Y_ano);
            end
            Y_ano_all{loc,fs,ahp} = Y_ano;
            has_fault(loc,fs,ahp) = true;
        end
    end
end

rng(0)
for t = 1:ntests
    % load and split train (normal) data
    [Y_train, Y_normal_test] = load_split_normal_data(dir_name, normal_files, T, Ntrain);

    if use_features
        Y_train = tf_features(Y_train);
    end

    if use_normalization
        % keep center and scale for test normalization
        [Y_train, c_train, s_train] = normalize(Y_train');
        Y_train = Y_train';
    end

    % Prepare the normal test data of each motor load once per test; it
    % does not depend on the DL parameters or on the fault configuration.
    Y_normal_prep = cell(1, nhp);
    for ahp = 1:nhp
        Y_normal_test_hp = Y_normal_test{ahp};
        if use_features
            Y_normal_test_hp = tf_features(Y_normal_test_hp);
        end
        if use_normalization
            Y_normal_test_hp = normalize(Y_normal_test_hp', 'center', c_train, 'scale', s_train);
            Y_normal_test_hp = Y_normal_test_hp';
        end
        Y_normal_prep{ahp} = Y_normal_test_hp;
    end

    % Normalize the fault data with the statistics of the current training set.
    Y_ano_prep = Y_ano_all;
    if use_normalization
        for k = find(has_fault(:))'
            Y_ano = normalize(Y_ano_all{k}', 'center', c_train, 'scale', s_train);
            Y_ano_prep{k} = Y_ano';
        end
    end

    for ss = 1:length(spars)
        s = spars(ss);

        for oo = 1:length(overs)
            o = overs(oo);

            for iiter = 1:length(niters)
                iter = niters(iiter);

                % Train DL
                D = train_DL(Y_train, s, o, iter);

                % Test DL
                for loc = 1:nloc % location
                    for fs = 1:fss % fault size
                        for ahp = 1:nhp % test motor load
                            if ~has_fault(loc,fs,ahp)  % no fault file for this loc-size config
                                continue
                            end

                            Y_normal_test_hp = Y_normal_prep{ahp};
                            Y_ano = Y_ano_prep{loc,fs,ahp};
                            Nano = size(Y_ano,2);

                            Y_test = [Y_normal_test_hp Y_ano];
                            labels_test = [zeros(size(Y_normal_test_hp,2),1); ones(Nano,1)];

                            % only the anomaly scores are needed for the ROC curve
                            scores = compute_estimates(Y_test, D, s, Nano);

                            % save the roc values in two separate i/o variables
                            if loc < 3
                                [~,~,~,roc_auc_i(loc,ss,oo,iiter,ahp,fs,t)] = perfcurve(labels_test,scores,1);
                            else
                                [~,~,~,roc_auc_o(loc-2,ss,oo,iiter,ahp,fs,t)] = perfcurve(labels_test,scores,1);
                            end
                        end
                    end
                end
            end
        end
    end
end

save("res_norma_all_train_DLfeat_i.mat", "roc_auc_i");
save("res_norma_all_train_DLfeat_o.mat", "roc_auc_o");


function [Y_normal_train, Y_normal_test] = load_split_normal_data(dir_name, normal_files, T, Ntrain)
% LOAD_SPLIT_NORMAL_DATA  Load normal recordings and split them into train/test frames.
%
%   For each of the four motor loads, the fan-end signal stored in the
%   corresponding file is cut into non-overlapping frames of length T, the
%   frames are randomly permuted (consumes the global random stream), and
%   then split:
%     - the first round(Ntrain/4) frames are appended to the training set;
%     - the following Ntrain-1 frames form the test set of that load.
%
%   Inputs:
%     dir_name      path prefix of the data folder (including the trailing
%                   separator)
%     normal_files  file numbers of the normal recordings, one per load
%     T             frame length (samples per signal)
%     Ntrain        total number of training frames over all loads
%
%   Outputs:
%     Y_normal_train  T x (4*round(Ntrain/4)) matrix, training frames of
%                     all loads concatenated in load order
%     Y_normal_test   1 x 4 cell array, test frames of each load
    n_loads = 4;                        % train motor loads
    Ntrain_hp = round(Ntrain/n_loads);  % training frames taken from each load
    % Number of test frames per load. Kept at Ntrain-1 (not Ntrain) so that
    % results remain comparable with previous runs of this script.
    Ntest_hp = Ntrain - 1;

    train_parts = cell(1, n_loads);
    Y_normal_test = cell(1, n_loads);
    for hp = 1:n_loads
        n_file_number = normal_files(hp);
        % string() makes the concatenation work for char paths as well
        n_file_name = string(dir_name) + n_file_number + ".mat";
        % fan-end signal; file numbers below 100 are zero-padded to three digits
        n_data_name = sprintf('X%03d_FE_time', n_file_number);

        % Load into a struct instead of injecting variables into the
        % workspace, and access the signal by dynamic field name (no eval).
        S = load(n_file_name);
        if ~isfield(S, n_data_name)
            error('load_split_normal_data:missingVariable', ...
                'Variable %s not found in file %s.', n_data_name, char(n_file_name));
        end

        Y_normal_hp = buffer(S.(n_data_name), T);
        Y_normal_hp = Y_normal_hp(:,1:end-1); % omit zero-padded last signal

        n_frames = size(Y_normal_hp,2);
        if n_frames < Ntrain_hp + Ntest_hp
            error('load_split_normal_data:notEnoughData', ...
                'File %s provides %d frames, but %d are required.', ...
                char(n_file_name), n_frames, Ntrain_hp + Ntest_hp);
        end

        % shuffle the frames so that train and test are random subsets
        p = randperm(n_frames);
        Y_normal_hp = Y_normal_hp(:,p);

        % save one quarter of Ntrain data for training
        train_parts{hp} = Y_normal_hp(:,1:Ntrain_hp);

        % reserve the next Ntrain-1 frames for testing
        Y_normal_test{hp} = Y_normal_hp(:,Ntrain_hp+1:Ntrain_hp+Ntest_hp);
    end
    Y_normal_train = [train_parts{:}]; % concatenate the training data of all loads
end

function [Y_ano, warn] = load_faulty_data(dir_name, loc_files, loc_outter, fs, ahp, T, loc)
% LOAD_FAULTY_DATA  Load the fan-end signal of one fault recording as frames.
%
%   Inputs:
%     dir_name    path prefix of the data folder (including the trailing
%                 separator)
%     loc_files   cell array for locations 1 and 2 (inner, ball); each cell
%                 holds, per fault size, the file number of the first motor
%                 load; the file of load index ahp is that number + ahp - 1
%     loc_outter  cell array for locations 3..5 (outer race); each cell is a
%                 matrix of file numbers indexed (ahp, fs), 0 = no file
%     fs          fault size index
%     ahp         motor load index (1-based)
%     T           frame length (samples per signal)
%     loc         fault location index
%
%   Outputs:
%     Y_ano  T x N matrix of non-overlapping frames; the scalar 0 (mock
%            value) when no file exists for the requested configuration
%     warn   1 if no file exists for the requested configuration, else 0
    if loc < 3 % inner, ball
        ano_files = loc_files{loc};
        ano_file_fs_number = ano_files(fs);
        ano_file_hp_number = ano_file_fs_number+ahp-1;
    else       % outter
        ano_files = loc_outter{loc-2}; % file table of the outer-race position
        ano_file_hp_number = ano_files(ahp, fs);
    end

    if ano_file_hp_number == 0  % no fault file for loc-size config
        Y_ano = 0; % mock value
        warn = 1;
        return
    end

    warn = 0;
    % string() makes the concatenation work for char paths as well
    ano_file_name = string(dir_name) + ano_file_hp_number + ".mat";
    % fan-end signal; same three-digit naming as used for the normal files
    ano_data_name = sprintf('X%03d_FE_time', ano_file_hp_number);

    % Load into a struct instead of injecting variables into the workspace,
    % and access the signal by dynamic field name (no eval).
    S = load(ano_file_name);
    if ~isfield(S, ano_data_name)
        error('load_faulty_data:missingVariable', ...
            'Variable %s not found in file %s.', ano_data_name, char(ano_file_name));
    end

    Y_ano = buffer(S.(ano_data_name), T);
    Y_ano = Y_ano(:,1:end-1); % omit zero-padded last signal
end

function D = train_DL(Y_train, s, o, iternum)
% TRAIN_DL  Learn a dictionary from the training signals.
%
%   The dictionary has o times as many atoms as the signals have rows. It
%   is initialized with Gaussian random atoms normalized column-wise, and
%   then trained by the toolbox function DL with sparsity level s for
%   iternum iterations.
%
%   Inputs:
%     Y_train  training signals, one per column
%     s        sparsity level passed to DL
%     o        overcompleteness factor (atoms / signal dimension)
%     iternum  number of DL iterations
%
%   Output:
%     D        learned dictionary (first output of DL)
    signal_dim = size(Y_train, 1);
    n_atoms = o * signal_dim;

    % random initial dictionary with normalized columns
    D_init = normc(randn(signal_dim, n_atoms));

    [D, ~, ~] = DL(Y_train, D_init, s, iternum);
end

function [err, estimates] = compute_estimates(Y_test, D, s, Nano)
% COMPUTE_ESTIMATES  Anomaly scores and labels from sparse representation errors.
%
%   Each column of Y_test is coded over dictionary D with the toolbox
%   function omp using sparsity level s; the anomaly score is the norm of
%   the representation residual. The signals whose error is at least the
%   Nano-th largest error are labelled as anomalies (ties at the cutoff may
%   produce more than Nano positives).
%
%   Inputs:
%     Y_test  test signals, one per column
%     D       dictionary
%     s       sparsity level passed to omp
%     Nano    number of signals to flag as anomalous (0..size(Y_test,2))
%
%   Outputs:
%     err        Ntest x 1 representation errors (anomaly scores)
%     estimates  Ntest x 1 vector, 1 for flagged signals and 0 otherwise
    Ntest = size(Y_test,2);

    err = zeros(Ntest,1);
    for ii = 1:Ntest
        y = Y_test(:,ii);
        x = omp(y, D, s);
        err(ii) = norm(y - D*x);
    end

    estimates = zeros(Ntest,1);

    if Nano > Ntest
        error('compute_estimates:tooManyAnomalies', ...
            'Nano (%d) exceeds the number of test signals (%d).', Nano, Ntest);
    end
    if Nano < 1
        % nothing to flag; avoids indexing the sorted errors at position 0
        return
    end

    sorted_errs = sort(err,'descend');
    cutoff = sorted_errs(Nano); % Nano-th largest error
    estimates(err>=cutoff) = 1;
end