% Test CWRU semi-supervised anomaly detection.
% Programs for paper "Bearing semi-supervised anomaly detection using only
% normal data", by A.Băltoiu and B.Dumitrescu.
% Training on normal data from all speeds, detection on normal+faulty data
% from individual speeds.

% BD 23.08.2025

clear

% ---------------------------- configuration ----------------------------

use_features = 1;      % 1 - use features, 0 - use directly the time domain signal
use_normalization = 1; % 1 - normalize with center/scale estimated on the training data

method_nr = 12;   % 0 - LOF, 1 - IF, 2 - RRCF, 3 - OCSVM
                  % 10 - OC-TVreg (One-Class anomaly detection with Total Variation regularization)
                  % 11 - OC-TVregr (not reported in the paper)
                  % 12 - OC-TVreg with Euclidean distance
K_Lreg = 20;      % OC-TVreg parameters: number of neighbors
lam_Lreg = 0.1;   %                      trade-off parameter
S_Lreg = 1200;    % extra parameters, passed only to method 11
P_Lreg = 2;

lof_distance = 'euclidean'; % alternative: 'cosine'

dir_name = "CWRU/"; % relative path to data folder

% normal file names, one per motor load (HP 0,1,2,3)
normal_files = [97,98,99,100];

% faulty file names
% inner race and ball: one base file number per fault size; the motor load
% index (0..3) is added to the base number by load_faulty_data
loc_inner_ball = {[278, 274, 270],[282, 286, 290]};
% outer race: one matrix per position, rows = motor load, columns = fault
% size; 0 marks a configuration for which there is no file
loc_outter = {[298 310 0;...
    299 309 316;...
    300 311 317;...
    301 312 318],...
    [294 313 315;...
    295 0 0 ;...
    296 0 0 ;...
    297 0 0],...
    [302 0 0;...
    305 0 0 ;...
    306 0 0 ;...
    307 0 0]};

T = 100;       % sample size (length of a signal frame)
Ntrain = 1220; % number of normal signals to train on

fss = 3;       % number of fault sizes
n_loc = 5;     % fault locations: 1=inner, 2=ball, 3=ortho, 4=center, 5=opposite
n_hp = numel(normal_files); % number of motor loads

ntests = 5;    % number of repetitions with different random splits

% Fail early on an unsupported method instead of hitting an undefined
% detector or score variable deep inside the loops.
if ~ismember(method_nr, [0 1 2 3 10 11 12])
    error('Unknown method_nr %d; expected one of 0, 1, 2, 3, 10, 11, 12.', method_nr);
end

% Preallocate results. Zero is the "configuration not available" sentinel
% that the summary below filters out, so zeros are the right initial value.
roc_auc_i = zeros(2, n_hp, fss, ntests);         % inner race and ball faults
roc_auc_o = zeros(n_loc-2, n_hp, fss, ntests);   % outer race faults
t_test = zeros(n_loc, n_hp, fss, ntests);        % scoring time per configuration

% ------------------------------ experiment ------------------------------

rng(0)
for t = 1:ntests
    fprintf("Test run %d of %d\n", t, ntests);

    % load and split train (normal) data
    [Y_train, Y_normal_test] = load_split_normal_data(dir_name, normal_files, T, Ntrain);

    if use_features
        Y_train = tf_features(Y_train);
    end

    if use_normalization
        % keep center and scale for test normalization
        [Y_train, c_train, s_train] = normalize(Y_train');
        Y_train = Y_train';
    end

    % train detector (methods 10-12 have no separate training step)
    switch method_nr
      case 0
        ad_model = lof(Y_train', Distance=lof_distance);
      case 1
        ad_model = iforest(Y_train');
      case 2
        ad_model = rrcforest(Y_train');
      case 3
        ad_model = ocsvm(Y_train', KernelScale='auto');
    end

    % Prepare the normal test data once per motor load: it does not depend
    % on the fault location or the fault size.
    % NOTE(review): assumes tf_features is deterministic - confirm.
    Y_normal_test_prep = cell(1, n_hp);
    for ahp = 1:n_hp
        Y_hp = Y_normal_test{ahp};
        if use_features
            Y_hp = tf_features(Y_hp);
        end
        if use_normalization
            Y_hp = normalize(Y_hp', 'center', c_train, 'scale', s_train);
            Y_hp = Y_hp';
        end
        Y_normal_test_prep{ahp} = Y_hp;
    end

    for loc = 1:n_loc % location
        % 1=inner, 2=ball, 3=ortho, 4=center, 5=opposite

        for fs = 1:fss % fault size

            for ahp = 1:n_hp % test motor load

                % load faulty data for test
                [Y_ano, warn] = load_faulty_data(dir_name, loc_inner_ball, loc_outter, fs, ahp, T, loc);

                if warn == 1  % no fault file for this loc-size config
                    continue
                end
                Nano = size(Y_ano,2);

                if use_features
                    Y_ano = tf_features(Y_ano);
                end

                if use_normalization
                    Y_ano = normalize(Y_ano', 'center', c_train, 'scale', s_train);
                    Y_ano = Y_ano';
                end

                % normal data for the current motor load, then faulty data
                Y_normal_test_hp = Y_normal_test_prep{ahp};
                Y_test = [Y_normal_test_hp Y_ano];
                labels_test = [zeros(size(Y_normal_test_hp,2),1); ones(Nano,1)];

                % compute anomaly scores
                score_timer = tic;
                if method_nr < 10
                    [~, scores] = isanomaly(ad_model, Y_test');
                else
                    switch method_nr
                      case 10
                        scores = octvreg(Y_train', Y_test', K_Lreg, lam_Lreg);
                      case 11
                        scores = reg_lapl_rand_semi(Y_train', Y_test', K_Lreg, lam_Lreg, S_Lreg, P_Lreg);
                      case 12  % Euclidean distance
                        scores = octvreg(Y_train', Y_test', K_Lreg, lam_Lreg, 1);
                    end
                end
                t_test(loc,ahp,fs,t) = toc(score_timer);

                % save the roc values in two separate i/o variables
                if loc < 3
                    [~,~,~,roc_auc_i(loc,ahp,fs,t)] = perfcurve(labels_test,scores,1);
                else
                    [~,~,~,roc_auc_o(loc-2,ahp,fs,t)] = perfcurve(labels_test,scores,1);
                end

%                if roc_auc_i(loc,ahp,fs,t) == 1 % third figure was saved
%                  plot(-scores)
%                  pause
%                end
            end
        end
    end
end

% ------------------------------- summary --------------------------------

% entries equal to zero belong to configurations without a fault file
fprintf("Average test time: %f\n", mean(t_test(t_test~=0)))

% display overall average
xi = roc_auc_i(roc_auc_i~=0);
xo = roc_auc_o(roc_auc_o~=0);
fprintf("ROC AUC average: %f\n", mean([xi;xo]))

% ----------------------------- save results -----------------------------

if method_nr >= 10
    % OC-TVreg family: the file name encodes the method and its parameters
    file_save_name = strcat("res_lreg_all_train_", num2str(method_nr), "_", num2str(K_Lreg), "_", num2str(lam_Lreg));
    if method_nr == 11
        file_save_name = strcat(file_save_name, "_", num2str(S_Lreg), "_", num2str(P_Lreg));
    end
    if use_features
        domain_suffix = ".mat";
    else
        domain_suffix = "_t.mat"; % results obtained on the raw time-domain signal
    end
    save(strcat(file_save_name, "_i", domain_suffix), "roc_auc_i")
    save(strcat(file_save_name, "_o", domain_suffix), "roc_auc_o")
else
    save("res_semi_all_train_i.mat", "roc_auc_i");
    save("res_semi_all_train_o.mat", "roc_auc_o");
end

% --------------- functions ------------------------

function [Y_normal_train, Y_normal_test] = load_split_normal_data(dir_name, normal_files, T, Ntrain)
% LOAD_SPLIT_NORMAL_DATA  Load normal fan-end signals and split them into train/test frames.
%
%   Inputs:
%     dir_name     - path prefix of the data folder (must end with a file separator)
%     normal_files - vector of file numbers, one per motor load
%     T            - frame length (number of samples per signal)
%     Ntrain       - total number of training frames over all motor loads
%
%   Outputs:
%     Y_normal_train - T x (numel(normal_files)*round(Ntrain/numel(normal_files)))
%                      matrix, training frames of all motor loads side by side
%     Y_normal_test  - 1 x numel(normal_files) cell array; cell hp holds the
%                      T x (Ntrain-1) test frames of motor load hp
%
%   Each file <dir_name><number>.mat must contain the variable
%   X<number, zero-padded to 3 digits>_FE_time. Frames are shuffled with
%   randperm, so the split depends on the state of the global random stream.

    n_hp = numel(normal_files);
    Ntrain_hp = round(Ntrain/n_hp);  % training frames taken from each motor load
    % Index of the last test frame. The test slice holds Ntrain-1 frames;
    % this count is kept as is so that results stay reproducible.
    last_test_idx = Ntrain + Ntrain_hp - 1;

    Y_normal_train = [];
    Y_normal_test = cell(1, n_hp);

    for hp = 1:n_hp % train motor load
        n_file_number = normal_files(hp);
        n_file_name = string(dir_name) + num2str(n_file_number) + ".mat";
        n_data_name = sprintf('X%03d_FE_time', n_file_number); % fan-end data

        % Load into a struct and read the field by name: no variables are
        % injected into this workspace and no eval is needed.
        file_vars = load(n_file_name);
        if ~isfield(file_vars, n_data_name)
            error('load_split_normal_data:missingVariable', ...
                'Variable %s not found in file %s.', n_data_name, n_file_name);
        end

        Y_normal_hp = buffer(file_vars.(n_data_name), T);
        Y_normal_hp = Y_normal_hp(:,1:end-1); % omit zero-padded last signal

        n_frames = size(Y_normal_hp,2);
        if n_frames < last_test_idx
            error('load_split_normal_data:notEnoughData', ...
                'File %s provides %d frames, but %d are required.', ...
                n_file_name, n_frames, last_test_idx);
        end

        % shuffle the frames before splitting
        p = randperm(n_frames);
        Y_normal_hp = Y_normal_hp(:,p);

        % the first Ntrain_hp shuffled frames go to the common training set
        Y_normal_train = [Y_normal_train Y_normal_hp(:,1:Ntrain_hp)]; %#ok<AGROW>

        % the next Ntrain-1 shuffled frames are reserved for testing
        Y_normal_test{hp} = Y_normal_hp(:,Ntrain_hp+1:last_test_idx);
    end
end

function [Y_ano, warn] = load_faulty_data(dir_name, loc_files, loc_outter, fs, ahp, T, loc)
% LOAD_FAULTY_DATA  Load the fan-end signal of one fault configuration, split into frames.
%
%   Inputs:
%     dir_name   - path prefix of the data folder (must end with a file separator)
%     loc_files  - cell array for inner race (loc=1) and ball (loc=2) faults;
%                  each cell holds one base file number per fault size, the
%                  file for motor load ahp being base+ahp-1
%     loc_outter - cell array for outer race faults (loc=3,4,5 -> cells 1,2,3);
%                  each cell is a matrix indexed (motor load, fault size),
%                  where 0 means that no file exists
%     fs         - fault size index
%     ahp        - motor load index
%     T          - frame length (number of samples per signal)
%     loc        - fault location index
%
%   Outputs:
%     Y_ano - T x N matrix of signal frames; the scalar 0 (placeholder) when
%             there is no file for the requested configuration
%     warn  - 1 when there is no file for the requested configuration, else 0

    if loc < 3 % inner, ball
        ano_files = loc_files{loc};
        ano_file_fs_number = ano_files(fs);
        ano_file_hp_number = ano_file_fs_number+ahp-1;
    else     % outter
        ano_files = loc_outter{loc-2}; % file table of the outer race position
        ano_file_hp_number = ano_files(ahp, fs);
    end

    if ano_file_hp_number == 0  % no fault file for loc-size config
        Y_ano = 0; % mock value, callers must check warn
        warn = 1;
        return
    end

    warn = 0;
    ano_file_name = string(dir_name) + num2str(ano_file_hp_number) + ".mat";
    % zero-padded to three digits, like the names of the normal data variables
    ano_data_name = sprintf('X%03d_FE_time', ano_file_hp_number);

    % Load into a struct and read the field by name: no variables are
    % injected into this workspace and no eval is needed.
    file_vars = load(ano_file_name);
    if ~isfield(file_vars, ano_data_name)
        error('load_faulty_data:missingVariable', ...
            'Variable %s not found in file %s.', ano_data_name, ano_file_name);
    end

    Y_ano = buffer(file_vars.(ano_data_name), T);
    Y_ano = Y_ano(:,1:end-1); % omit zero-padded last signal
end
