[1]:
import tensorflow as tf
from sklearn.metrics import classification_report, accuracy_score, balanced_accuracy_score, roc_auc_score, mutual_info_score, normalized_mutual_info_score, adjusted_mutual_info_score
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import OneHotEncoder
from scipy.stats import mode
import numpy as np
def ook(t):
    """One-of-K (one-hot) encode a label vector.

    sklearn's LabelBinarizer returns a single column for binary problems,
    so in that case the complement column is prepended to always yield a
    full (N, K) encoding.
    """
    binarizer = LabelBinarizer()
    encoded = binarizer.fit_transform(t)
    if len(np.unique(t)) == 2:
        # Build the "class 0" column as the complement of the single column.
        negatives = 1 - encoded.astype(bool)
        encoded = np.hstack((negatives, encoded))
    return encoded
def evaluation_metrics(y_true, y_pred, print_result=True):
    """Compute classification metrics for predicted class scores.

    Parameters
    ----------
    y_true : array of shape (N,) or (N, 1) with integer class labels.
    y_pred : array of shape (N, K) with per-class scores/probabilities.
    print_result : if True, print each metric.

    Returns
    -------
    (acc, auc, auc_sk, nmi, bacc) — accuracy, TF AUC, sklearn AUC,
    normalized mutual information, adjusted balanced accuracy.

    Note: the original version printed `mi` and `ami`, whose computations
    were commented out, raising NameError whenever print_result=True.
    Those prints are removed here.
    """
    y_hat = y_pred.argmax(axis=1)
    # Accuracy
    acc = accuracy_score(y_true.ravel(), y_hat)
    # Balanced accuracy (adjusted=True rescales so chance scores 0)
    bacc = balanced_accuracy_score(y_true.squeeze(), y_hat.squeeze(), adjusted=True)
    # Normalized Mutual Information
    nmi = normalized_mutual_info_score(y_true.squeeze(), y_hat.squeeze())
    # AUC (TensorFlow) — NOTE(review): this feeds hard argmax labels with
    # from_logits=True, which is a degenerate AUC; kept as-is to preserve
    # reported numbers — confirm intent with the authors.
    auc_metric = tf.keras.metrics.AUC(from_logits=True)
    auc_metric.update_state(y_true, y_hat.astype('float'))
    auc = auc_metric.result().numpy()
    auc_metric.reset_states()
    # AUC (scikit-learn) on one-hot true labels vs. the full score matrix
    auc_sk = roc_auc_score(ook(y_true), y_pred)
    if print_result:
        print("Accuracy: {:.4f}".format(acc))
        print("Balanced Accuracy: {:.4f}".format(bacc))
        print("Normalized Mutual Information: {:.4f}".format(nmi))
        print("AUC (Tensorflow): {:.4f}".format(auc))
        print("AUC (scikit-learn): {:.4f}".format(auc_sk))
    return acc, auc, auc_sk, nmi, bacc
Subclassing for RCDNN
[2]:
!git clone https://github.com/Jectrianama/GCCE_TEST.git
Cloning into 'GCCE_TEST'...
remote: Enumerating objects: 861, done.
remote: Counting objects: 100% (454/454), done.
remote: Compressing objects: 100% (238/238), done.
remote: Total 861 (delta 240), reused 397 (delta 210), pack-reused 407
Receiving objects: 100% (861/861), 38.84 MiB | 9.53 MiB/s, done.
Resolving deltas: 100% (408/408), done.
[3]:
import os
# Temporarily enter the cloned repo so the model code is importable,
# then return to the original working directory.
os.chdir('/content/GCCE_TEST/Models')
# NOTE(review): wildcard import — Keras_MA_GCCE is assumed to come from
# keras_ma_gcce; confirm against the repo.
from keras_ma_gcce import *
from labels_generation import MA_Clas_Gen
os.chdir('../../')
[4]:
# Load data from Drive (other datasets)
FILEID = "1AU8pTtCLihBjCZjWITaAzpnEuL4RO436"
#https://drive.google.com/file/d/1AU8pTtCLihBjCZjWITaAzpnEuL4RO436/view?usp=sharing
# The inner wget fetches Google Drive's download-confirmation token via cookies.
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id='$FILEID -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id="$FILEID -O DataGCCE.zip && rm -rf /tmp/cookies.txt
!unzip -o DataGCCE.zip
!dir
--2023-02-13 17:23:20-- https://docs.google.com/uc?export=download&confirm=&id=1AU8pTtCLihBjCZjWITaAzpnEuL4RO436
Resolving docs.google.com (docs.google.com)... 142.251.163.113, 142.251.163.100, 142.251.163.139, ...
Connecting to docs.google.com (docs.google.com)|142.251.163.113|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://doc-00-90-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/ejbc6j3nf9amr31o7n28lecb6ec6rd5c/1676308950000/07591141114418430227/*/1AU8pTtCLihBjCZjWITaAzpnEuL4RO436?e=download&uuid=42cd46c3-8e3e-446b-9fc3-0dd6f32ac042 [following]
Warning: wildcards not supported in HTTP.
--2023-02-13 17:23:21-- https://doc-00-90-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/ejbc6j3nf9amr31o7n28lecb6ec6rd5c/1676308950000/07591141114418430227/*/1AU8pTtCLihBjCZjWITaAzpnEuL4RO436?e=download&uuid=42cd46c3-8e3e-446b-9fc3-0dd6f32ac042
Resolving doc-00-90-docs.googleusercontent.com (doc-00-90-docs.googleusercontent.com)... 172.253.115.132, 2607:f8b0:4004:c06::84
Connecting to doc-00-90-docs.googleusercontent.com (doc-00-90-docs.googleusercontent.com)|172.253.115.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 38377 (37K) [application/x-zip-compressed]
Saving to: ‘DataGCCE.zip’
DataGCCE.zip 100%[===================>] 37.48K --.-KB/s in 0s
2023-02-13 17:23:21 (87.3 MB/s) - ‘DataGCCE.zip’ saved [38377/38377]
Archive: DataGCCE.zip
inflating: new-thyroid.csv
inflating: tic-tac-toe-endgame.csv
inflating: balance-scale.csv
inflating: file.csv
balance-scale.csv file.csv new-thyroid.csv tic-tac-toe-endgame.csv
DataGCCE.zip GCCE_TEST sample_data
[5]:
# Load data from Drive (publicly accessible)
FILEID = "1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW"
#https://drive.google.com/file/d/1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW/view?usp=sharing
# The inner wget fetches Google Drive's download-confirmation token via cookies.
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id='$FILEID -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id="$FILEID -O MADatasets.zip && rm -rf /tmp/cookies.txt
!unzip -o MADatasets.zip
!dir
--2023-02-13 17:23:21-- https://docs.google.com/uc?export=download&confirm=t&id=1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW
Resolving docs.google.com (docs.google.com)... 142.251.163.113, 142.251.163.100, 142.251.163.139, ...
Connecting to docs.google.com (docs.google.com)|142.251.163.113|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://doc-00-90-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/crie1jtml02ra09q71kdq7ckgq4hj72h/1676308950000/07591141114418430227/*/1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW?e=download&uuid=99eb878a-14f5-406d-b656-88d23bd48141 [following]
Warning: wildcards not supported in HTTP.
--2023-02-13 17:23:22-- https://doc-00-90-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/crie1jtml02ra09q71kdq7ckgq4hj72h/1676308950000/07591141114418430227/*/1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW?e=download&uuid=99eb878a-14f5-406d-b656-88d23bd48141
Resolving doc-00-90-docs.googleusercontent.com (doc-00-90-docs.googleusercontent.com)... 172.253.115.132, 2607:f8b0:4004:c06::84
Connecting to doc-00-90-docs.googleusercontent.com (doc-00-90-docs.googleusercontent.com)|172.253.115.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 156530728 (149M) [application/zip]
Saving to: ‘MADatasets.zip’
MADatasets.zip 100%[===================>] 149.28M 65.7MB/s in 2.3s
2023-02-13 17:23:24 (65.7 MB/s) - ‘MADatasets.zip’ saved [156530728/156530728]
Archive: MADatasets.zip
inflating: MADatasets/util.py
inflating: MADatasets/Iris1.mat
inflating: MADatasets/Integra_Labels.mat
inflating: MADatasets/MAGenerationClassification.py
inflating: MADatasets/Voice.mat
inflating: MADatasets/Iris.mat
inflating: MADatasets/Sinthetic.mat
inflating: MADatasets/MAGenerationClassification_1.py
inflating: MADatasets/Bupa1.mat
inflating: MADatasets/TicTacToe1.mat
inflating: MADatasets/Wine.mat
inflating: MADatasets/Breast1.mat
inflating: MADatasets/Breast.mat
inflating: MADatasets/Music.mat
inflating: MADatasets/Pima.mat
inflating: MADatasets/Ionosphere.mat
inflating: MADatasets/TicTacToe.mat
inflating: MADatasets/VoiceData.m
inflating: MADatasets/util_1.py
inflating: MADatasets/Ionosphere1.mat
inflating: MADatasets/__pycache__/util_1.cpython-37.pyc
inflating: MADatasets/Bupa.mat
inflating: MADatasets/Wine1.mat
inflating: MADatasets/__pycache__/util.cpython-37.pyc
inflating: MADatasets/Pima1.mat
inflating: MADatasets/Segmentation1.mat
inflating: MADatasets/Western.mat
inflating: MADatasets/Integra_Preprocesamiento_Seg_Caracterizacion_time_frec.mat
inflating: MADatasets/Western1.mat
inflating: MADatasets/Segmentation.mat
inflating: MADatasets/Skin_NonSkin.mat
inflating: MADatasets/Skin_NonSkin1.mat
inflating: MADatasets/Occupancy1.mat
inflating: MADatasets/Polarity.mat
inflating: MADatasets/Occupancy.mat
balance-scale.csv GCCE_TEST new-thyroid.csv
DataGCCE.zip MADatasets sample_data
file.csv MADatasets.zip tic-tac-toe-endgame.csv
Load Data
[6]:
#load data
import scipy.io as sio
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf  # import tensorflow
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler,MinMaxScaler
import numpy as np
# Dataset tag, used only to name the output .xlsx file at the end.
# NOTE(review): 'Ocupancy' spelling kept as-is; it only affects the file name.
database = 'Ocupancy'#['bupa1', 'breast-cancer-wisconsin1','pima-indians-diabetes1', 'ionosphere1', 'tic-tac-toe1', 'iris1', 'wine1', 'segmentation1']
import pandas as pd
from sklearn.preprocessing import LabelEncoder
# NOTE(review): hardcoded Colab path — adjust when running outside Colab.
dfo= pd.read_csv(r'/content//file.csv')
dfo
#Removing the HumidityRatio attribute which is least correlated to the target attribute
t=dfo['Occupancy'].values
X = dfo.drop(['HumidityRatio','Occupancy'], axis = 1).values
# Shift labels to {1, 2}: the annotator generator expects classes in 1..K.
t=t+1
Labels Generation
[7]:
import random
import warnings
import numpy as np
#import climin
from functools import partial
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
#Defining the Sigmoid function and Softmax function
def Sigmoid(f_r):
    """Element-wise logistic function: 1 / (1 + exp(-f_r))."""
    return 1.0 / (1.0 + np.exp(-f_r))
def MAjVot(Y, K):
    """Majority vote over annotators.

    Y : (N, R) matrix of labels in {1..K} (entries outside that range
        simply receive no vote).
    K : number of classes.

    Returns an (N, 1) array with the winning label per row; ties go to
    the smallest label, since argmax returns the first maximum.
    """
    N = Y.shape[0]
    Yhat = np.zeros((N, 1))
    for n in range(N):
        # Count, per class k, how many annotators voted k+1 on sample n.
        counts = np.array([(Y[n] == k + 1).sum() for k in range(K)])
        Yhat[n] = counts.argmax() + 1
    return Yhat
def MA_Clas_Gen(Xtrain, ytrain, R, NrP):
    """Simulate R noisy annotators for a classification dataset.

    Each annotator r gets a binary reliability function lam_r(x) built from
    fixed sinusoidal basis functions of a 1-D embedding of Xtrain; wherever
    lam_r == 0 the annotator flips the true label to a random wrong class.

    Parameters
    ----------
    Xtrain : (N, D) feature matrix.
    ytrain : (N,) integer labels starting at 1.
    R      : number of simulated annotators (at most 5 — the weight arrays
             below have 5 rows).
    NrP    : fraction of the N samples each annotator labels (0 < NrP <= 1).

    Returns
    -------
    Ytrain : (N, R) annotator labels; missing entries hold Vref = -1e-20.
    iAnn   : (N, R) 0/1 matrix; 1 where annotator r labeled sample n.
    Lam_r  : list of R (N, 1) binary reliability vectors.

    Fix vs. original: the final "all samples labeled" check built a
    ValueError without raising it, and its condition was inverted; it now
    actually raises when some sample has no annotation.
    """
    N = len(ytrain)
    K = len(np.unique(ytrain))
    Kn = np.unique(ytrain)
    aux = 0
    A = np.zeros((K, 1))
    for k in Kn:
        A[aux] = (ytrain == k).sum()
        aux = aux + 1
    # Smallest class size, used to set the TSNE perplexity.
    per = np.min(A)
    # 1-D embedding of the inputs (TSNE is too slow for very large N).
    if N < 25000:
        Xtrain = TSNE(n_components=1, perplexity=per/2).fit_transform(Xtrain)
    else:
        Xtrain = np.sum(Xtrain, 1)
    # Rescale the embedding to [0, 1].
    Xtrain = Xtrain - Xtrain.min()
    Xtrain = Xtrain/Xtrain.max()
    Xtrain = Xtrain.reshape((N, 1))
    yprueba = np.ones((N, 1))
    # Three fixed sinusoidal basis functions of the embedding (last is a bias).
    u_q = np.empty((Xtrain.shape[0], 3))
    u_q[:, 0, None] = 4.5*np.cos(2*np.pi*Xtrain + 1.5*np.pi) - \
        3*np.sin(4.3*np.pi*Xtrain + 0.3*np.pi)
    u_q[:, 1, None] = 4.5*np.cos(1.5*np.pi*Xtrain + 0.5*np.pi) + \
        5*np.sin(3*np.pi*Xtrain + 1.5*np.pi)
    u_q[:, 2, None] = 1
    # Fixed per-annotator weights: W[q][r] weights basis q for annotator r.
    W = []
    # q=1
    Wq1 = np.array(([[0.4], [0.7], [-0.5], [0], [-0.7]]))
    W.append(Wq1)
    # q=2
    Wq2 = np.array(([[0.4], [-1.0], [-0.1], [-0.8], [1.0]]))
    W.append(Wq2)
    Wq3 = np.array(([[3.1], [-1.8], [-0.6], [-1.2], [1.0]]))
    W.append(Wq3)
    F_r = []
    Lam_r = []
    for r in range(R):
        f_r = np.zeros((Xtrain.shape[0], 1))
        for q in range(3):
            f_r += W[q][r].T*u_q[:, q, None]
        F_r.append(f_r)
        # Threshold the sigmoid at 0.5: lam_r = 1 means "annotator correct".
        lam_r = Sigmoid(f_r)
        lam_r[lam_r > 0.5] = 1
        lam_r[lam_r <= 0.5] = 0
        Lam_r.append(lam_r)
    plt.plot(Xtrain, Lam_r[2], 'rx')
    plt.show()
    # NOTE: the fixed seed makes the corruption reproducible, but it also
    # resets the caller's global numpy random state.
    seed = 0
    np.random.seed(seed)
    Ytrain = np.ones((N, R))
    for r in range(R):
        aux = ytrain.copy()
        for n in range(N):
            if Lam_r[r][n] == 0:
                # Unreliable region: replace the true label with a uniformly
                # random wrong class.
                labels = np.arange(1, K+1)
                a = np.where(labels == ytrain[n])
                labels = np.delete(labels, a)
                idxlabels = np.random.permutation(K-1)
                aux[n] = labels[idxlabels[0]]
        Ytrain[:, r] = aux.flatten()
    # iAnn indicates whether annotator r labels the nth sample.
    iAnn = np.zeros((N, R), dtype=int)
    Nr = np.ones((R), dtype=int)*int(np.floor(N*NrP))
    for r in range(R):
        if r < R-1:
            indexR = np.random.permutation(range(N))[:Nr[r]]
            iAnn[indexR, r] = 1
        else:
            # Last annotator is forced to cover every still-unlabeled sample.
            iSimm = np.sum(iAnn, axis=1)
            idxZero = np.asarray([i for (i, val) in enumerate(iSimm) if val == 0])
            Nzeros = idxZero.shape[0]
            idx2Choose = np.arange(N)
            if Nzeros == 0:
                indexR = np.random.permutation(range(N))[:Nr[r]]
                iAnn[indexR, r] = 1
            else:
                idx2Choose = np.delete(idx2Choose, idxZero)
                N2chose = idx2Choose.shape[0]
                idxNoZero = np.random.permutation(N2chose)[:(Nr[r] - Nzeros)]
                idxTot = np.concatenate((idxZero, idx2Choose[idxNoZero]))
                iAnn[idxTot, r] = 1
    # Now, we verify that all the samples were labeled at least once.
    Nr = (np.sum(iAnn, 0))
    iSimm = np.sum(iAnn, axis=1)
    if np.any(iSimm == 0):
        raise ValueError("all the samples must be labeled at least once")
    # Finally, if iAnn=0 we assign a reference value to indicate a missing value
    Vref = -1e-20
    for r in range(R):
        Ytrain[iAnn[:, r] == 0, r] = Vref
    return Ytrain, iAnn, Lam_r
def CrossVal(X, pp, Nk):
    """Generate Nk independent random train/test index splits.

    Parameters
    ----------
    X  : array whose first dimension is the number of samples.
    pp : fraction of samples assigned to training (0 < pp < 1).
    Nk : number of splits.

    Returns
    -------
    idxtr : (Ntr, Nk) int array — training indices, one column per split.
    idxte : (Nte, Nk) int array — test indices.
    """
    N = X.shape[0]
    Ntr = int(N*pp)
    Nte = N - Ntr
    # int dtype so the columns are directly usable for fancy indexing
    # (the original float arrays forced every caller to cast).
    idxtr = np.zeros((Ntr, Nk), dtype=int)
    idxte = np.zeros((Nte, Nk), dtype=int)
    for i in range(Nk):
        index = np.random.permutation(N)
        idxtr[:, i] = index[:Ntr]
        idxte[:, i] = index[Ntr:]
    return idxtr, idxte
[8]:
Y, iAnn, Lam_r = MA_Clas_Gen(X ,t, R=5, NrP=1)
/usr/local/lib/python3.8/dist-packages/sklearn/manifold/_t_sne.py:780: FutureWarning: The default initialization in TSNE will change from 'random' to 'pca' in 1.2.
warnings.warn(
/usr/local/lib/python3.8/dist-packages/sklearn/manifold/_t_sne.py:790: FutureWarning: The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.
warnings.warn(
[9]:
# Shift labels back from {1, 2} to {0, 1} for the classifier and metrics.
# (Missing annotations keep their Vref sentinel minus 1.)
Y = Y - 1
t = t - 1
[10]:
from sklearn.metrics import classification_report
# Per-annotator quality check: compare each simulated annotator's labels
# against the ground truth and plot its label frequency.
for i in range(Y.shape[1]):
    print('annotator',i+1)
    print(classification_report(t,Y[:,i]))
    unique, counts = np.unique(Y[:,i], return_counts=True)
    plt.figure()
    plt.bar(unique, counts)
    # unique, counts = np.unique(Y_test[5], return_counts=True)
    # plt.bar(unique, counts)
    plt.title('Class Frequency for Y_true')
    plt.xlabel('Class')
    plt.ylabel('Frequency')
annotator 1
precision recall f1-score support
0 1.00 0.76 0.86 1693
1 0.71 1.00 0.83 972
accuracy 0.85 2665
macro avg 0.85 0.88 0.85 2665
weighted avg 0.89 0.85 0.85 2665
annotator 2
precision recall f1-score support
0 1.00 0.33 0.50 1693
1 0.46 1.00 0.63 972
accuracy 0.58 2665
macro avg 0.73 0.67 0.57 2665
weighted avg 0.80 0.58 0.55 2665
annotator 3
precision recall f1-score support
0 0.74 0.96 0.84 1693
1 0.85 0.41 0.56 972
accuracy 0.76 2665
macro avg 0.80 0.69 0.70 2665
weighted avg 0.78 0.76 0.73 2665
annotator 4
precision recall f1-score support
0 0.81 0.46 0.59 1693
1 0.46 0.81 0.59 972
accuracy 0.59 2665
macro avg 0.64 0.64 0.59 2665
weighted avg 0.69 0.59 0.59 2665
annotator 5
precision recall f1-score support
0 0.48 0.54 0.51 1693
1 0.00 0.00 0.00 972
accuracy 0.34 2665
macro avg 0.24 0.27 0.26 2665
weighted avg 0.31 0.34 0.32 2665
Split data
[11]:
import numpy.matlib
from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit
# Single 70/30 shuffle split with a fixed seed for reproducibility.
Ns = 1
ss = ShuffleSplit(n_splits=Ns, test_size=0.3,random_state =123)
for train_index, test_index in ss.split(X):
    print(test_index)
# NOTE: the loop variables leak out and are used below; with Ns == 1 this
# is the (only) split.
X_train, X_test,Y_train,Y_test = X[train_index,:], X[test_index,:],Y[train_index,:], Y[test_index,:]
# Ground-truth labels kept as column vectors.
Y_true_train, Y_true_test = t[train_index].reshape(-1,1), t[test_index].reshape(-1,1)
print(X_train.shape, Y_train.shape, Y_true_train.shape)
[ 315 235 1480 1538 1087 1009 1677 1094 343 2175 1455 2535 2641 798
417 2238 1781 1445 85 1828 2029 1034 1067 1458 2455 732 2434 2247
2113 2386 689 853 2200 2500 179 1245 1758 1927 709 1826 2529 1099
805 601 1022 1730 2135 1361 2294 2644 206 420 878 2093 1400 1518
934 1622 1279 613 957 190 33 1450 335 1371 2634 103 304 1228
2242 2384 1637 673 222 2340 1765 360 289 25 45 1937 537 1237
1183 2095 2375 2379 2390 2170 2607 589 1833 2628 354 189 2016 1286
1116 556 1918 937 1740 803 2435 257 807 1697 2494 2538 249 683
1673 1391 871 1064 1364 814 184 498 1898 1017 1055 906 645 242
684 779 784 1696 1226 423 966 2596 649 1008 2402 101 1283 1529
2417 1449 616 1107 1845 1011 1572 2525 2380 2651 704 1089 466 1853
2462 1726 2502 2479 1710 1050 469 220 336 263 1535 554 2590 1273
2543 861 449 1224 1957 2537 1106 114 1213 2194 830 973 1504 2168
777 1975 996 2025 2094 707 994 2315 1970 748 1982 2430 1511 43
2291 1822 1547 958 2312 1598 1733 1587 1659 856 1654 2028 2648 2627
148 1347 1624 1688 383 565 2572 2296 381 714 58 1032 2160 353
2021 1000 1788 2445 524 2331 2420 1191 1278 1113 208 132 1621 875
2400 319 800 646 2353 403 1205 1203 1685 364 129 1941 2039 1426
591 252 2048 1489 1247 477 1633 1036 1603 706 440 576 1821 1523
2561 2488 618 590 1362 1464 1945 395 2458 1556 2041 632 1527 720
2337 2116 1260 1915 2211 108 658 2096 1374 1717 897 2421 2512 2003
1809 310 1997 1394 949 2395 1706 1791 2223 2215 2047 2271 1536 243
1310 1351 21 850 1385 723 511 2067 785 970 1840 822 439 2064
1456 1178 2098 2410 2595 2164 1083 2622 2253 2155 1027 1206 2002 2437
680 1727 1955 1842 1678 647 1690 1739 547 1413 1102 476 2612 1578
1832 368 569 116 12 1227 1127 1416 2088 1907 1837 471 1373 270
1356 2505 742 1448 24 2358 594 1938 2459 1269 2657 1925 621 2404
1143 1015 1751 2626 493 31 2653 1430 518 221 431 1319 982 167
671 2138 792 1867 841 110 799 2246 2115 239 1928 2317 2264 379
1878 553 1959 1397 1611 1943 1983 224 75 2316 402 256 1002 746
2035 63 1375 1801 2144 1265 1689 2280 18 2601 1911 2078 1342 1330
2580 141 1383 2642 1103 328 2233 813 1066 2472 1605 1724 2598 610
127 2051 948 1193 971 2178 682 2165 1266 988 1599 730 1077 2258
2325 266 2092 207 2411 215 1195 1415 2469 895 1065 1167 2192 50
1166 1035 1186 1802 196 2262 2578 2554 1156 857 380 2209 767 638
1827 2143 2506 984 501 2456 627 2112 1208 1923 1964 2287 86 2374
976 419 1379 1952 1380 1298 1285 597 1233 1569 2303 2569 429 1676
1482 2332 502 1559 26 738 1117 2012 2134 2329 2102 1200 1920 1703
991 426 327 1701 835 2151 1912 1411 1600 2497 2368 2423 1737 2193
1910 1834 693 1090 1549 2288 352 2487 1216 558 1848 1144 2311 2083
28 2361 1814 1420 641 2489 1657 279 72 536 909 2157 112 891
1204 563 1098 1924 318 1046 735 797 2549 1037 1750 917 838 1042
675 216 2565 2589 529 1179 1097 1612 2621 326 1123 1462 120 2348
2292 5 30 2152 292 2084 1068 667 1743 1617 1471 351 749 91
1461 2623 1020 1412 2511 593 2544 2114 2425 2352 2158 1668 228 1854
903 2180 1671 1121 452 585 2239 2101 2214 615 463 1334 1469 1058
100 444 1721 1808 1476 1187 1360 1670 2049 1830 1451 1798 969 2486
2045 1052 633 1933 952 515 968 809 13 961 2310 717 1843 1051
981 2136 410 2053 421 1967 1119 2068 1976 388 198 1695 2522 2594
564 1446 2520 2243 2119 698 1012 2190 183 155 1340 2508 1623 500
188 1613 2133 2122 409 1344 2476 1043 2231 406 926 504 1463 1459
1256 775 118 804 1960 275 1306 611 1754 2454 2406 914 630 2658
2218 1308 7 820 2422 574 1779 595 204 728 276 1276 2440 1199
972 978 1483 1887 74 236 1234 187 1324 923 375 1805 349 350
776 2221 2228 2020 1773 1280 606 237 2091 715 623 546 1295 2324
84 1800 1079 1995 2279 756 2126 1473 882 2199 734 1440 157 1846
1155 1818 193 2367 1965 2081 995 2245 1573 229 56 2125 1700 458
1126 2278]
(1865, 4) (1865, 5) (1865, 1)
Apply MinMaxScaler
[12]:
# Scale features to [0, 1] using statistics from the training split only,
# so no test information leaks into the scaler.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
Testing the Class
[13]:
from sklearn.metrics import classification_report, balanced_accuracy_score, roc_auc_score
from sklearn.metrics import normalized_mutual_info_score, mutual_info_score, adjusted_mutual_info_score
import pandas as pd
# Experiment configuration.
l1 =0.01
NUM_RUNS =10
custom_loss = "GCE"
# Train/evaluate NUM_RUNS times to average out initialization randomness.
results = []
for i in range(NUM_RUNS):
    print("iteration: " + str(i))
    MA = Keras_MA_GCCE(epochs=50,batch_size=32,R=5, K=len(np.unique(Y_true_train)), dropout=0.25, learning_rate=0.01,optimizer='Adam',
                l1_param=l1, validation_split=0, verbose=0, q=0.1, neurons=4, loss = custom_loss )
    MA.fit(X_train, Y_train)
    MA.plot_history()
    # Generate the predictions for the current run
    # NOTE(review): columns [Y.shape[1]:] are taken as the class scores —
    # presumably the first R columns are per-annotator outputs; confirm
    # against Keras_MA_GCCE's output layout.
    pred_2 = MA.predict(X_test)
    acc, auc, auc_sk, nmi, bacc = evaluation_metrics(Y_true_test, pred_2[:,Y.shape[1]:], print_result=False) # mi, ami,
    # Save the results for the current run to the list of dictionaries
    results.append({
        #'run': i,
        'accuracy': acc,
        'balanced_accuracy': bacc,
        # 'mutual_information': mi,
        'normalized_mutual_information': nmi,
        # 'adjusted_mutual_information': ami,
        'auc_tensorflow': auc,
        'auc_scikit_learn': auc_sk,
    })
# Convert the list of dictionaries to a DataFrame
df = np.round(pd.DataFrame(results)*100, 2)
# Calculate the mean and standard deviation of each metric
mean = np.round(df.mean(),2)
std = np.round(df.std(),2)
iteration: 0
25/25 [==============================] - 0s 1ms/step
iteration: 1
25/25 [==============================] - 0s 1ms/step
iteration: 2
25/25 [==============================] - 0s 2ms/step
iteration: 3
25/25 [==============================] - 0s 1ms/step
iteration: 4
25/25 [==============================] - 0s 1ms/step
iteration: 5
25/25 [==============================] - 0s 2ms/step
iteration: 6
25/25 [==============================] - 0s 1ms/step
iteration: 7
25/25 [==============================] - 0s 1ms/step
iteration: 8
25/25 [==============================] - 0s 1ms/step
iteration: 9
25/25 [==============================] - 0s 2ms/step
[14]:
df
[14]:
accuracy | balanced_accuracy | normalized_mutual_information | auc_tensorflow | auc_scikit_learn | |
---|---|---|---|---|---|
0 | 96.00 | 92.01 | 74.68 | 96.000000 | 98.90 |
1 | 96.12 | 92.37 | 75.37 | 96.180000 | 98.85 |
2 | 96.12 | 91.85 | 75.01 | 95.930000 | 98.84 |
3 | 95.38 | 90.37 | 71.53 | 95.180000 | 99.11 |
4 | 95.62 | 91.26 | 72.94 | 95.629997 | 99.00 |
5 | 97.12 | 94.58 | 80.62 | 97.290001 | 99.20 |
6 | 96.62 | 94.33 | 79.32 | 97.169998 | 98.99 |
7 | 95.62 | 91.26 | 72.94 | 95.629997 | 98.79 |
8 | 95.75 | 91.80 | 73.79 | 95.900002 | 98.72 |
9 | 96.50 | 93.97 | 78.42 | 96.989998 | 98.93 |
[15]:
mean
[15]:
accuracy 96.08
balanced_accuracy 92.38
normalized_mutual_information 75.46
auc_tensorflow 96.19
auc_scikit_learn 98.93
dtype: float64
[16]:
std
[16]:
accuracy 0.54
balanced_accuracy 1.43
normalized_mutual_information 3.02
auc_tensorflow 0.72
auc_scikit_learn 0.15
dtype: float64
[17]:
result_df = pd.concat([mean.rename('Mean'), std.rename('Std')], axis=1)
[18]:
result_df
[18]:
Mean | Std | |
---|---|---|
accuracy | 96.08 | 0.54 |
balanced_accuracy | 92.38 | 1.43 |
normalized_mutual_information | 75.46 | 3.02 |
auc_tensorflow | 96.19 | 0.72 |
auc_scikit_learn | 98.93 | 0.15 |
[19]:
# Save the DataFrame to an excel file
# (file name is built from the `database` tag and the loss name).
result_df.to_excel(database + custom_loss + ".xlsx")