[1]:

import tensorflow as tf
from sklearn.metrics import classification_report, accuracy_score, balanced_accuracy_score, roc_auc_score, mutual_info_score, normalized_mutual_info_score, adjusted_mutual_info_score
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import OneHotEncoder
from scipy.stats import mode
import numpy as np

def ook(t):
  lb = LabelBinarizer()
  y_ook = lb.fit_transform(t)

  if len(np.unique(t))==2:
    y_ook = np.concatenate((1-y_ook.astype(bool), y_ook), axis = 1)

  return y_ook



def evaluation_metrics(y_true, y_pred, print_result=True):
    acc = 0
    auc = 0
    auc_sk = 0
    #mi = 0
    nmi = 0
    #ami = 0
    bacc = 0

    # Accuracy
    #report = classification_report(y_pred.argmax(axis=1), y_true.ravel(), output_dict=True)
    acc = accuracy_score( y_true.ravel(), y_pred.argmax(axis=1))  #report['accuracy']

    # Balanced accuracy
    bacc = balanced_accuracy_score(y_true.squeeze(), y_pred.argmax(axis=1).squeeze(), adjusted=True)

   # # Mutual Information
   # mi = mutual_info_score(y_true.squeeze(), y_pred.argmax(axis=1).squeeze())

    # Normalized Mutual Information
    nmi = normalized_mutual_info_score(y_true.squeeze(), y_pred.argmax(axis=1).squeeze())

    # Adjusted Mutual Information
    #ami = adjusted_mutual_info_score(y_true.squeeze(), y_pred.argmax(axis=1).squeeze())

    # AUC (Tensorflow)
    auc_metric = tf.keras.metrics.AUC(from_logits=True)
    auc_metric.update_state(y_true, y_pred.argmax(axis=1).astype('float'))
    auc = auc_metric.result().numpy()
    auc_metric.reset_states()

    # AUC (scikit-learn)
    auc_sk = roc_auc_score(ook(y_true), y_pred)

    if print_result:
        print("Accuracy: {:.4f}".format(acc))
        print("Balanced Accuracy: {:.4f}".format(bacc))
        print("Mutual Information: {:.4f}".format(mi))
        print("Normalized Mutual Information: {:.4f}".format(nmi))
        print("Adjusted Mutual Information: {:.4f}".format(ami))
        print("AUC (Tensorflow): {:.4f}".format(auc))
        print("AUC (scikit-learn): {:.4f}".format(auc_sk))

    return acc, auc, auc_sk, nmi, bacc   # mi, , ami

Subclassing for RCDNN¶

[2]:

!git clone https://github.com/Jectrianama/GCCE_TEST.git

Cloning into 'GCCE_TEST'...
remote: Enumerating objects: 869, done.
remote: Counting objects: 100% (462/462), done.
remote: Compressing objects: 100% (246/246), done.
remote: Total 869 (delta 243), reused 397 (delta 210), pack-reused 407
Receiving objects: 100% (869/869), 39.16 MiB | 12.10 MiB/s, done.
Resolving deltas: 100% (411/411), done.

[3]:

import os
os.chdir('/content/GCCE_TEST/Models')
#from  keras_ma_gcce import *
from labels_generation import MA_Clas_Gen
os.chdir('../../')

[4]:

#cargar datos desde drive otros dataset
FILEID = "1AU8pTtCLihBjCZjWITaAzpnEuL4RO436"
#https://drive.google.com/file/d/1AU8pTtCLihBjCZjWITaAzpnEuL4RO436/view?usp=sharing
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id='$FILEID -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id="$FILEID -O  DataGCCE.zip && rm -rf /tmp/cookies.txt
!unzip -o DataGCCE.zip
!dir

--2023-02-13 17:55:31--  https://docs.google.com/uc?export=download&confirm=&id=1AU8pTtCLihBjCZjWITaAzpnEuL4RO436
Resolving docs.google.com (docs.google.com)... 142.250.152.139, 142.250.152.113, 142.250.152.138, ...
Connecting to docs.google.com (docs.google.com)|142.250.152.139|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://doc-00-90-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/imatc23c4tjkk0pa09ojfrfs4moa9ud4/1676310900000/07591141114418430227/*/1AU8pTtCLihBjCZjWITaAzpnEuL4RO436?e=download&uuid=2f27a04d-d2b3-4cf7-b4a8-307fef9ccbee [following]
Warning: wildcards not supported in HTTP.
--2023-02-13 17:55:32--  https://doc-00-90-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/imatc23c4tjkk0pa09ojfrfs4moa9ud4/1676310900000/07591141114418430227/*/1AU8pTtCLihBjCZjWITaAzpnEuL4RO436?e=download&uuid=2f27a04d-d2b3-4cf7-b4a8-307fef9ccbee
Resolving doc-00-90-docs.googleusercontent.com (doc-00-90-docs.googleusercontent.com)... 142.250.125.132, 2607:f8b0:4001:c2f::84
Connecting to doc-00-90-docs.googleusercontent.com (doc-00-90-docs.googleusercontent.com)|142.250.125.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 38377 (37K) [application/x-zip-compressed]
Saving to: ‘DataGCCE.zip’

DataGCCE.zip        100%[===================>]  37.48K  --.-KB/s    in 0s

2023-02-13 17:55:32 (78.9 MB/s) - ‘DataGCCE.zip’ saved [38377/38377]

Archive:  DataGCCE.zip
  inflating: new-thyroid.csv
  inflating: tic-tac-toe-endgame.csv
  inflating: balance-scale.csv
  inflating: file.csv
balance-scale.csv  file.csv   new-thyroid.csv  tic-tac-toe-endgame.csv
DataGCCE.zip       GCCE_TEST  sample_data

[5]:

#cargar datos desde drive acceso libre
FILEID = "1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW"
#https://drive.google.com/file/d/1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW/view?usp=sharing
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id='$FILEID -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id="$FILEID -O MADatasets.zip && rm -rf /tmp/cookies.txt
!unzip -o MADatasets.zip
!dir

--2023-02-13 17:55:32--  https://docs.google.com/uc?export=download&confirm=t&id=1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW
Resolving docs.google.com (docs.google.com)... 142.250.152.139, 142.250.152.113, 142.250.152.138, ...
Connecting to docs.google.com (docs.google.com)|142.250.152.139|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://doc-00-90-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/8sku5s1gjq7dgj1trb79a1m7h0ct3ae7/1676310900000/07591141114418430227/*/1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW?e=download&uuid=3bd37258-38fe-4341-a2bd-1d62526fac4c [following]
Warning: wildcards not supported in HTTP.
--2023-02-13 17:55:32--  https://doc-00-90-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/8sku5s1gjq7dgj1trb79a1m7h0ct3ae7/1676310900000/07591141114418430227/*/1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW?e=download&uuid=3bd37258-38fe-4341-a2bd-1d62526fac4c
Resolving doc-00-90-docs.googleusercontent.com (doc-00-90-docs.googleusercontent.com)... 142.250.125.132, 2607:f8b0:4001:c2f::84
Connecting to doc-00-90-docs.googleusercontent.com (doc-00-90-docs.googleusercontent.com)|142.250.125.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 156530728 (149M) [application/zip]
Saving to: ‘MADatasets.zip’

MADatasets.zip      100%[===================>] 149.28M  70.7MB/s    in 2.1s

2023-02-13 17:55:35 (70.7 MB/s) - ‘MADatasets.zip’ saved [156530728/156530728]

Archive:  MADatasets.zip
  inflating: MADatasets/util.py
  inflating: MADatasets/Iris1.mat
  inflating: MADatasets/Integra_Labels.mat
  inflating: MADatasets/MAGenerationClassification.py
  inflating: MADatasets/Voice.mat
  inflating: MADatasets/Iris.mat
  inflating: MADatasets/Sinthetic.mat
  inflating: MADatasets/MAGenerationClassification_1.py
  inflating: MADatasets/Bupa1.mat
  inflating: MADatasets/TicTacToe1.mat
  inflating: MADatasets/Wine.mat
  inflating: MADatasets/Breast1.mat
  inflating: MADatasets/Breast.mat
  inflating: MADatasets/Music.mat
  inflating: MADatasets/Pima.mat
  inflating: MADatasets/Ionosphere.mat
  inflating: MADatasets/TicTacToe.mat
  inflating: MADatasets/VoiceData.m
  inflating: MADatasets/util_1.py
  inflating: MADatasets/Ionosphere1.mat
  inflating: MADatasets/__pycache__/util_1.cpython-37.pyc
  inflating: MADatasets/Bupa.mat
  inflating: MADatasets/Wine1.mat
  inflating: MADatasets/__pycache__/util.cpython-37.pyc
  inflating: MADatasets/Pima1.mat
  inflating: MADatasets/Segmentation1.mat
  inflating: MADatasets/Western.mat
  inflating: MADatasets/Integra_Preprocesamiento_Seg_Caracterizacion_time_frec.mat
  inflating: MADatasets/Western1.mat
  inflating: MADatasets/Segmentation.mat
  inflating: MADatasets/Skin_NonSkin.mat
  inflating: MADatasets/Skin_NonSkin1.mat
  inflating: MADatasets/Occupancy1.mat
  inflating: MADatasets/Polarity.mat
  inflating: MADatasets/Occupancy.mat
balance-scale.csv  GCCE_TEST       new-thyroid.csv
DataGCCE.zip       MADatasets      sample_data
file.csv           MADatasets.zip  tic-tac-toe-endgame.csv

[6]:

# -*- coding: utf-8 -*-
import tensorflow as tf
import matplotlib.pyplot as plt
#from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
import tensorflow_probability as tfp
from tensorflow.keras import regularizers
from sklearn.base import  BaseEstimator, TransformerMixin, ClassifierMixin
from sklearn.model_selection import train_test_split
tf.keras.backend.clear_session()
#tf.random.set_seed(42)
import types
import tempfile
class Keras_MA_RCDNN(): #transformer no va
 #Constructor __init__. Special method: identified by a double underscore at either side of their name
 #work in the background
 # initialize data members to the object. lets the class initialize the object’s attributes and serves no other purpose.
    def __init__(self,epochs=100,batch_size=30,R=5, K=2, dropout=0.5, learning_rate=1e-3,optimizer='Adam',
                  l1_param=0, validation_split=0.3, verbose=1, q = 0.1):
        self.epochs=epochs
        self.dropout=dropout
        self.batch_size = batch_size
        self.learning_rate=learning_rate
        self.l1_param=l1_param
        self.l2_param=l1_param
        self.validation_split = validation_split
        self.verbose = verbose
        self.optimizer = optimizer
        self.R=R
        self.K=K
        self.q = q



    def custom_RCDNN_loss(self, y_true, y_pred):  #ytrue \in N x R,  ypred \in N x (R+K) -> PRI->JULI

        #Input ytrue: samples (N) x  annotators (R)
        #Input ypred: samples (N) x  annotators+classes (R+K)

        #Ground truth estimation samples (N) x  Classes(K)
        pred = y_pred[:,self.R:]
        pred = tf.clip_by_value(pred, clip_value_min=1e-9, clip_value_max=1-1e-9) #estabilidad numerica de la funcion de costo
        # Annotators reliability prediction: samples (N) x  annotators (R)
        ann_ = y_pred[:,:self.R]
        #Y_true 1-K: samples (N) x  Classes(1-K), annotators (R)
        Y_true = tf.one_hot(tf.cast(y_true, dtype=tf.int32), depth=self.K, axis=1)
        #Y_pred 1 - K: samples (N) x  Classes(1-K), annotators (R)
        Y_hat = tf.repeat(tf.expand_dims(pred,-1), self.R, axis = -1)
        #loss computation
        p_logreg = tf.math.reduce_prod(tf.math.pow(Y_hat, Y_true), axis=1)
        temp1 = ann_*tf.math.log(p_logreg)
        temp2 = (1 - ann_)*tf.math.log(1/self.K)*tf.reduce_sum(Y_true,axis=1)
        # temp2 = (tf.ones(tf.shape(ann_)) - ann_)*tf.math.log(1/K)
        # print(tf.reduce_mean(Y_true,axis=1).numpy())
        return -tf.math.reduce_sum((temp1 + temp2))


    def GCCE_MA_loss(self, y_true, y_pred):
        # print(y_true,y_pred)
       # q = 0.1
        pred = y_pred[:, self.R:]
        pred = tf.clip_by_value(pred, clip_value_min=1e-9, clip_value_max=1)
        ann_ = y_pred[:, :self.R]
        # ann_ = tf.clip_by_value(ann_, clip_value_min=1e-9, clip_value_max=1-1e-9)
        Y_true = tf.one_hot(tf.cast(y_true, dtype=tf.int32), depth=self.K, axis=1)
        Y_hat = tf.repeat(tf.expand_dims(pred,-1), self.R, axis = -1)

        p_gcce = Y_true*(1 - Y_hat**self.q)/self.q
        temp1 = ann_*tf.math.reduce_sum(p_gcce, axis=1)

        # p_logreg = tf.math.reduce_prod(tf.math.pow(Y_hat, Y_true), axis=1)
        # temp1 = ann_*tf.math.log(p_logreg)
        # temp2 = (1 - ann_)*tf.math.log(1/K)*tf.reduce_sum(Y_true,axis=1)
        # aux = tf.repeat(tf.reduce_sum(pred*tf.math.log(pred),axis=1,keepdims=True), R, axis = 1)
        # tf.print(tf.shape(aux))
        # print(tf.shape(aux))
        # temp2 = (1 - ann_)*aux*tf.reduce_sum(Y_true,axis=1)
        # temp2 = (tf.ones(tf.shape(ann_)) - ann_)*tf.math.log(1/K)
        # print(tf.reduce_mean(Y_true,axis=1).numpy())
        # Y_true_1 = tf.clip_by_value(Y_true, clip_value_min=1e-9, clip_value_max=1)
        # p_logreg_inv = tf.math.reduce_prod(tf.math.pow(Y_true_1, Y_hat), axis=1)
        # temp2 = (1 - ann_)*tf.math.log(p_logreg_inv)
        temp2 = (1 - ann_)*(1-(1/self.K)**self.q)/self.q*tf.reduce_sum(Y_true,axis=1)
        return tf.math.reduce_sum((temp1 + temp2))


    def PRI_MA_loss(self, y_true, y_pred): #,  -> PRI
        #Input ytrue: samples (N) x  annotators (R)
        #Input ypred: samples (N) x  annotators+classes (R+K)
        #PRI MA
        cce_M = tf.keras.losses.CategoricalCrossentropy(reduction='none',axis=-1)
        cce_C = tf.keras.losses.CategoricalCrossentropy(reduction='none',axis=1)



        #N = tf.cast(y_true.shape[0],dtype=tf.int32)
        N = tf.shape(y_true)[0]
        #Ground truth estimation samples (N) x  Classes(K)
        y_pred_tf = y_pred[:,self.R:]
        # Annotators reliability prediction: samples (N) x  annotators (R)
        lambda_R_tf = y_pred[:,:self.R] #tf.ones(shape=(N,R))
        #Y_true 1 K: samples (N) x  Classes(1-K), annotators (R)
        Y_true_1K = tf.one_hot(tf.cast(y_true,dtype=tf.int32),depth=self.K,axis=1)
        #Y_pred 1 - K: samples (N) x  Classes(1-K), annotators (R)
        Y_hat = tf.repeat(tf.expand_dims(y_pred_tf,-1), self.R, axis = -1)
        #marginal entropy
        #cce along the K classes -> ypred_tf
        Hm_ = cce_M(y_pred_tf,y_pred_tf)
        #cross entropy
        #cce along the K classes -> Y_hat
        Hc_ = cce_C(Y_true_1K,Y_hat)
        #PRI MA computation
        LH = tf.linalg.matmul(lambda_R_tf,Hc_,transpose_a=True) # \Lambda^\top H_c
        temp1 = tf.linalg.trace(LH) #trace(LH)
        Hm1L = tf.linalg.matmul(tf.ones(shape=(N,self.R))-lambda_R_tf,tf.ones(shape=(self.R,1)))# 1_N 1_R^\top - Lambda
        Hm_t = tf.reshape(Hm_,shape=(N,1))
        temp2 = tf.squeeze(tf.linalg.matmul(Hm_t,Hm1L,transpose_a=True)) # Hm^\top Hm1L
        loss_c = temp1+ temp2 #return loss

        return loss_c


    def fit(self, X, Y):
        #input X numpy array first dimension samples (N)x features (P)
        #input Y numpy array vector len = samples (N) x  annotators (R)
        P = X.shape[1]
        if self.optimizer == "Adam":
            opt = tf.keras.optimizers.Adam(learning_rate=self.learning_rate, clipnorm=1.0)
        elif self.optimizer == "SGD":
            opt = tf.keras.optimizers.SGD(learning_rate=self.learning_rate, clipnorm=1.0)
        else:
            opt=self.optimizer

        #capa de entrada
        input_l = tf.keras.layers.Input(shape=(X_train.shape[1]), name='entrada')
        #capas densas
        h1 = tf.keras.layers.Dense(int(P*4*(self.K+self.R)),activation='selu',name='h1',
                              kernel_regularizer=tf.keras.regularizers.l1_l2(l1=self.l1_param,l2=self.l2_param))(input_l)#argumento de entrada
        h2 = tf.keras.layers.Dense(int(P*(self.K+self.R)),activation='selu',name='h2',
                              kernel_regularizer=tf.keras.regularizers.l1_l2(l1=self.l1_param,l2=self.l2_param))(h1)
        dout = tf.keras.layers.Dropout(rate=self.dropout)(h2)
        output_R = tf.keras.layers.Dense(self.R,activation="sigmoid",
                                    kernel_regularizer=tf.keras.regularizers.l1_l2(l1=self.l1_param,l2=self.l2_param), name= 'out_R_RCDNN' )(dout)

        output_K = tf.keras.layers.Dense(self.K,activation="softmax",
                                    kernel_regularizer=tf.keras.regularizers.l1_l2(l1=self.l1_param,l2=self.l2_param), name= 'out_K_RCDNN')(dout)

        output = tf.keras.layers.concatenate([output_R, output_K])
        self.model = tf.keras.Model(inputs= input_l,outputs=output)
        self.model.compile(loss=self.custom_RCDNN_loss, optimizer=opt)

        self.history = self.model.fit(X, Y, epochs=self.epochs, validation_split=self.validation_split,   #
                                      batch_size=self.batch_size,verbose=self.verbose)

        return self



    def predict(self, X, *_):
       #input X numpy array first dimension samples (N)x features (P)
      return  self.model.predict(X)



    def fit_predict(self,X,y):
        #input X numpy array first dimension samples (N)x features (P)
        #input Y numpy array vector len = samples (N) x  annotators (R)
        self.fit(X,y)
        return self.predict(X)

     #graphics
    def plot_history(self):
        pd.DataFrame(self.history.history).plot(figsize=(8, 5))
        plt.grid(True)
        #plt.gca().set_ylim(0, 1)
        #save_fig("keras_learning_curves_plot")
        plt.show()
        return

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)

        return self



    def get_params(self, deep=True):
        return { 'l1_param':self.l1_param, 'dropout':self.dropout, 'optimizer':self.optimizer,
                'learning_rate':self.learning_rate, 'batch_size':self.batch_size,
                'epochs':self.epochs, 'verbose':self.verbose, 'validation_split':self.validation_split,
                'R':self.R, 'K':self.K, 'q':self.q
                }


        return self

Load Data¶

[7]:

#load data
import scipy.io as sio
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf #importar tensorflow
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler,MinMaxScaler
import numpy as np
database = 'Wine' #['bupa1', 'breast-cancer-wisconsin1','pima-indians-diabetes1', 'ionosphere1', 'tic-tac-toe1', 'iris1', 'wine1', 'segmentation1']


path_ = 'MADatasets/'+ database+ '.mat'
Xdata = sio.loadmat(path_)
Xdata.keys()

[7]:

dict_keys(['__header__', '__version__', '__globals__', 'X', 'y', 'Y', 'iAnn', 'Exp', 'idxtr', 'idxte'])

[8]:

X = Xdata['X']
Y = Xdata['Y']
t = Xdata['y'].reshape(-1)
print('X',X.shape,'t',t.shape,'Y',Y.shape)

X (178, 13) t (178,) Y (178, 5)

[9]:

[9]:

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3], dtype=uint8)

Labels Generation¶

[10]:

Y, iAnn, Lam_r = MA_Clas_Gen(X ,t, R=5, NrP=1)

/usr/local/lib/python3.8/dist-packages/sklearn/manifold/_t_sne.py:780: FutureWarning: The default initialization in TSNE will change from 'random' to 'pca' in 1.2.
  warnings.warn(
/usr/local/lib/python3.8/dist-packages/sklearn/manifold/_t_sne.py:790: FutureWarning: The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.
  warnings.warn(

../_images/notebooks_Wine_GCE_13_1.png

[11]:

Y = Y - 1
t = t - 1

[12]:

from sklearn.metrics import classification_report
for i in range(Y.shape[1]):
    print('annotator',i+1)
    print(classification_report(t,Y[:,i]))
    unique, counts = np.unique(Y[:,i], return_counts=True)
    plt.figure()
    plt.bar(unique, counts)
#     unique, counts = np.unique(Y_test[5], return_counts=True)
#     plt.bar(unique, counts)

    plt.title('Class Frequency for Y_true')
    plt.xlabel('Class')
    plt.ylabel('Frequency')

annotator 1
              precision    recall  f1-score   support

           0       0.86      1.00      0.92        59
           1       0.90      0.90      0.90        71
           2       0.95      0.75      0.84        48

    accuracy                           0.89       178
   macro avg       0.90      0.88      0.89       178
weighted avg       0.90      0.89      0.89       178

annotator 2
              precision    recall  f1-score   support

           0       0.55      0.95      0.70        59
           1       0.53      0.23      0.32        71
           2       0.47      0.46      0.46        48

    accuracy                           0.53       178
   macro avg       0.52      0.54      0.49       178
weighted avg       0.52      0.53      0.48       178

annotator 3
              precision    recall  f1-score   support

           0       0.55      0.27      0.36        59
           1       0.69      0.85      0.76        71
           2       0.58      0.75      0.65        48

    accuracy                           0.63       178
   macro avg       0.61      0.62      0.59       178
weighted avg       0.61      0.63      0.60       178

annotator 4
              precision    recall  f1-score   support

           0       0.60      0.73      0.66        59
           1       0.59      0.31      0.41        71
           2       0.42      0.60      0.50        48

    accuracy                           0.53       178
   macro avg       0.54      0.55      0.52       178
weighted avg       0.55      0.53      0.51       178

annotator 5
              precision    recall  f1-score   support

           0       0.03      0.02      0.02        59
           1       0.44      0.61      0.51        71
           2       0.20      0.17      0.18        48

    accuracy                           0.29       178
   macro avg       0.22      0.26      0.24       178
weighted avg       0.24      0.29      0.26       178

../_images/notebooks_Wine_GCE_15_1.png

../_images/notebooks_Wine_GCE_15_2.png

../_images/notebooks_Wine_GCE_15_3.png

../_images/notebooks_Wine_GCE_15_4.png

../_images/notebooks_Wine_GCE_15_5.png

Split data¶

[13]:

import numpy.matlib
from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit
Ns = 1
ss = ShuffleSplit(n_splits=Ns, test_size=0.3,random_state =123)
for train_index, test_index in ss.split(X):
    print(test_index)
    X_train, X_test,Y_train,Y_test = X[train_index,:], X[test_index,:],Y[train_index,:], Y[test_index,:]
    Y_true_train, Y_true_test = t[train_index].reshape(-1,1), t[test_index].reshape(-1,1)
print(X_train.shape, Y_train.shape, Y_true_train.shape)

[138  60 137  77  90 159  41 136 166  93 154 141 164  33  31 152  82 122
  42  87 139 147 151 162  79 169 157  95  53  10  23  37 134 110 167 120
 142   8  72 114 148 163  36  20 121  28  13  88  24 100 127 143 144  85]
(124, 13) (124, 5) (124, 1)

Apply MinMaxScaler¶

[14]:

scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Testing the Class¶

[15]:

from sklearn.metrics import classification_report, balanced_accuracy_score, roc_auc_score
from sklearn.metrics import normalized_mutual_info_score, mutual_info_score, adjusted_mutual_info_score
import pandas as pd
l1 =0.1
NUM_RUNS =10
custom_loss = "GCE"


results = []
for i in range(NUM_RUNS):

    print("iteration: " + str(i))

    MA = Keras_MA_RCDNN(epochs=100,batch_size=128,R=5, K=len(np.unique(Y_true_train)), dropout=0.25, learning_rate=0.001,optimizer='Adam',
                l1_param=l1,  validation_split=0, verbose=0, q=0.3 )

    #MA = Keras_MA_RCDNN(epochs=100,batch_size=128,R=5, K=len(np.unique(Y_true_train)), dropout=0.25, learning_rate=0.001,optimizer='Adam',
            #    l1_param=l1,  validation_split=0, verbose=0, q=0.3, neurons=4, loss = custom_loss )

    MA.fit(X_train, Y_train)
    MA.plot_history()

    # Generate the predictions for the current run
    pred_2 = MA.predict(X_test)

    acc, auc, auc_sk,  nmi,  bacc = evaluation_metrics(Y_true_test,  pred_2[:,Y.shape[1]:], print_result=False)  # mi, ami,

    # Save the results for the current run to the list of dictionaries
    results.append({
        #'run': i,
        'accuracy': acc,
        'balanced_accuracy': bacc,
       # 'mutual_information': mi,
        'normalized_mutual_information': nmi,
     #   'adjusted_mutual_information': ami,
        'auc_tensorflow': auc,
        'auc_scikit_learn': auc_sk,
    })

# Convert the list of dictionaries to a DataFrame
df = np.round(pd.DataFrame(results)*100, 2)

# Calculate the mean and standard deviation of each metric
mean = np.round(df.mean(),2)
std = np.round(df.std(),2)

iteration: 0

../_images/notebooks_Wine_GCE_21_1.png

2/2 [==============================] - 0s 7ms/step
iteration: 1

../_images/notebooks_Wine_GCE_21_3.png

2/2 [==============================] - 0s 3ms/step
iteration: 2

../_images/notebooks_Wine_GCE_21_5.png

2/2 [==============================] - 0s 5ms/step
iteration: 3

../_images/notebooks_Wine_GCE_21_7.png

2/2 [==============================] - 0s 4ms/step
iteration: 4

../_images/notebooks_Wine_GCE_21_9.png

WARNING:tensorflow:5 out of the last 9 calls to <function Model.make_predict_function.<locals>.predict_function at 0x7ff2bc09d9d0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.

2/2 [==============================] - 0s 7ms/step
iteration: 5

../_images/notebooks_Wine_GCE_21_12.png

WARNING:tensorflow:6 out of the last 11 calls to <function Model.make_predict_function.<locals>.predict_function at 0x7ff2b4ce9f70> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.

2/2 [==============================] - 0s 8ms/step
iteration: 6

../_images/notebooks_Wine_GCE_21_15.png

2/2 [==============================] - 0s 3ms/step
iteration: 7

../_images/notebooks_Wine_GCE_21_17.png

2/2 [==============================] - 0s 3ms/step
iteration: 8

../_images/notebooks_Wine_GCE_21_19.png

2/2 [==============================] - 0s 3ms/step
iteration: 9

../_images/notebooks_Wine_GCE_21_21.png

2/2 [==============================] - 0s 3ms/step

[16]:

df

[16]:

	accuracy	balanced_accuracy	normalized_mutual_information	auc_tensorflow	auc_scikit_learn
0	96.30	95.45	88.72	100.0	100.0
1	96.30	95.45	88.72	100.0	100.0
2	98.15	97.73	93.18	100.0	100.0
3	92.59	90.91	82.15	100.0	100.0
4	92.59	90.91	82.15	100.0	100.0
5	100.00	100.00	100.00	100.0	100.0
6	98.15	97.73	93.18	100.0	100.0
7	96.30	95.45	88.72	100.0	100.0
8	96.30	95.45	88.72	100.0	100.0
9	98.15	97.73	93.18	100.0	100.0

[17]:

mean

[17]:

accuracy                          96.48
balanced_accuracy                 95.68
normalized_mutual_information     89.87
auc_tensorflow                   100.00
auc_scikit_learn                 100.00
dtype: float64

[18]:

std

[18]:

accuracy                         2.38
balanced_accuracy                2.92
normalized_mutual_information    5.37
auc_tensorflow                   0.00
auc_scikit_learn                 0.00
dtype: float64

[19]:

result_df = pd.concat([mean.rename('Mean'), std.rename('Std')], axis=1)

[20]:

result_df

[20]:

	Mean	Std
accuracy	96.48	2.38
balanced_accuracy	95.68	2.92
normalized_mutual_information	89.87	5.37
auc_tensorflow	100.00	0.00
auc_scikit_learn	100.00	0.00

[21]:

# Save the DataFrame to an excel file
result_df.to_excel(database + custom_loss + ".xlsx")

[21]: