[1]:
import tensorflow as tf
from sklearn.metrics import classification_report, accuracy_score, balanced_accuracy_score, roc_auc_score, mutual_info_score, normalized_mutual_info_score, adjusted_mutual_info_score
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import OneHotEncoder
from scipy.stats import mode
import numpy as np
def ook(t):
    """Return a one-hot style indicator matrix for the label vector ``t``.

    The labels are binarized with ``LabelBinarizer``. When ``t`` holds exactly
    two distinct values, the complement of the binarized column is placed in
    front of it so that the result has two columns.
    """
    encoded = LabelBinarizer().fit_transform(t)
    has_two_classes = np.unique(t).size == 2
    if has_two_classes:
        # Complement column first, original binarized column second.
        complement = 1 - encoded.astype(bool)
        encoded = np.concatenate((complement, encoded), axis=1)
    return encoded
def evaluation_metrics(y_true, y_pred, print_result=True):
    """Score per-class predictions against reference labels.

    Parameters
    ----------
    y_true : numpy.ndarray
        Integer reference labels; flattened before the label-based metrics.
    y_pred : numpy.ndarray
        2-D array of per-class scores; the predicted label of each row is the
        arg-max along axis 1.
    print_result : bool, default True
        When true, print every metric (including plain and adjusted mutual
        information, which are computed for display only).

    Returns
    -------
    tuple
        ``(acc, auc, auc_sk, nmi, bacc)``: accuracy, TensorFlow AUC,
        scikit-learn ROC-AUC, normalized mutual information and
        chance-adjusted balanced accuracy.
    """
    labels = y_true.ravel()
    predicted = y_pred.argmax(axis=1)

    # Accuracy
    acc = accuracy_score(labels, predicted)
    # Balanced accuracy (adjusted=True rescales so that chance level maps to 0)
    bacc = balanced_accuracy_score(labels, predicted, adjusted=True)
    # Normalized Mutual Information
    nmi = normalized_mutual_info_score(labels, predicted)

    # AUC (Tensorflow)
    # NOTE(review): the metric receives hard arg-max labels while
    # from_logits=True is set; confirm this is the intended AUC definition.
    auc_metric = tf.keras.metrics.AUC(from_logits=True)
    auc_metric.update_state(y_true, predicted.astype('float'))
    auc = auc_metric.result().numpy()
    # The metric object is local and discarded here, so no state reset is needed.

    # AUC (scikit-learn), computed on the one-hot encoded reference labels
    auc_sk = roc_auc_score(ook(y_true), y_pred)

    if print_result:
        # These two scores are only reported, never returned; computing them
        # here avoids the NameError the print statements used to raise.
        mi = mutual_info_score(labels, predicted)
        ami = adjusted_mutual_info_score(labels, predicted)
        print("Accuracy: {:.4f}".format(acc))
        print("Balanced Accuracy: {:.4f}".format(bacc))
        print("Mutual Information: {:.4f}".format(mi))
        print("Normalized Mutual Information: {:.4f}".format(nmi))
        print("Adjusted Mutual Information: {:.4f}".format(ami))
        print("AUC (Tensorflow): {:.4f}".format(auc))
        print("AUC (scikit-learn): {:.4f}".format(auc_sk))

    return acc, auc, auc_sk, nmi, bacc
Subclassing for RCDNN¶
[2]:
!git clone https://github.com/Jectrianama/GCCE_TEST.git
Cloning into 'GCCE_TEST'...
remote: Enumerating objects: 869, done.
remote: Counting objects: 100% (462/462), done.
remote: Compressing objects: 100% (246/246), done.
remote: Total 869 (delta 243), reused 397 (delta 210), pack-reused 407
Receiving objects: 100% (869/869), 39.16 MiB | 12.10 MiB/s, done.
Resolving deltas: 100% (411/411), done.
[3]:
import os
# Move into the Models directory of the cloned repository before importing;
# presumably this is what makes labels_generation importable — verify.
os.chdir('/content/GCCE_TEST/Models')
#from keras_ma_gcce import *
from labels_generation import MA_Clas_Gen
# Go back up two directory levels (to /content when starting from the path above),
# so the relative paths used in later cells resolve from there.
os.chdir('../../')
[4]:
# Download the additional datasets from Google Drive
FILEID = "1AU8pTtCLihBjCZjWITaAzpnEuL4RO436"
#https://drive.google.com/file/d/1AU8pTtCLihBjCZjWITaAzpnEuL4RO436/view?usp=sharing
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id='$FILEID -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id="$FILEID -O DataGCCE.zip && rm -rf /tmp/cookies.txt
!unzip -o DataGCCE.zip
!dir
--2023-02-13 17:55:31-- https://docs.google.com/uc?export=download&confirm=&id=1AU8pTtCLihBjCZjWITaAzpnEuL4RO436
Resolving docs.google.com (docs.google.com)... 142.250.152.139, 142.250.152.113, 142.250.152.138, ...
Connecting to docs.google.com (docs.google.com)|142.250.152.139|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://doc-00-90-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/imatc23c4tjkk0pa09ojfrfs4moa9ud4/1676310900000/07591141114418430227/*/1AU8pTtCLihBjCZjWITaAzpnEuL4RO436?e=download&uuid=2f27a04d-d2b3-4cf7-b4a8-307fef9ccbee [following]
Warning: wildcards not supported in HTTP.
--2023-02-13 17:55:32-- https://doc-00-90-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/imatc23c4tjkk0pa09ojfrfs4moa9ud4/1676310900000/07591141114418430227/*/1AU8pTtCLihBjCZjWITaAzpnEuL4RO436?e=download&uuid=2f27a04d-d2b3-4cf7-b4a8-307fef9ccbee
Resolving doc-00-90-docs.googleusercontent.com (doc-00-90-docs.googleusercontent.com)... 142.250.125.132, 2607:f8b0:4001:c2f::84
Connecting to doc-00-90-docs.googleusercontent.com (doc-00-90-docs.googleusercontent.com)|142.250.125.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 38377 (37K) [application/x-zip-compressed]
Saving to: ‘DataGCCE.zip’
DataGCCE.zip 100%[===================>] 37.48K --.-KB/s in 0s
2023-02-13 17:55:32 (78.9 MB/s) - ‘DataGCCE.zip’ saved [38377/38377]
Archive: DataGCCE.zip
inflating: new-thyroid.csv
inflating: tic-tac-toe-endgame.csv
inflating: balance-scale.csv
inflating: file.csv
balance-scale.csv file.csv new-thyroid.csv tic-tac-toe-endgame.csv
DataGCCE.zip GCCE_TEST sample_data
[5]:
# Download the open-access datasets from Google Drive
FILEID = "1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW"
#https://drive.google.com/file/d/1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW/view?usp=sharing
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id='$FILEID -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id="$FILEID -O MADatasets.zip && rm -rf /tmp/cookies.txt
!unzip -o MADatasets.zip
!dir
--2023-02-13 17:55:32-- https://docs.google.com/uc?export=download&confirm=t&id=1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW
Resolving docs.google.com (docs.google.com)... 142.250.152.139, 142.250.152.113, 142.250.152.138, ...
Connecting to docs.google.com (docs.google.com)|142.250.152.139|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://doc-00-90-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/8sku5s1gjq7dgj1trb79a1m7h0ct3ae7/1676310900000/07591141114418430227/*/1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW?e=download&uuid=3bd37258-38fe-4341-a2bd-1d62526fac4c [following]
Warning: wildcards not supported in HTTP.
--2023-02-13 17:55:32-- https://doc-00-90-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/8sku5s1gjq7dgj1trb79a1m7h0ct3ae7/1676310900000/07591141114418430227/*/1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW?e=download&uuid=3bd37258-38fe-4341-a2bd-1d62526fac4c
Resolving doc-00-90-docs.googleusercontent.com (doc-00-90-docs.googleusercontent.com)... 142.250.125.132, 2607:f8b0:4001:c2f::84
Connecting to doc-00-90-docs.googleusercontent.com (doc-00-90-docs.googleusercontent.com)|142.250.125.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 156530728 (149M) [application/zip]
Saving to: ‘MADatasets.zip’
MADatasets.zip 100%[===================>] 149.28M 70.7MB/s in 2.1s
2023-02-13 17:55:35 (70.7 MB/s) - ‘MADatasets.zip’ saved [156530728/156530728]
Archive: MADatasets.zip
inflating: MADatasets/util.py
inflating: MADatasets/Iris1.mat
inflating: MADatasets/Integra_Labels.mat
inflating: MADatasets/MAGenerationClassification.py
inflating: MADatasets/Voice.mat
inflating: MADatasets/Iris.mat
inflating: MADatasets/Sinthetic.mat
inflating: MADatasets/MAGenerationClassification_1.py
inflating: MADatasets/Bupa1.mat
inflating: MADatasets/TicTacToe1.mat
inflating: MADatasets/Wine.mat
inflating: MADatasets/Breast1.mat
inflating: MADatasets/Breast.mat
inflating: MADatasets/Music.mat
inflating: MADatasets/Pima.mat
inflating: MADatasets/Ionosphere.mat
inflating: MADatasets/TicTacToe.mat
inflating: MADatasets/VoiceData.m
inflating: MADatasets/util_1.py
inflating: MADatasets/Ionosphere1.mat
inflating: MADatasets/__pycache__/util_1.cpython-37.pyc
inflating: MADatasets/Bupa.mat
inflating: MADatasets/Wine1.mat
inflating: MADatasets/__pycache__/util.cpython-37.pyc
inflating: MADatasets/Pima1.mat
inflating: MADatasets/Segmentation1.mat
inflating: MADatasets/Western.mat
inflating: MADatasets/Integra_Preprocesamiento_Seg_Caracterizacion_time_frec.mat
inflating: MADatasets/Western1.mat
inflating: MADatasets/Segmentation.mat
inflating: MADatasets/Skin_NonSkin.mat
inflating: MADatasets/Skin_NonSkin1.mat
inflating: MADatasets/Occupancy1.mat
inflating: MADatasets/Polarity.mat
inflating: MADatasets/Occupancy.mat
balance-scale.csv GCCE_TEST new-thyroid.csv
DataGCCE.zip MADatasets sample_data
file.csv MADatasets.zip tic-tac-toe-endgame.csv
[6]:
# -*- coding: utf-8 -*-
import tensorflow as tf
import matplotlib.pyplot as plt
#from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
import tensorflow_probability as tfp
from tensorflow.keras import regularizers
from sklearn.base import BaseEstimator, TransformerMixin, ClassifierMixin
from sklearn.model_selection import train_test_split
tf.keras.backend.clear_session()
#tf.random.set_seed(42)
import types
import tempfile
class Keras_MA_RCDNN():
    """Keras network trained on labels coming from several annotators.

    For each sample the network emits ``R`` sigmoid values (one per annotator)
    followed by ``K`` softmax class probabilities, concatenated in that order.
    The class offers a small scikit-learn-like interface (``fit``, ``predict``,
    ``fit_predict``, ``get_params``, ``set_params``).

    ``fit`` always compiles the model with ``custom_RCDNN_loss``;
    ``GCCE_MA_loss`` and ``PRI_MA_loss`` are defined here but are not selected
    by ``fit``. The ``q`` attribute is read only by ``GCCE_MA_loss``.
    """

    def __init__(self, epochs=100, batch_size=30, R=5, K=2, dropout=0.5, learning_rate=1e-3, optimizer='Adam',
                 l1_param=0, validation_split=0.3, verbose=1, q=0.1):
        """Store the hyper-parameters; no model is built until ``fit`` is called.

        Parameters
        ----------
        epochs, batch_size, validation_split, verbose
            Forwarded to ``tf.keras.Model.fit``.
        R : int
            Number of annotators (width of the sigmoid output).
        K : int
            Number of classes (width of the softmax output).
        dropout : float
            Rate of the dropout layer placed before both outputs.
        learning_rate : float
            Learning rate used when ``optimizer`` is ``'Adam'`` or ``'SGD'``.
        optimizer : str or object
            ``'Adam'`` / ``'SGD'`` build the matching optimizer with
            ``clipnorm=1.0``; any other value is passed to ``compile`` as is.
        l1_param : float
            Strength used for both the L1 and the L2 kernel penalties.
        q : float
            Exponent read by ``GCCE_MA_loss``.
        """
        self.epochs = epochs
        self.dropout = dropout
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.l1_param = l1_param
        # The L2 strength reuses the l1_param value; there is no separate argument.
        self.l2_param = l1_param
        self.validation_split = validation_split
        self.verbose = verbose
        self.optimizer = optimizer
        self.R = R
        self.K = K
        self.q = q

    def custom_RCDNN_loss(self, y_true, y_pred):
        """Loss used by ``fit``.

        y_true: samples (N) x annotators (R).
        y_pred: samples (N) x annotators + classes (R + K).
        Returns the negated sum of both terms over all samples and annotators.
        """
        # Class-probability estimate: samples (N) x classes (K)
        pred = y_pred[:, self.R:]
        # Clip away from 0 and 1 for numerical stability of the logarithm
        pred = tf.clip_by_value(pred, clip_value_min=1e-9, clip_value_max=1-1e-9)
        # Annotator reliability prediction: samples (N) x annotators (R)
        ann_ = y_pred[:, :self.R]
        # One-hot annotator labels with the class axis inserted at position 1
        Y_true = tf.one_hot(tf.cast(y_true, dtype=tf.int32), depth=self.K, axis=1)
        # Class probabilities repeated once per annotator along a new last axis
        Y_hat = tf.repeat(tf.expand_dims(pred, -1), self.R, axis=-1)
        # Product over the class axis of Y_hat ** Y_true
        p_logreg = tf.math.reduce_prod(tf.math.pow(Y_hat, Y_true), axis=1)
        temp1 = ann_*tf.math.log(p_logreg)
        temp2 = (1 - ann_)*tf.math.log(1/self.K)*tf.reduce_sum(Y_true, axis=1)
        return -tf.math.reduce_sum((temp1 + temp2))

    def GCCE_MA_loss(self, y_true, y_pred):
        """Multi-annotator loss built from ``(1 - p**q) / q`` terms with ``q = self.q``.

        Takes the same ``y_true`` / ``y_pred`` layout as ``custom_RCDNN_loss``.
        Not selected by ``fit``.
        """
        pred = y_pred[:, self.R:]
        pred = tf.clip_by_value(pred, clip_value_min=1e-9, clip_value_max=1)
        ann_ = y_pred[:, :self.R]
        Y_true = tf.one_hot(tf.cast(y_true, dtype=tf.int32), depth=self.K, axis=1)
        Y_hat = tf.repeat(tf.expand_dims(pred, -1), self.R, axis=-1)
        # Per-class term, non-zero only at the class each annotator labelled
        p_gcce = Y_true*(1 - Y_hat**self.q)/self.q
        temp1 = ann_*tf.math.reduce_sum(p_gcce, axis=1)
        # Same expression evaluated at the uniform probability 1/K
        temp2 = (1 - ann_)*(1-(1/self.K)**self.q)/self.q*tf.reduce_sum(Y_true, axis=1)
        return tf.math.reduce_sum((temp1 + temp2))

    def PRI_MA_loss(self, y_true, y_pred):
        """PRI multi-annotator loss combining cross-entropy and marginal-entropy terms.

        y_true: samples (N) x annotators (R).
        y_pred: samples (N) x annotators + classes (R + K).
        Not selected by ``fit``.
        """
        cce_M = tf.keras.losses.CategoricalCrossentropy(reduction='none', axis=-1)
        cce_C = tf.keras.losses.CategoricalCrossentropy(reduction='none', axis=1)
        # Dynamic batch size, read from the tensor rather than its static shape
        N = tf.shape(y_true)[0]
        # Class-probability estimate: samples (N) x classes (K)
        y_pred_tf = y_pred[:, self.R:]
        # Annotator reliability prediction: samples (N) x annotators (R)
        lambda_R_tf = y_pred[:, :self.R]
        # One-hot annotator labels with the class axis inserted at position 1
        Y_true_1K = tf.one_hot(tf.cast(y_true, dtype=tf.int32), depth=self.K, axis=1)
        # Class probabilities repeated once per annotator along a new last axis
        Y_hat = tf.repeat(tf.expand_dims(y_pred_tf, -1), self.R, axis=-1)
        # Marginal entropy: cross-entropy of the prediction with itself over the classes
        Hm_ = cce_M(y_pred_tf, y_pred_tf)
        # Cross-entropy between annotator labels and predictions over the class axis
        Hc_ = cce_C(Y_true_1K, Y_hat)
        # PRI MA computation
        LH = tf.linalg.matmul(lambda_R_tf, Hc_, transpose_a=True)  # \Lambda^\top H_c
        temp1 = tf.linalg.trace(LH)  # trace(LH)
        # Row sums of (1_N 1_R^\top - Lambda)
        Hm1L = tf.linalg.matmul(tf.ones(shape=(N, self.R))-lambda_R_tf, tf.ones(shape=(self.R, 1)))
        Hm_t = tf.reshape(Hm_, shape=(N, 1))
        temp2 = tf.squeeze(tf.linalg.matmul(Hm_t, Hm1L, transpose_a=True))  # Hm^\top Hm1L
        loss_c = temp1 + temp2
        return loss_c

    def fit(self, X, Y):
        """Build, compile and train the network; returns ``self``.

        X: numpy array, samples (N) x features (P).
        Y: numpy array, samples (N) x annotators (R).
        """
        P = X.shape[1]
        if self.optimizer == "Adam":
            opt = tf.keras.optimizers.Adam(learning_rate=self.learning_rate, clipnorm=1.0)
        elif self.optimizer == "SGD":
            opt = tf.keras.optimizers.SGD(learning_rate=self.learning_rate, clipnorm=1.0)
        else:
            opt = self.optimizer

        def penalty():
            # Fresh L1/L2 kernel regularizer with the configured strengths
            return tf.keras.regularizers.l1_l2(l1=self.l1_param, l2=self.l2_param)

        # Input layer sized from the data passed to fit (not from a notebook global)
        input_l = tf.keras.layers.Input(shape=(P,), name='entrada')
        # Dense hidden layers whose widths scale with P and (K + R)
        h1 = tf.keras.layers.Dense(int(P*4*(self.K+self.R)), activation='selu', name='h1',
                                   kernel_regularizer=penalty())(input_l)
        h2 = tf.keras.layers.Dense(int(P*(self.K+self.R)), activation='selu', name='h2',
                                   kernel_regularizer=penalty())(h1)
        dout = tf.keras.layers.Dropout(rate=self.dropout)(h2)
        # Two heads on the same features: per-annotator sigmoid and class softmax
        output_R = tf.keras.layers.Dense(self.R, activation="sigmoid",
                                         kernel_regularizer=penalty(), name='out_R_RCDNN')(dout)
        output_K = tf.keras.layers.Dense(self.K, activation="softmax",
                                         kernel_regularizer=penalty(), name='out_K_RCDNN')(dout)
        output = tf.keras.layers.concatenate([output_R, output_K])
        self.model = tf.keras.Model(inputs=input_l, outputs=output)
        self.model.compile(loss=self.custom_RCDNN_loss, optimizer=opt)

        self.history = self.model.fit(X, Y, epochs=self.epochs, validation_split=self.validation_split,
                                      batch_size=self.batch_size, verbose=self.verbose)
        return self

    def predict(self, X, *_):
        """Return the raw network output for ``X`` (samples x features): N x (R + K)."""
        return self.model.predict(X)

    def fit_predict(self, X, y):
        """Train on ``(X, y)`` and return the predictions for the same ``X``."""
        self.fit(X, y)
        return self.predict(X)

    def plot_history(self):
        """Plot the training curves recorded by the last ``fit`` call."""
        # Imported locally so the method does not depend on a later notebook cell
        import pandas as pd
        pd.DataFrame(self.history.history).plot(figsize=(8, 5))
        plt.grid(True)
        plt.show()
        return

    def set_params(self, **parameters):
        """Set each given keyword as an attribute on the estimator; returns ``self``."""
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def get_params(self, deep=True):
        """Return the constructor hyper-parameters as a dict (``deep`` is unused)."""
        return {'l1_param': self.l1_param, 'dropout': self.dropout, 'optimizer': self.optimizer,
                'learning_rate': self.learning_rate, 'batch_size': self.batch_size,
                'epochs': self.epochs, 'verbose': self.verbose, 'validation_split': self.validation_split,
                'R': self.R, 'K': self.K, 'q': self.q
                }
Load Data¶
[7]:
#load data
import scipy.io as sio
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf #importar tensorflow
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler,MinMaxScaler
import numpy as np
# Base name (without extension) of the .mat file to read from MADatasets/.
database = 'Wine' #['bupa1', 'breast-cancer-wisconsin1','pima-indians-diabetes1', 'ionosphere1', 'tic-tac-toe1', 'iris1', 'wine1', 'segmentation1']
path_ = 'MADatasets/'+ database+ '.mat'
# Read the MATLAB file; the result is indexed by variable name below.
Xdata = sio.loadmat(path_)
# Show which variables the file contains.
Xdata.keys()
[7]:
dict_keys(['__header__', '__version__', '__globals__', 'X', 'y', 'Y', 'iAnn', 'Exp', 'idxtr', 'idxte'])
[8]:
# Feature matrix stored under 'X'.
X = Xdata['X']
# Labels stored under 'Y'; this Y is reassigned later by the MA_Clas_Gen call.
Y = Xdata['Y']
# Labels stored under 'y', flattened to 1-D; used below as the reference labels.
t = Xdata['y'].reshape(-1)
print('X',X.shape,'t',t.shape,'Y',Y.shape)
X (178, 13) t (178,) Y (178, 5)
[9]:
t
[9]:
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3], dtype=uint8)
Labels Generation¶
[10]:
# Generate labels for R=5 annotators from the features and reference labels;
# this replaces the Y that was read from the .mat file.
# NOTE(review): the meaning of NrP and of the returned iAnn / Lam_r is defined
# in labels_generation, which is not visible here — confirm before relying on them.
Y, iAnn, Lam_r = MA_Clas_Gen(X ,t, R=5, NrP=1)
/usr/local/lib/python3.8/dist-packages/sklearn/manifold/_t_sne.py:780: FutureWarning: The default initialization in TSNE will change from 'random' to 'pca' in 1.2.
warnings.warn(
/usr/local/lib/python3.8/dist-packages/sklearn/manifold/_t_sne.py:790: FutureWarning: The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.
warnings.warn(
[11]:
# Shift the labels down by one (t holds 1..3 as displayed above) so classes start at 0.
# WARNING: this cell is not idempotent — running it again shifts the labels once more.
Y = Y - 1
t = t - 1
[12]:
from sklearn.metrics import classification_report

# For every annotator: print a classification report against the reference
# labels t and draw a bar chart of how often that annotator used each class.
for i in range(Y.shape[1]):
    print('annotator',i+1)
    print(classification_report(t,Y[:,i]))
    unique, counts = np.unique(Y[:,i], return_counts=True)
    fig, ax = plt.subplots()
    ax.bar(unique, counts)
    # The bars count this annotator's labels, so the title names the annotator
    # instead of the reference labels.
    ax.set_title('Class Frequency for annotator {}'.format(i+1))
    ax.set_xlabel('Class')
    ax.set_ylabel('Frequency')
annotator 1
precision recall f1-score support
0 0.86 1.00 0.92 59
1 0.90 0.90 0.90 71
2 0.95 0.75 0.84 48
accuracy 0.89 178
macro avg 0.90 0.88 0.89 178
weighted avg 0.90 0.89 0.89 178
annotator 2
precision recall f1-score support
0 0.55 0.95 0.70 59
1 0.53 0.23 0.32 71
2 0.47 0.46 0.46 48
accuracy 0.53 178
macro avg 0.52 0.54 0.49 178
weighted avg 0.52 0.53 0.48 178
annotator 3
precision recall f1-score support
0 0.55 0.27 0.36 59
1 0.69 0.85 0.76 71
2 0.58 0.75 0.65 48
accuracy 0.63 178
macro avg 0.61 0.62 0.59 178
weighted avg 0.61 0.63 0.60 178
annotator 4
precision recall f1-score support
0 0.60 0.73 0.66 59
1 0.59 0.31 0.41 71
2 0.42 0.60 0.50 48
accuracy 0.53 178
macro avg 0.54 0.55 0.52 178
weighted avg 0.55 0.53 0.51 178
annotator 5
precision recall f1-score support
0 0.03 0.02 0.02 59
1 0.44 0.61 0.51 71
2 0.20 0.17 0.18 48
accuracy 0.29 178
macro avg 0.22 0.26 0.24 178
weighted avg 0.24 0.29 0.26 178
Split data¶
[13]:
import numpy.matlib
from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit
# Single random 70/30 split with a fixed seed so the partition is repeatable.
Ns = 1
ss = ShuffleSplit(n_splits=Ns, test_size=0.3, random_state=123)
for train_index, test_index in ss.split(X):
    print(test_index)
    # Features and annotator labels for each side of the split
    X_train, Y_train = X[train_index, :], Y[train_index, :]
    X_test, Y_test = X[test_index, :], Y[test_index, :]
    # Reference labels as column vectors
    Y_true_train = t[train_index].reshape(-1, 1)
    Y_true_test = t[test_index].reshape(-1, 1)
print(X_train.shape, Y_train.shape, Y_true_train.shape)
[138 60 137 77 90 159 41 136 166 93 154 141 164 33 31 152 82 122
42 87 139 147 151 162 79 169 157 95 53 10 23 37 134 110 167 120
142 8 72 114 148 163 36 20 121 28 13 88 24 100 127 143 144 85]
(124, 13) (124, 5) (124, 1)
Apply MinMaxScaler¶
[14]:
# Fit the scaler on the training features only, then apply it to both splits.
scaler = MinMaxScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
Testing the Class¶
[15]:
from sklearn.metrics import classification_report, balanced_accuracy_score, roc_auc_score
from sklearn.metrics import normalized_mutual_info_score, mutual_info_score, adjusted_mutual_info_score
import pandas as pd
l1 = 0.1
NUM_RUNS = 10
# Tag used only in the name of the exported results file.
# NOTE(review): Keras_MA_RCDNN.fit compiles with custom_RCDNN_loss, so this
# tag does not select the loss that is actually trained — confirm the label.
custom_loss = "GCE"

# Problem sizes come from the training data instead of being hard-coded, and
# are computed once because they do not change between runs.
n_annotators = Y_train.shape[1]
n_classes = len(np.unique(Y_true_train))

results = []
for i in range(NUM_RUNS):
    print("iteration: " + str(i))
    MA = Keras_MA_RCDNN(epochs=100, batch_size=128, R=n_annotators, K=n_classes, dropout=0.25,
                        learning_rate=0.001, optimizer='Adam',
                        l1_param=l1, validation_split=0, verbose=0, q=0.3)
    MA.fit(X_train, Y_train)
    MA.plot_history()

    # Generate the predictions for the current run
    pred_2 = MA.predict(X_test)

    # The first MA.R columns are the per-annotator outputs; the remaining
    # columns are the class probabilities that get scored.
    acc, auc, auc_sk, nmi, bacc = evaluation_metrics(Y_true_test, pred_2[:, MA.R:], print_result=False)

    # Save the results for the current run to the list of dictionaries
    results.append({
        'accuracy': acc,
        'balanced_accuracy': bacc,
        'normalized_mutual_information': nmi,
        'auc_tensorflow': auc,
        'auc_scikit_learn': auc_sk,
    })

# Convert the list of dictionaries to a DataFrame (values in percent)
df = np.round(pd.DataFrame(results)*100, 2)

# Calculate the mean and standard deviation of each metric
mean = np.round(df.mean(), 2)
std = np.round(df.std(), 2)
iteration: 0
2/2 [==============================] - 0s 7ms/step
iteration: 1
2/2 [==============================] - 0s 3ms/step
iteration: 2
2/2 [==============================] - 0s 5ms/step
iteration: 3
2/2 [==============================] - 0s 4ms/step
iteration: 4
WARNING:tensorflow:5 out of the last 9 calls to <function Model.make_predict_function.<locals>.predict_function at 0x7ff2bc09d9d0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2/2 [==============================] - 0s 7ms/step
iteration: 5
WARNING:tensorflow:6 out of the last 11 calls to <function Model.make_predict_function.<locals>.predict_function at 0x7ff2b4ce9f70> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2/2 [==============================] - 0s 8ms/step
iteration: 6
2/2 [==============================] - 0s 3ms/step
iteration: 7
2/2 [==============================] - 0s 3ms/step
iteration: 8
2/2 [==============================] - 0s 3ms/step
iteration: 9
2/2 [==============================] - 0s 3ms/step
[16]:
df
[16]:
accuracy | balanced_accuracy | normalized_mutual_information | auc_tensorflow | auc_scikit_learn | |
---|---|---|---|---|---|
0 | 96.30 | 95.45 | 88.72 | 100.0 | 100.0 |
1 | 96.30 | 95.45 | 88.72 | 100.0 | 100.0 |
2 | 98.15 | 97.73 | 93.18 | 100.0 | 100.0 |
3 | 92.59 | 90.91 | 82.15 | 100.0 | 100.0 |
4 | 92.59 | 90.91 | 82.15 | 100.0 | 100.0 |
5 | 100.00 | 100.00 | 100.00 | 100.0 | 100.0 |
6 | 98.15 | 97.73 | 93.18 | 100.0 | 100.0 |
7 | 96.30 | 95.45 | 88.72 | 100.0 | 100.0 |
8 | 96.30 | 95.45 | 88.72 | 100.0 | 100.0 |
9 | 98.15 | 97.73 | 93.18 | 100.0 | 100.0 |
[17]:
mean
[17]:
accuracy 96.48
balanced_accuracy 95.68
normalized_mutual_information 89.87
auc_tensorflow 100.00
auc_scikit_learn 100.00
dtype: float64
[18]:
std
[18]:
accuracy 2.38
balanced_accuracy 2.92
normalized_mutual_information 5.37
auc_tensorflow 0.00
auc_scikit_learn 0.00
dtype: float64
[19]:
# Combine both summaries into one table: one row per metric, columns 'Mean' and 'Std'.
result_df = pd.concat([mean.rename('Mean'), std.rename('Std')], axis=1)
[20]:
result_df
[20]:
Mean | Std | |
---|---|---|
accuracy | 96.48 | 2.38 |
balanced_accuracy | 95.68 | 2.92 |
normalized_mutual_information | 89.87 | 5.37 |
auc_tensorflow | 100.00 | 0.00 |
auc_scikit_learn | 100.00 | 0.00 |
[21]:
# Save the DataFrame to an excel file
# The file name is the dataset name followed by the custom_loss tag
# (e.g. 'WineGCE.xlsx' for the values set above), written to the working directory.
result_df.to_excel(database + custom_loss + ".xlsx")
[21]: