[ ]:
import tensorflow as tf
import numpy as np
from scipy.io import loadmat
from sklearn.preprocessing import StandardScaler
from scipy.io import savemat
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import OneHotEncoder
from scipy.stats import mode
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score
from sklearn.decomposition import PCA
def ook(t):
    """Return a one-hot (one column per class) encoding of the labels in ``t``.

    The labels are encoded with ``LabelBinarizer``. When ``t`` holds exactly
    two distinct values the binarizer output is treated as the indicator of
    the second class, and its complement is prepended so the result still has
    one column per class.
    """
    encoder = LabelBinarizer()
    onehot = encoder.fit_transform(t)
    if len(np.unique(t)) != 2:
        return onehot
    # Two-class case: build the complementary column and place it first.
    complement = 1 - onehot.astype(bool)
    return np.concatenate((complement, onehot), axis=1)
def scheduler1(step=10, ratio=1.2):
    """Build a step-decay learning-rate schedule.

    The returned callable takes ``(epoch, lr)`` and returns ``lr / ratio`` on
    every epoch that is a multiple of ``step`` (epochs 0 and 1 excluded);
    on all other epochs it returns ``lr`` unchanged.
    """
    def scheduler(epoch, lr):
        decay_now = epoch > 1 and epoch % step == 0
        return lr / ratio if decay_now else lr

    return scheduler
[ ]:
# -*- coding: utf-8 -*-
import tensorflow as tf
import matplotlib.pyplot as plt
#from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
import tensorflow_probability as tfp
from tensorflow.keras import regularizers
from sklearn.base import BaseEstimator, TransformerMixin, ClassifierMixin
from sklearn.model_selection import train_test_split
tf.keras.backend.clear_session()
#tf.random.set_seed(42)
import types
import tempfile
class Keras_MA_GCCE():
    """Keras multi-annotator classifier trained with a generalized cross-entropy loss.

    ``fit`` builds a dense network whose output has ``R + K`` columns: the
    first ``R`` are sigmoid units (one per annotator) and the last ``K`` are a
    softmax over the classes. The loss functions below consume that layout.

    The class exposes ``fit`` / ``predict`` / ``get_params`` / ``set_params``
    so it can be driven like a scikit-learn estimator.

    Parameters
    ----------
    epochs, batch_size, validation_split, verbose
        Forwarded to ``tf.keras.Model.fit``.
    R : int
        Number of annotators (columns of the label matrix).
    K : int
        Number of classes.
    dropout : float
        Rate used by the input ``Dropout`` and both ``AlphaDropout`` layers.
    learning_rate : float
        Learning rate used when ``optimizer`` is one of the recognised names.
    optimizer : str or optimizer object
        ``"Adam"``, ``"SGD"``, ``"RMSprop"`` or ``"Adagrad"``; any other value
        is handed to ``compile`` unchanged.
    l1_param : float
        Strength of the L1 kernel penalty; the L2 penalty uses the same value.
    q : float
        Exponent of the generalized cross-entropy term in ``GCCE_MA_loss``.
    neurons
        Stored on the instance; not read by any method of this class.
    """

    def __init__(self, epochs=100, batch_size=30, R=5, K=2, dropout=0.5, learning_rate=1e-3, optimizer='Adam',
                 l1_param=0, validation_split=0.3, verbose=1, q=0.1, neurons=1):
        self.epochs = epochs
        self.dropout = dropout
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.l1_param = l1_param
        # The L2 strength is deliberately tied to the L1 value.
        self.l2_param = l1_param
        self.validation_split = validation_split
        self.verbose = verbose
        self.optimizer = optimizer
        self.R = R
        self.K = K
        self.q = q
        self.neurons = neurons

    def GCCE_MA_loss(self, y_true, y_pred):
        """Generalized cross-entropy loss for multiple annotators.

        ``y_true`` holds one label per annotator (samples x R) and ``y_pred``
        is the concatenated network output (samples x (R + K)). Returns the
        scalar sum of the loss over samples and annotators.
        """
        # Class probabilities (last K columns), clipped away from zero.
        pred = y_pred[:, self.R:]
        pred = tf.clip_by_value(pred, clip_value_min=1e-9, clip_value_max=1)
        # Per-annotator outputs (first R columns).
        ann_ = y_pred[:, :self.R]
        # One-hot labels laid out as samples x K x R.
        # NOTE(review): out-of-range indices give an all-zero one-hot vector,
        # which presumably is how missing annotations are masked -- confirm
        # that the sentinel used for missing labels survives the int32 cast.
        Y_true = tf.one_hot(tf.cast(y_true, dtype=tf.int32), depth=self.K, axis=1)
        # Repeat the class probabilities once per annotator: samples x K x R.
        Y_hat = tf.repeat(tf.expand_dims(pred, -1), self.R, axis=-1)
        p_gcce = Y_true*(1 - Y_hat**self.q)/self.q
        temp1 = ann_*tf.math.reduce_sum(p_gcce, axis=1)
        # Same generalized term evaluated at the uniform probability 1/K,
        # counted only where a one-hot label is present.
        temp2 = (1 - ann_)*(1-(1/self.K)**self.q)/self.q*tf.reduce_sum(Y_true, axis=1)
        return tf.math.reduce_sum((temp1 + temp2))

    def custom_RCDNN_loss(self, y_true, y_pred):
        """Log-likelihood based multi-annotator loss.

        ``y_true`` is samples x R and ``y_pred`` is samples x (R + K).
        Returns the negated sum over samples and annotators.
        """
        # Class probabilities, clipped on both sides for numerical stability
        # of the logarithm.
        pred = y_pred[:, self.R:]
        pred = tf.clip_by_value(pred, clip_value_min=1e-9, clip_value_max=1-1e-9)
        # Per-annotator outputs: samples x R.
        ann_ = y_pred[:, :self.R]
        # One-hot labels: samples x K x R.
        Y_true = tf.one_hot(tf.cast(y_true, dtype=tf.int32), depth=self.K, axis=1)
        # Class probabilities repeated per annotator: samples x K x R.
        Y_hat = tf.repeat(tf.expand_dims(pred, -1), self.R, axis=-1)
        # Product over classes of p^y picks the probability of the labelled class.
        p_logreg = tf.math.reduce_prod(tf.math.pow(Y_hat, Y_true), axis=1)
        temp1 = ann_*tf.math.log(p_logreg)
        temp2 = (1 - ann_)*tf.math.log(1/self.K)*tf.reduce_sum(Y_true, axis=1)
        return -tf.math.reduce_sum((temp1 + temp2))

    def PRI_MA_loss(self, y_true, y_pred):
        """Multi-annotator loss built from two categorical cross-entropy terms.

        ``y_true`` is samples x R and ``y_pred`` is samples x (R + K).
        Returns ``trace(Lambda^T H_c) + H_m^T (1 - Lambda) 1_R`` where
        ``Lambda`` is the per-annotator output, ``H_c`` the cross entropy of
        the labels against the prediction and ``H_m`` the cross entropy of the
        prediction against itself.
        """
        cce_M = tf.keras.losses.CategoricalCrossentropy(reduction='none', axis=-1)
        cce_C = tf.keras.losses.CategoricalCrossentropy(reduction='none', axis=1)
        # Dynamic batch size, so the loss also works with a partial last batch.
        N = tf.shape(y_true)[0]
        # Class probabilities: samples x K.
        y_pred_tf = y_pred[:, self.R:]
        # Per-annotator outputs: samples x R.
        lambda_R_tf = y_pred[:, :self.R]
        # One-hot labels: samples x K x R.
        Y_true_1K = tf.one_hot(tf.cast(y_true, dtype=tf.int32), depth=self.K, axis=1)
        # Class probabilities repeated per annotator: samples x K x R.
        Y_hat = tf.repeat(tf.expand_dims(y_pred_tf, -1), self.R, axis=-1)
        # Cross entropy of the prediction against itself, along the K classes.
        Hm_ = cce_M(y_pred_tf, y_pred_tf)
        # Cross entropy of each annotator's labels against the prediction.
        Hc_ = cce_C(Y_true_1K, Y_hat)
        LH = tf.linalg.matmul(lambda_R_tf, Hc_, transpose_a=True)  # Lambda^T H_c
        temp1 = tf.linalg.trace(LH)
        # (1_N 1_R^T - Lambda) 1_R: row sums of the complement of Lambda.
        Hm1L = tf.linalg.matmul(tf.ones(shape=(N, self.R))-lambda_R_tf, tf.ones(shape=(self.R, 1)))
        Hm_t = tf.reshape(Hm_, shape=(N, 1))
        temp2 = tf.squeeze(tf.linalg.matmul(Hm_t, Hm1L, transpose_a=True))
        loss_c = temp1 + temp2
        return loss_c

    def fit(self, X, Y):
        """Build, compile and train the network.

        Parameters
        ----------
        X : array, samples x features
        Y : array, samples x annotators (one label per annotator)

        Returns
        -------
        self, with ``self.model`` and ``self.history`` set.
        """
        initializer = tf.keras.initializers.GlorotNormal(seed=100)
        P = X.shape[1]

        named_optimizers = {
            "Adam": tf.keras.optimizers.Adam,
            "SGD": tf.keras.optimizers.SGD,
            "RMSprop": tf.keras.optimizers.RMSprop,
            "Adagrad": tf.keras.optimizers.Adagrad,
        }
        if isinstance(self.optimizer, str) and self.optimizer in named_optimizers:
            opt = named_optimizers[self.optimizer](learning_rate=self.learning_rate)
        else:
            # Anything else (an optimizer instance or another Keras alias)
            # is passed through to compile() untouched.
            opt = self.optimizer

        def penalty():
            # Fresh L1/L2 regularizer for each layer.
            return tf.keras.regularizers.l1_l2(l1=self.l1_param, l2=self.l2_param)

        # Input layer. The width comes from the data passed to fit(), not
        # from a module-level variable, so the class works on any dataset.
        input_l = tf.keras.layers.Input(shape=(P,), name='entrada')
        input_l_b = tf.keras.layers.BatchNormalization()(input_l)
        input_l_do = tf.keras.layers.Dropout(rate=self.dropout)(input_l_b)

        # Hidden dense layers; widths scale with features x (classes + annotators).
        h1 = tf.keras.layers.Dense(int(P*1.5*(self.K+self.R)), activation='selu', name='h1',
                                   bias_initializer='zeros', kernel_initializer=initializer,
                                   kernel_regularizer=penalty())(input_l_do)
        h1 = tf.keras.layers.AlphaDropout(rate=self.dropout)(h1)
        h2 = tf.keras.layers.Dense(int(P*(self.K+self.R)), activation='selu', name='h2',
                                   bias_initializer='zeros', kernel_initializer=initializer,
                                   kernel_regularizer=penalty())(h1)
        dout = tf.keras.layers.AlphaDropout(rate=self.dropout)(h2)

        # Two heads sharing the same trunk: R sigmoid units and a K-way softmax.
        output_R = tf.keras.layers.Dense(self.R, activation="sigmoid", bias_initializer='zeros',
                                         kernel_initializer=initializer,
                                         kernel_regularizer=penalty(), name='out_R_GCCE')(dout)
        output_K = tf.keras.layers.Dense(self.K, activation="softmax", bias_initializer='zeros',
                                         kernel_initializer=initializer,
                                         kernel_regularizer=penalty(), name='out_K_GCCE')(dout)
        # Column order [R | K] is what the loss functions slice on.
        output = tf.keras.layers.concatenate([output_R, output_K])

        self.model = tf.keras.Model(inputs=input_l, outputs=output)
        self.model.compile(loss=self.GCCE_MA_loss, optimizer=opt)

        callback1 = tf.keras.callbacks.TerminateOnNaN()
        # With ratio=1 the schedule leaves the learning rate unchanged.
        callback2 = tf.keras.callbacks.LearningRateScheduler(scheduler1(ratio=1))
        # NOTE: an EarlyStopping callback used to be constructed here but was
        # never passed to fit(); it is left out so training is unchanged.
        self.history = self.model.fit(X, Y, epochs=self.epochs, validation_split=self.validation_split,
                                      batch_size=self.batch_size, callbacks=[callback1, callback2],
                                      verbose=self.verbose, shuffle=True)
        return self

    def predict(self, X, *_):
        """Return the raw network output (samples x (R + K)) for ``X``."""
        return self.model.predict(X)

    def fit_predict(self, X, y):
        """Train on ``(X, y)`` and return the network output for the same ``X``."""
        self.fit(X, y)
        return self.predict(X)

    def model_MC(self, X, *_):
        """Call the model with ``training=True`` so dropout stays active."""
        return self.model(X, training=True)

    def plot_history(self):
        """Plot every curve recorded in the Keras training history."""
        pd.DataFrame(self.history.history).plot(figsize=(8, 5))
        plt.grid(True)
        plt.show()

    def set_params(self, **parameters):
        """Set attributes from keyword arguments and return ``self``."""
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def get_params(self, deep=True):
        """Return the constructor arguments as a dict (``deep`` is ignored)."""
        return {'l1_param': self.l1_param, 'dropout': self.dropout, 'optimizer': self.optimizer,
                'learning_rate': self.learning_rate, 'batch_size': self.batch_size,
                'epochs': self.epochs, 'verbose': self.verbose, 'validation_split': self.validation_split,
                'R': self.R, 'K': self.K, 'q': self.q, 'neurons': self.neurons
                }
[ ]:
# Download the dataset archive from a publicly shared Google Drive file.
FILEID = "1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW"
# Share link: https://drive.google.com/file/d/1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW/view?usp=sharing
# The inner wget saves the session cookies and sed extracts the "confirm" token
# from the response; the outer wget reuses those cookies plus the token to
# save the file as MADatasets.zip, after which the cookie file is removed.
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id='$FILEID -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id="$FILEID -O MADatasets.zip && rm -rf /tmp/cookies.txt
# Extract the archive, overwriting existing files without prompting (-o).
!unzip -o MADatasets.zip
# List the working directory to check the extraction result.
!dir
--2023-02-09 14:41:03-- https://docs.google.com/uc?export=download&confirm=t&id=1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW
Resolving docs.google.com (docs.google.com)... 142.251.111.102, 142.251.111.100, 142.251.111.138, ...
Connecting to docs.google.com (docs.google.com)|142.251.111.102|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://doc-00-90-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/spjmpdq4kqaek5vrolkl8o9k2aev7qos/1675953600000/07591141114418430227/*/1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW?e=download&uuid=60e99211-de0a-4973-b29e-ca1cdffd3fa2 [following]
Warning: wildcards not supported in HTTP.
--2023-02-09 14:41:03-- https://doc-00-90-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/spjmpdq4kqaek5vrolkl8o9k2aev7qos/1675953600000/07591141114418430227/*/1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW?e=download&uuid=60e99211-de0a-4973-b29e-ca1cdffd3fa2
Resolving doc-00-90-docs.googleusercontent.com (doc-00-90-docs.googleusercontent.com)... 142.251.167.132, 2607:f8b0:4004:c1d::84
Connecting to doc-00-90-docs.googleusercontent.com (doc-00-90-docs.googleusercontent.com)|142.251.167.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 156530728 (149M) [application/zip]
Saving to: ‘MADatasets.zip’
MADatasets.zip 100%[===================>] 149.28M 104MB/s in 1.4s
2023-02-09 14:41:05 (104 MB/s) - ‘MADatasets.zip’ saved [156530728/156530728]
Archive: MADatasets.zip
inflating: MADatasets/util.py
inflating: MADatasets/Iris1.mat
inflating: MADatasets/Integra_Labels.mat
inflating: MADatasets/MAGenerationClassification.py
inflating: MADatasets/Voice.mat
inflating: MADatasets/Iris.mat
inflating: MADatasets/Sinthetic.mat
inflating: MADatasets/MAGenerationClassification_1.py
inflating: MADatasets/Bupa1.mat
inflating: MADatasets/TicTacToe1.mat
inflating: MADatasets/Wine.mat
inflating: MADatasets/Breast1.mat
inflating: MADatasets/Breast.mat
inflating: MADatasets/Music.mat
inflating: MADatasets/Pima.mat
inflating: MADatasets/Ionosphere.mat
inflating: MADatasets/TicTacToe.mat
inflating: MADatasets/VoiceData.m
inflating: MADatasets/util_1.py
inflating: MADatasets/Ionosphere1.mat
inflating: MADatasets/__pycache__/util_1.cpython-37.pyc
inflating: MADatasets/Bupa.mat
inflating: MADatasets/Wine1.mat
inflating: MADatasets/__pycache__/util.cpython-37.pyc
inflating: MADatasets/Pima1.mat
inflating: MADatasets/Segmentation1.mat
inflating: MADatasets/Western.mat
inflating: MADatasets/Integra_Preprocesamiento_Seg_Caracterizacion_time_frec.mat
inflating: MADatasets/Western1.mat
inflating: MADatasets/Segmentation.mat
inflating: MADatasets/Skin_NonSkin.mat
inflating: MADatasets/Skin_NonSkin1.mat
inflating: MADatasets/Occupancy1.mat
inflating: MADatasets/Polarity.mat
inflating: MADatasets/Occupancy.mat
MADatasets MADatasets.zip sample_data
Load Data
[ ]:
#load data
import scipy.io as sio
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf #importar tensorflow
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler,MinMaxScaler
import numpy as np
# Name of the dataset to load; other names seen in this project:
# ['bupa1', 'breast-cancer-wisconsin1','pima-indians-diabetes1', 'ionosphere1', 'tic-tac-toe1', 'iris1', 'wine1', 'segmentation1']
database = 'Music'
# The .mat file lives in the folder extracted from the downloaded archive.
path_ = f'MADatasets/{database}.mat'
Xdata = sio.loadmat(path_)
# Show which variables the file contains.
Xdata.keys()
dict_keys(['__header__', '__version__', '__globals__', 'Xtest', 'Xtrain', 'Ytrain', 'vRef', 'ytest', 'ytrain'])
[ ]:
# Feature matrices.
Xtrain, Xtest = Xdata['Xtrain'], Xdata['Xtest']
# Per-annotator training labels and the reference value stored with them.
Y_train, vref = Xdata['Ytrain'], Xdata['vRef']
# Ground-truth labels, flattened to 1-D vectors.
Y_true_train = Xdata['ytrain'].reshape(-1)
Y_true_test = Xdata['ytest'].reshape(-1)
print(
    'Xtrain', Xtrain.shape,
    'Xtest', Xtest.shape,
    'Y_true_train', Y_true_train.shape,
    'Y_train', Y_train.shape,
    'Y_true_test', Y_true_test.shape,
)
Xtrain (700, 124) Xtest (300, 124) Y_true_train (700,) Y_train (700, 44) Y_true_test (300,)
[ ]:
# Keep only the annotators that labelled at least 15% of the training samples.
# Entries equal to -1e+20 mark samples an annotator did not label.
missing_per_annotator = np.count_nonzero(Y_train == -1e+20, axis=0)
keep_annotator = missing_per_annotator <= Y_train.shape[0]*0.85
# Selecting with a boolean mask sizes the result to however many annotators
# pass the threshold. A fixed width of 9 would raise IndexError with more
# than nine, and leave all-zero columns (read as class-0 votes) with fewer.
Ytrain_sel = Y_train[:, keep_annotator].astype(float)
# Number of annotators kept.
k = Ytrain_sel.shape[1]
[ ]:
# Shuffle the training set with a fixed seed so runs are reproducible.
N = Y_true_train.shape[0]
Nk = 1
Ntr = round(N)  # all N samples are kept
rng = np.random.default_rng(seed=123)
aux = rng.permutation(N, axis=0)
train_index = aux[:Ntr]
print(train_index.shape)
# Apply the same row order to the features, the annotator labels and the
# ground truth (the latter reshaped to a column vector).
X_train = Xtrain[train_index, :]
Y_train = Ytrain_sel[train_index, :]
Y_true_train = Y_true_train[train_index].reshape(-1, 1)
train_index
(700,)
array([ 64, 404, 641, 400, 9, 321, 458, 151, 287, 689, 535, 699, 494,
499, 460, 413, 587, 51, 492, 234, 54, 175, 238, 359, 10, 670,
96, 13, 329, 520, 94, 199, 442, 319, 679, 289, 654, 463, 76,
471, 212, 401, 402, 43, 274, 337, 348, 567, 639, 551, 547, 617,
382, 515, 286, 305, 443, 544, 370, 488, 323, 207, 288, 230, 555,
198, 36, 481, 398, 4, 342, 495, 486, 446, 114, 678, 134, 125,
432, 550, 275, 185, 657, 137, 293, 562, 97, 478, 627, 386, 415,
78, 484, 614, 57, 225, 192, 642, 390, 31, 222, 336, 19, 239,
56, 130, 221, 462, 202, 613, 171, 447, 191, 360, 16, 211, 394,
621, 563, 695, 646, 663, 389, 554, 373, 680, 30, 150, 561, 656,
475, 284, 611, 354, 668, 263, 676, 684, 553, 278, 630, 669, 594,
218, 309, 21, 565, 514, 372, 560, 452, 273, 308, 371, 582, 482,
606, 109, 294, 522, 383, 575, 95, 327, 368, 176, 615, 158, 392,
418, 143, 167, 479, 129, 517, 533, 546, 503, 111, 23, 343, 504,
42, 0, 451, 427, 58, 665, 586, 155, 196, 40, 181, 539, 411,
119, 407, 28, 698, 696, 673, 1, 419, 416, 524, 250, 385, 498,
50, 525, 485, 271, 417, 186, 126, 120, 344, 160, 46, 391, 11,
448, 113, 477, 45, 357, 459, 380, 80, 306, 697, 253, 362, 588,
88, 692, 466, 548, 166, 607, 595, 545, 104, 661, 132, 558, 312,
12, 242, 101, 264, 530, 428, 209, 644, 605, 441, 435, 232, 300,
107, 624, 536, 425, 208, 317, 69, 602, 626, 307, 254, 578, 108,
180, 636, 172, 631, 204, 241, 603, 333, 542, 326, 658, 85, 409,
260, 141, 2, 395, 685, 513, 87, 162, 258, 374, 249, 650, 528,
379, 652, 224, 572, 169, 674, 381, 456, 83, 65, 177, 133, 648,
660, 213, 569, 159, 634, 612, 220, 439, 589, 472, 61, 574, 269,
318, 502, 622, 14, 252, 436, 251, 165, 123, 384, 335, 361, 295,
255, 340, 675, 328, 197, 100, 235, 99, 559, 376, 690, 297, 518,
422, 688, 541, 683, 632, 543, 557, 426, 189, 187, 35, 527, 369,
148, 454, 194, 182, 304, 74, 397, 651, 39, 467, 124, 387, 506,
564, 236, 591, 53, 483, 566, 245, 403, 584, 316, 430, 465, 346,
121, 378, 79, 464, 437, 474, 147, 246, 210, 406, 223, 568, 473,
282, 440, 599, 608, 142, 195, 358, 694, 110, 146, 59, 334, 117,
29, 511, 102, 248, 37, 215, 549, 444, 17, 188, 73, 163, 325,
157, 577, 55, 106, 38, 200, 410, 681, 84, 365, 647, 173, 279,
276, 521, 666, 664, 620, 596, 154, 140, 91, 52, 449, 18, 581,
156, 72, 310, 445, 296, 90, 193, 322, 256, 672, 618, 347, 375,
597, 635, 47, 526, 349, 280, 330, 205, 507, 610, 122, 183, 487,
179, 138, 431, 63, 489, 377, 351, 26, 423, 314, 265, 609, 388,
92, 352, 523, 41, 655, 240, 127, 25, 217, 60, 592, 420, 315,
299, 320, 508, 625, 34, 450, 429, 103, 616, 540, 598, 324, 170,
272, 48, 468, 469, 691, 7, 556, 116, 178, 604, 89, 86, 509,
583, 532, 366, 15, 516, 501, 259, 184, 490, 341, 290, 470, 261,
237, 226, 8, 22, 405, 396, 115, 257, 628, 247, 531, 131, 693,
537, 353, 339, 653, 67, 424, 505, 270, 414, 145, 68, 623, 438,
600, 677, 638, 640, 593, 399, 480, 421, 49, 161, 20, 77, 500,
227, 643, 277, 579, 81, 667, 244, 62, 75, 267, 457, 367, 128,
331, 206, 229, 649, 332, 682, 671, 629, 164, 262, 619, 112, 529,
203, 71, 3, 313, 27, 512, 687, 434, 570, 491, 174, 311, 168,
281, 497, 292, 149, 393, 118, 363, 70, 552, 66, 455, 519, 32,
637, 139, 303, 534, 364, 266, 98, 345, 412, 82, 350, 135, 201,
408, 601, 356, 576, 231, 216, 510, 24, 476, 538, 144, 355, 590,
580, 93, 659, 338, 571, 136, 298, 152, 686, 645, 219, 285, 453,
283, 228, 573, 301, 190, 6, 33, 44, 493, 496, 268, 105, 233,
633, 243, 585, 214, 433, 5, 302, 662, 291, 153, 461])
[ ]:
# Shuffle the test set with a fixed seed so runs are reproducible.
N = Y_true_test.shape[0]
Nk = 1
Nte = round(N)  # all N samples are kept
rng = np.random.default_rng(seed=123)
aux = rng.permutation(N, axis=0)
test_index = aux[:Nte]
print(test_index.shape)
# Reorder the features and the ground truth (as a column vector) together.
X_test = Xtest[test_index, :]
Y_true_test = Y_true_test[test_index].reshape(-1, 1)
test_index
(300,)
array([273, 192, 169, 162, 91, 120, 38, 251, 195, 65, 290, 249, 99,
296, 46, 239, 258, 72, 134, 189, 116, 53, 280, 246, 205, 247,
113, 87, 151, 278, 240, 80, 18, 54, 129, 245, 132, 2, 196,
283, 202, 224, 145, 130, 263, 74, 52, 127, 104, 126, 212, 51,
241, 235, 250, 31, 262, 95, 9, 138, 114, 28, 101, 238, 41,
185, 60, 167, 259, 161, 43, 256, 204, 163, 265, 157, 45, 203,
217, 140, 277, 171, 16, 109, 84, 223, 23, 35, 260, 257, 267,
90, 270, 103, 284, 39, 50, 184, 170, 287, 79, 194, 292, 123,
227, 122, 148, 188, 299, 48, 210, 264, 117, 15, 176, 165, 63,
56, 106, 85, 197, 69, 166, 154, 295, 96, 220, 248, 42, 175,
146, 115, 1, 293, 206, 172, 215, 97, 83, 281, 275, 159, 14,
218, 234, 160, 236, 213, 181, 143, 102, 49, 34, 180, 86, 77,
242, 279, 21, 211, 81, 269, 186, 229, 298, 193, 252, 62, 230,
11, 111, 128, 131, 75, 4, 8, 208, 37, 19, 182, 20, 92,
76, 286, 12, 237, 164, 272, 155, 285, 207, 112, 17, 198, 294,
142, 71, 183, 3, 173, 119, 57, 27, 0, 289, 141, 178, 58,
209, 174, 110, 67, 55, 168, 274, 25, 36, 276, 266, 149, 137,
244, 88, 200, 107, 30, 29, 70, 40, 66, 199, 7, 32, 125,
139, 47, 22, 221, 158, 124, 108, 10, 98, 261, 118, 156, 82,
94, 13, 135, 201, 177, 271, 89, 226, 100, 64, 231, 179, 216,
24, 253, 26, 144, 78, 222, 68, 93, 147, 59, 136, 152, 288,
133, 232, 187, 191, 297, 282, 255, 225, 254, 228, 61, 190, 6,
33, 44, 268, 105, 233, 121, 243, 73, 214, 5, 150, 219, 291,
153])
Apply MinMaxScaler
[ ]:
# Learn the per-feature min/max on the training data only, then apply the
# same transformation to both splits.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
Testing the Class
[ ]:
# Shift every label down by one.
# NOTE: this cell is not idempotent; running it twice shifts the labels twice.
Y_true_train = np.subtract(Y_true_train, 1)
Y_true_test = np.subtract(Y_true_test, 1)
# Annotator labels are shifted in place; entries equal to vref are left as is.
labelled = Y_train != vref
Y_train[labelled] -= 1
[ ]:
from sklearn.metrics import classification_report, balanced_accuracy_score, roc_auc_score
from sklearn.metrics import normalized_mutual_info_score, mutual_info_score, adjusted_mutual_info_score

# Train and evaluate the model NUM_RUNS times, keeping one score per run.
l1 = 0.001
NUM_RUNS = 10
ACC = np.zeros(NUM_RUNS)
AUC = np.zeros(NUM_RUNS)
MI = np.zeros(NUM_RUNS)
NMI = np.zeros(NUM_RUNS)
AMI = np.zeros(NUM_RUNS)
BACC = np.zeros(NUM_RUNS)
for i in range(NUM_RUNS):
    print("iteration: " + str(i))
    MA = Keras_MA_GCCE(epochs=200, batch_size=64, R=Ytrain_sel.shape[1], K=len(np.unique(Y_true_train)),
                       dropout=0.25, learning_rate=0.001, optimizer='Adam',
                       l1_param=l1, validation_split=0, verbose=0, q=0.001, neurons=0.5)
    MA.fit(X_train, Y_train)
    MA.plot_history()

    # The model output is [annotator columns | class scores]. Slice the class
    # scores and take the arg-max once, then reuse them for every metric.
    pred_2 = MA.predict(X_test)
    class_scores = pred_2[:, Y_train.shape[1]:]
    class_pred = class_scores.argmax(axis=1)
    y_test = Y_true_test.ravel()

    # Accuracy
    report = classification_report(y_test, class_pred, output_dict=True)
    ACC[i] = report['accuracy']
    print("Validation ACC: %.4f" % (float(ACC[i])))

    # Balanced accuracy (chance-adjusted)
    BACC[i] = balanced_accuracy_score(y_test, class_pred, adjusted=True)
    print("Validation Balanced_ACC: %.4f" % (float(BACC[i])))

    # Mutual-information scores
    MI[i] = mutual_info_score(y_test, class_pred)
    print("Validation MI: %.4f" % (float(MI[i]),))
    NMI[i] = normalized_mutual_info_score(y_test, class_pred)
    print("Validation Normalized MI: %.4f" % (float(NMI[i]),))
    AMI[i] = adjusted_mutual_info_score(y_test, class_pred)
    print("Validation Adjusted MI: %.4f" % (float(AMI[i]),))

    # Keras AUC, printed for reference only.
    # NOTE(review): this metric receives hard class indices rather than class
    # scores, so it is not a proper multi-class AUC. It is not stored.
    # The metric object is rebuilt each run, so no state reset is needed.
    val_AUC_metric = tf.keras.metrics.AUC(from_logits=True)
    val_AUC_metric.update_state(Y_true_test, class_pred.astype('float'))
    val_AUC = val_AUC_metric.result().numpy()
    print("Validation aUc: %.4f" % (float(val_AUC),))

    # scikit-learn AUC on one-hot targets vs. class scores: the value kept.
    val_AUC1 = roc_auc_score(ook(Y_true_test), class_scores)
    print("Validation aUc_Sklearn: %.4f" % (float(val_AUC1),))
    AUC[i] = val_AUC1
iteration: 0
10/10 [==============================] - 0s 2ms/step
Validation ACC: 0.6633
Validation Balanced_ACC: 0.6322
Validation MI: 1.2925
Validation Normalized MI: 0.5746
Validation Adjusted MI: 0.5438
Validation aUc: 0.7610
Validation aUc_Sklearn: 0.9385
iteration: 1
10/10 [==============================] - 0s 2ms/step
Validation ACC: 0.6233
Validation Balanced_ACC: 0.5776
Validation MI: 1.2367
Validation Normalized MI: 0.5585
Validation Adjusted MI: 0.5260
Validation aUc: 0.6000
Validation aUc_Sklearn: 0.9407
iteration: 2
10/10 [==============================] - 0s 3ms/step
Validation ACC: 0.6433
Validation Balanced_ACC: 0.6086
Validation MI: 1.2735
Validation Normalized MI: 0.5648
Validation Adjusted MI: 0.5335
Validation aUc: 0.7430
Validation aUc_Sklearn: 0.9428
iteration: 3
10/10 [==============================] - 0s 3ms/step
Validation ACC: 0.6533
Validation Balanced_ACC: 0.6180
Validation MI: 1.3022
Validation Normalized MI: 0.5747
Validation Adjusted MI: 0.5443
Validation aUc: 0.7093
Validation aUc_Sklearn: 0.9348
iteration: 4
10/10 [==============================] - 0s 2ms/step
Validation ACC: 0.6533
Validation Balanced_ACC: 0.6127
Validation MI: 1.2881
Validation Normalized MI: 0.5732
Validation Adjusted MI: 0.5423
Validation aUc: 0.7078
Validation aUc_Sklearn: 0.9413
iteration: 5
10/10 [==============================] - 0s 2ms/step
Validation ACC: 0.6500
Validation Balanced_ACC: 0.6080
Validation MI: 1.2461
Validation Normalized MI: 0.5499
Validation Adjusted MI: 0.5177
Validation aUc: 0.7553
Validation aUc_Sklearn: 0.9396
iteration: 6
10/10 [==============================] - 0s 3ms/step
Validation ACC: 0.6667
Validation Balanced_ACC: 0.6325
Validation MI: 1.3093
Validation Normalized MI: 0.5766
Validation Adjusted MI: 0.5464
Validation aUc: 0.6760
Validation aUc_Sklearn: 0.9467
iteration: 7
10/10 [==============================] - 0s 3ms/step
Validation ACC: 0.6500
Validation Balanced_ACC: 0.6081
Validation MI: 1.3183
Validation Normalized MI: 0.5869
Validation Adjusted MI: 0.5569
Validation aUc: 0.7296
Validation aUc_Sklearn: 0.9377
iteration: 8
10/10 [==============================] - 0s 2ms/step
Validation ACC: 0.7200
Validation Balanced_ACC: 0.6904
Validation MI: 1.3773
Validation Normalized MI: 0.6039
Validation Adjusted MI: 0.5758
Validation aUc: 0.7373
Validation aUc_Sklearn: 0.9531
iteration: 9
10/10 [==============================] - 0s 2ms/step
Validation ACC: 0.6400
Validation Balanced_ACC: 0.5957
Validation MI: 1.2719
Validation Normalized MI: 0.5727
Validation Adjusted MI: 0.5412
Validation aUc: 0.7535
Validation aUc_Sklearn: 0.9382
[ ]:
from sklearn.metrics import classification_report

# Per-class precision/recall/F1 for the model trained in the last run.
pred_2 = MA.predict(X_test)
# Columns after the annotator block hold the class scores.
predicted_class = pred_2[:, Y_train.shape[1]:].argmax(axis=1)
print(classification_report(Y_true_test.ravel(), predicted_class))
10/10 [==============================] - 0s 2ms/step
precision recall f1-score support
0 0.94 0.45 0.61 33
1 0.87 0.96 0.92 28
2 0.60 0.69 0.64 26
3 0.73 0.29 0.41 28
4 0.96 0.59 0.73 37
5 0.93 0.90 0.91 29
6 1.00 0.44 0.61 25
7 0.40 0.83 0.54 36
8 0.62 0.70 0.66 30
9 0.34 0.50 0.41 28
accuracy 0.64 300
macro avg 0.74 0.64 0.64 300
weighted avg 0.74 0.64 0.64 300
[ ]:
# Mean and standard deviation of every metric over the runs, shown x100.
# The value is scaled first and rounded afterwards; rounding before the
# multiplication reintroduces float noise such as 61.839999999999996.
_summary_rows = [
    ('Average Accuracy: ', 'Average std: ', ACC),
    ('Average AUC: ', 'Average AUC std: ', AUC),
    ('Average Balanced Accuracy: ', 'Average std: ', BACC),
    ('Average MI: ', 'Average std: ', MI),
    ('Average Normalized MI: ', 'Average std: ', NMI),
    ('Average Ajdusted MI: ', 'Average std: ', AMI),
]
for _row, (_mean_label, _std_label, _scores) in enumerate(_summary_rows):
    if _row:
        # Separator between consecutive metrics.
        print('==============================================')
    print(_mean_label, np.round(_scores.mean()*100, 2))
    print(_std_label, np.round(np.std(_scores)*100, 2))
Average Accuracy: 65.63
Average std: 2.41
==============================================
Average AUC: 94.13
Average AUC std: 0.5
==============================================
Average Balanced Accuracy: 61.839999999999996
Average std: 2.8400000000000003
==============================================
Average MI: 129.16
Average std: 3.7800000000000002
==============================================
Average Normalized MI: 57.36
Average std: 1.4000000000000001
==============================================
Average Ajdusted MI: 54.279999999999994
Average std: 1.52
[ ]: