[34]:
#!pip install tensorflow==2.7.0
#!pip install scikeras
Subclassing for RCDNN¶
[35]:
import tensorflow as tf
from sklearn.metrics import classification_report, accuracy_score, balanced_accuracy_score, roc_auc_score, mutual_info_score, normalized_mutual_info_score, adjusted_mutual_info_score
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import OneHotEncoder
from scipy.stats import mode
import numpy as np
def ook(t):
    """One-of-K (one-hot) encode a 1-D label vector.

    Parameters
    ----------
    t : array-like of shape (n_samples,)
        Class labels (any sortable values).

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_classes), int 0/1
        Indicator matrix with columns ordered by the sorted unique labels.
        Matches ``LabelBinarizer`` output for K > 2 and the original
        two-column construction for K == 2, but without the fragile
        binary special case (`1 - y.astype(bool)` concat).
    """
    t = np.asarray(t).ravel()
    classes = np.unique(t)  # sorted unique labels -> column order
    # Vectorized one-hot: compare every sample against every class.
    return (t[:, None] == classes[None, :]).astype(int)
def evaluation_metrics(y_true, y_pred, print_result=True):
    """Compute classification metrics for predicted class scores.

    Parameters
    ----------
    y_true : numpy.ndarray of shape (n_samples,) or (n_samples, 1)
        Integer ground-truth class labels.
    y_pred : numpy.ndarray of shape (n_samples, n_classes)
        Per-class scores/probabilities; argmax gives the hard prediction.
    print_result : bool, default True
        If True, print each metric.

    Returns
    -------
    tuple (acc, auc, auc_sk, nmi, bacc)
        Accuracy, TF AUC, sklearn AUC, normalized mutual information,
        adjusted balanced accuracy.
    """
    y_hat = y_pred.argmax(axis=1)  # hard class predictions
    # Accuracy
    acc = accuracy_score(y_true.ravel(), y_hat)
    # Balanced accuracy, adjusted for chance
    bacc = balanced_accuracy_score(y_true.squeeze(), y_hat.squeeze(), adjusted=True)
    # Normalized Mutual Information between true and predicted labelings
    nmi = normalized_mutual_info_score(y_true.squeeze(), y_hat.squeeze())
    # AUC (TensorFlow). NOTE(review): feeding argmax class indices as the
    # "scores" only yields a meaningful AUC for binary problems — confirm.
    auc_metric = tf.keras.metrics.AUC(from_logits=True)
    auc_metric.update_state(y_true, y_hat.astype('float'))
    auc = auc_metric.result().numpy()
    auc_metric.reset_states()
    # AUC (scikit-learn): multi-class via one-hot true labels vs. raw scores
    auc_sk = roc_auc_score(ook(y_true), y_pred)
    if print_result:
        # BUG FIX: the original also printed `mi` and `ami`, which were never
        # assigned (their computations are commented out) -> NameError.
        print("Accuracy: {:.4f}".format(acc))
        print("Balanced Accuracy: {:.4f}".format(bacc))
        print("Normalized Mutual Information: {:.4f}".format(nmi))
        print("AUC (Tensorflow): {:.4f}".format(auc))
        print("AUC (scikit-learn): {:.4f}".format(auc_sk))
    return acc, auc, auc_sk, nmi, bacc
[36]:
!git clone https://github.com/Jectrianama/GCCE_TEST.git
fatal: destination path 'GCCE_TEST' already exists and is not an empty directory.
[37]:
# Make the cloned repo's model code importable by temporarily cd-ing into it.
import os
os.chdir('/content/GCCE_TEST/Models')  # NOTE(review): hardcoded Colab path
# Star import pulls in Keras_MA_GCCE used below; `import *` hides provenance.
from keras_ma_gcce import *
from labels_generation import MA_Clas_Gen
os.chdir('../../')  # return to the previous working directory
[38]:
# Load data from Google Drive (other datasets).
FILEID = "1AU8pTtCLihBjCZjWITaAzpnEuL4RO436"
# Source: https://drive.google.com/file/d/1AU8pTtCLihBjCZjWITaAzpnEuL4RO436/view?usp=sharing
# Download the shared file (handling Drive's confirm token), unzip, list files.
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id='$FILEID -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id="$FILEID -O DataGCCE.zip && rm -rf /tmp/cookies.txt
!unzip -o DataGCCE.zip
!dir
--2023-02-13 18:13:04-- https://docs.google.com/uc?export=download&confirm=&id=1AU8pTtCLihBjCZjWITaAzpnEuL4RO436
Resolving docs.google.com (docs.google.com)... 173.194.211.139, 173.194.211.138, 173.194.211.100, ...
Connecting to docs.google.com (docs.google.com)|173.194.211.139|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://doc-00-90-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/t8lku189msk5lbalvk1pb3chsggs0p2u/1676311950000/07591141114418430227/*/1AU8pTtCLihBjCZjWITaAzpnEuL4RO436?e=download&uuid=b19a2520-762e-44c6-8fdb-05935c87b060 [following]
Warning: wildcards not supported in HTTP.
--2023-02-13 18:13:04-- https://doc-00-90-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/t8lku189msk5lbalvk1pb3chsggs0p2u/1676311950000/07591141114418430227/*/1AU8pTtCLihBjCZjWITaAzpnEuL4RO436?e=download&uuid=b19a2520-762e-44c6-8fdb-05935c87b060
Resolving doc-00-90-docs.googleusercontent.com (doc-00-90-docs.googleusercontent.com)... 173.194.210.132, 2607:f8b0:400c:c0f::84
Connecting to doc-00-90-docs.googleusercontent.com (doc-00-90-docs.googleusercontent.com)|173.194.210.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 38377 (37K) [application/x-zip-compressed]
Saving to: ‘DataGCCE.zip’
DataGCCE.zip 100%[===================>] 37.48K --.-KB/s in 0.001s
2023-02-13 18:13:04 (53.0 MB/s) - ‘DataGCCE.zip’ saved [38377/38377]
Archive: DataGCCE.zip
inflating: new-thyroid.csv
inflating: tic-tac-toe-endgame.csv
inflating: balance-scale.csv
inflating: file.csv
balance-scale.csv GCCE_TEST new-thyroid.csv tic-tac-toe-endgame.csv
DataGCCE.zip MADatasets sample_data
file.csv MADatasets.zip thyroidGCE.xlsx
[39]:
# Load data from Google Drive (public access).
FILEID = "1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW"
# Source: https://drive.google.com/file/d/1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW/view?usp=sharing
# Download the multi-annotator .mat datasets archive, unzip, list files.
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id='$FILEID -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id="$FILEID -O MADatasets.zip && rm -rf /tmp/cookies.txt
!unzip -o MADatasets.zip
!dir
--2023-02-13 18:13:05-- https://docs.google.com/uc?export=download&confirm=t&id=1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW
Resolving docs.google.com (docs.google.com)... 173.194.211.139, 173.194.211.138, 173.194.211.100, ...
Connecting to docs.google.com (docs.google.com)|173.194.211.139|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://doc-00-90-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/5ctlugfe1le1jhqnpgrek4fuu2h4rvfa/1676311950000/07591141114418430227/*/1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW?e=download&uuid=4caccdc0-4217-4fee-bafa-7cad897bd651 [following]
Warning: wildcards not supported in HTTP.
--2023-02-13 18:13:05-- https://doc-00-90-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/5ctlugfe1le1jhqnpgrek4fuu2h4rvfa/1676311950000/07591141114418430227/*/1SQnWXGROG2Xexs5vn3twuv7SqiWG5njW?e=download&uuid=4caccdc0-4217-4fee-bafa-7cad897bd651
Resolving doc-00-90-docs.googleusercontent.com (doc-00-90-docs.googleusercontent.com)... 173.194.210.132, 2607:f8b0:400c:c0f::84
Connecting to doc-00-90-docs.googleusercontent.com (doc-00-90-docs.googleusercontent.com)|173.194.210.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 156530728 (149M) [application/zip]
Saving to: ‘MADatasets.zip’
MADatasets.zip 100%[===================>] 149.28M 127MB/s in 1.2s
2023-02-13 18:13:06 (127 MB/s) - ‘MADatasets.zip’ saved [156530728/156530728]
Archive: MADatasets.zip
inflating: MADatasets/util.py
inflating: MADatasets/Iris1.mat
inflating: MADatasets/Integra_Labels.mat
inflating: MADatasets/MAGenerationClassification.py
inflating: MADatasets/Voice.mat
inflating: MADatasets/Iris.mat
inflating: MADatasets/Sinthetic.mat
inflating: MADatasets/MAGenerationClassification_1.py
inflating: MADatasets/Bupa1.mat
inflating: MADatasets/TicTacToe1.mat
inflating: MADatasets/Wine.mat
inflating: MADatasets/Breast1.mat
inflating: MADatasets/Breast.mat
inflating: MADatasets/Music.mat
inflating: MADatasets/Pima.mat
inflating: MADatasets/Ionosphere.mat
inflating: MADatasets/TicTacToe.mat
inflating: MADatasets/VoiceData.m
inflating: MADatasets/util_1.py
inflating: MADatasets/Ionosphere1.mat
inflating: MADatasets/__pycache__/util_1.cpython-37.pyc
inflating: MADatasets/Bupa.mat
inflating: MADatasets/Wine1.mat
inflating: MADatasets/__pycache__/util.cpython-37.pyc
inflating: MADatasets/Pima1.mat
inflating: MADatasets/Segmentation1.mat
inflating: MADatasets/Western.mat
inflating: MADatasets/Integra_Preprocesamiento_Seg_Caracterizacion_time_frec.mat
inflating: MADatasets/Western1.mat
inflating: MADatasets/Segmentation.mat
inflating: MADatasets/Skin_NonSkin.mat
inflating: MADatasets/Skin_NonSkin1.mat
inflating: MADatasets/Occupancy1.mat
inflating: MADatasets/Polarity.mat
inflating: MADatasets/Occupancy.mat
balance-scale.csv GCCE_TEST new-thyroid.csv tic-tac-toe-endgame.csv
DataGCCE.zip MADatasets sample_data
file.csv MADatasets.zip thyroidGCE.xlsx
Load Data¶
[40]:
# Load data: read the new-thyroid CSV and split into features X and labels t.
import scipy.io as sio
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf  # import tensorflow
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler,MinMaxScaler
import numpy as np
database = 'thyroid' #['bupa1', 'breast-cancer-wisconsin1','pima-indians-diabetes1', 'ionosphere1', 'tic-tac-toe1', 'iris1', 'wine1', 'segmentation1']
import pandas as pd  # NOTE(review): duplicate import (already imported above)
from sklearn.preprocessing import LabelEncoder
# NOTE(review): hardcoded Colab path — file comes from the DataGCCE.zip download.
df = pd.read_csv(r'/content/new-thyroid.csv')
df.head()
# Dividing X and y: encode the string class column to integers 0..K-1.
t = np.array(LabelEncoder().fit_transform(df['class'].tolist()))
X = np.array(df.drop(['class'], axis = 1).values)
print('X',X.shape,'t',t.shape)
t =t+1  # shift to 1-based labels — presumably required by MA_Clas_Gen; confirm
X (215, 5) t (215,)
Labels Generation¶
[41]:
# Simulate R=5 annotators from (X, t). Returns annotations Y (n_samples x R),
# plus iAnn and Lam_r — presumably the annotation mask and per-annotator
# reliabilities; confirm against labels_generation.MA_Clas_Gen.
Y, iAnn, Lam_r = MA_Clas_Gen(X ,t, R=5, NrP=1)
/usr/local/lib/python3.8/dist-packages/sklearn/manifold/_t_sne.py:780: FutureWarning: The default initialization in TSNE will change from 'random' to 'pca' in 1.2.
warnings.warn(
/usr/local/lib/python3.8/dist-packages/sklearn/manifold/_t_sne.py:790: FutureWarning: The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.
warnings.warn(
[42]:
# Shift labels back to 0-based (they were made 1-based before MA_Clas_Gen).
Y = Y - 1
t = t - 1
#YMA = YMA-1
[43]:
from sklearn.metrics import classification_report
# Per-annotator quality check: report each annotator's labels against the
# ground truth t, and plot that annotator's class-frequency histogram.
for i in range(Y.shape[1]):
    print('annotator',i+1)
    print(classification_report(t,Y[:,i]))
    unique, counts = np.unique(Y[:,i], return_counts=True)
    plt.figure()
    plt.bar(unique, counts)
    # unique, counts = np.unique(Y_test[5], return_counts=True)
    # plt.bar(unique, counts)
    plt.title('Class Frequency for Y_true')
    plt.xlabel('Class')
    plt.ylabel('Frequency')
annotator 1
precision recall f1-score support
0 0.98 0.81 0.89 150
1 0.65 0.91 0.76 35
2 0.69 0.97 0.81 30
accuracy 0.85 215
macro avg 0.78 0.90 0.82 215
weighted avg 0.89 0.85 0.86 215
annotator 2
precision recall f1-score support
0 0.87 0.65 0.75 150
1 0.17 0.17 0.17 35
2 0.42 0.93 0.58 30
accuracy 0.61 215
macro avg 0.49 0.59 0.50 215
weighted avg 0.69 0.61 0.63 215
annotator 3
precision recall f1-score support
0 0.93 0.61 0.74 150
1 0.41 0.89 0.56 35
2 0.32 0.43 0.37 30
accuracy 0.63 215
macro avg 0.55 0.64 0.56 215
weighted avg 0.76 0.63 0.66 215
annotator 4
precision recall f1-score support
0 0.84 0.55 0.66 150
1 0.15 0.17 0.16 35
2 0.36 0.93 0.52 30
accuracy 0.54 215
macro avg 0.45 0.55 0.45 215
weighted avg 0.66 0.54 0.56 215
annotator 5
precision recall f1-score support
0 0.58 0.15 0.23 150
1 0.22 0.71 0.33 35
2 0.03 0.07 0.04 30
accuracy 0.23 215
macro avg 0.28 0.31 0.20 215
weighted avg 0.44 0.23 0.22 215
Split data¶
[44]:
import numpy.matlib
from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit
# Single 70/30 train/test split with a fixed seed for reproducibility.
Ns = 1
ss = ShuffleSplit(n_splits=Ns, test_size=0.3,random_state =123)
for train_index, test_index in ss.split(X):
    print(test_index)
    # Y_* holds the multi-annotator labels; Y_true_* the ground truth (as columns).
    X_train, X_test,Y_train,Y_test = X[train_index,:], X[test_index,:],Y[train_index,:], Y[test_index,:]
    Y_true_train, Y_true_test = t[train_index].reshape(-1,1), t[test_index].reshape(-1,1)
print(X_train.shape, Y_train.shape, Y_true_train.shape)
[167 200 31 128 93 158 165 133 190 139 119 19 107 187 20 122 198 186
193 52 204 71 166 178 4 201 79 197 116 161 172 21 127 53 179 72
206 149 10 42 81 26 11 140 90 150 120 85 183 100 88 143 189 37
50 62 108 77 181 24 175 169 33 23 194]
(150, 5) (150, 5) (150, 1)
Apply MinMaxScaler¶
[45]:
# Fit min-max scaling on the training split only (avoids test-set leakage),
# then apply the same transform to both splits.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
Testing the Class¶
[47]:
from sklearn.metrics import classification_report, balanced_accuracy_score, roc_auc_score
from sklearn.metrics import normalized_mutual_info_score, mutual_info_score, adjusted_mutual_info_score
import pandas as pd
# Repeat training NUM_RUNS times to estimate mean/std of each metric.
l1 =0.001            # L1 regularization strength
NUM_RUNS =10
custom_loss = "GCE"  # loss variant used by the multi-annotator model
results = []
for i in range(NUM_RUNS):
    print("iteration: " + str(i))
    MA = Keras_MA_GCCE(epochs=100,batch_size=32,R=5, K=len(np.unique(Y_true_train)), dropout=0.2, learning_rate=0.01,optimizer='Adam',
                l1_param=l1,  validation_split=0, verbose=0, q=0.01, neurons=4, loss = custom_loss )
    MA.fit(X_train, Y_train)
    MA.plot_history()
    # Generate the predictions for the current run.
    pred_2 = MA.predict(X_test)
    # Columns after the first Y.shape[1] are presumably the class scores
    # (the leading columns being per-annotator outputs) — confirm against
    # keras_ma_gcce.Keras_MA_GCCE.predict.
    acc, auc, auc_sk, nmi, bacc = evaluation_metrics(Y_true_test, pred_2[:,Y.shape[1]:], print_result=False)  # mi, ami,
    # Save the results for the current run to the list of dictionaries.
    results.append({
        #'run': i,
        'accuracy': acc,
        'balanced_accuracy': bacc,
        # 'mutual_information': mi,
        'normalized_mutual_information': nmi,
        # 'adjusted_mutual_information': ami,
        'auc_tensorflow': auc,
        'auc_scikit_learn': auc_sk,
    })
# Convert the list of dictionaries to a DataFrame, as percentages.
# NOTE(review): `df` shadows the dataset DataFrame loaded earlier.
df = np.round(pd.DataFrame(results)*100, 2)
# Calculate the mean and standard deviation of each metric.
mean = np.round(df.mean(),2)
std = np.round(df.std(),2)
iteration: 0
3/3 [==============================] - 0s 5ms/step
iteration: 1
3/3 [==============================] - 0s 4ms/step
iteration: 2
3/3 [==============================] - 0s 12ms/step
iteration: 3
3/3 [==============================] - 0s 4ms/step
iteration: 4
3/3 [==============================] - 0s 6ms/step
iteration: 5
3/3 [==============================] - 0s 4ms/step
iteration: 6
3/3 [==============================] - 0s 6ms/step
iteration: 7
3/3 [==============================] - 0s 5ms/step
iteration: 8
3/3 [==============================] - 0s 8ms/step
iteration: 9
3/3 [==============================] - 0s 5ms/step
[48]:
df
[48]:
accuracy | balanced_accuracy | normalized_mutual_information | auc_tensorflow | auc_scikit_learn | |
---|---|---|---|---|---|
0 | 95.38 | 88.14 | 81.43 | 94.0 | 97.95 |
1 | 95.38 | 88.14 | 81.43 | 94.0 | 97.67 |
2 | 95.38 | 88.14 | 81.43 | 94.0 | 100.00 |
3 | 92.31 | 80.45 | 74.09 | 90.0 | 94.76 |
4 | 96.92 | 91.99 | 86.20 | 96.0 | 99.80 |
5 | 93.85 | 84.29 | 77.49 | 92.0 | 96.98 |
6 | 95.38 | 88.14 | 81.43 | 94.0 | 97.78 |
7 | 95.38 | 88.14 | 81.43 | 94.0 | 97.50 |
8 | 93.85 | 84.29 | 77.49 | 92.0 | 93.75 |
9 | 96.92 | 91.99 | 86.20 | 96.0 | 99.87 |
[49]:
mean
[49]:
accuracy 95.08
balanced_accuracy 87.37
normalized_mutual_information 80.86
auc_tensorflow 93.60
auc_scikit_learn 97.61
dtype: float64
[50]:
std
[50]:
accuracy 1.41
balanced_accuracy 3.54
normalized_mutual_information 3.76
auc_tensorflow 1.84
auc_scikit_learn 2.09
dtype: float64
[51]:
# Combine the per-metric mean and std into a single summary table.
result_df = pd.concat([mean.rename('Mean'), std.rename('Std')], axis=1)
[52]:
result_df
[52]:
Mean | Std | |
---|---|---|
accuracy | 95.08 | 1.41 |
balanced_accuracy | 87.37 | 3.54 |
normalized_mutual_information | 80.86 | 3.76 |
auc_tensorflow | 93.60 | 1.84 |
auc_scikit_learn | 97.61 | 2.09 |
[53]:
# Save the summary DataFrame to an Excel file named "<database><loss>.xlsx"
# (e.g. "thyroidGCE.xlsx"); requires an Excel writer engine such as openpyxl.
result_df.to_excel(database + custom_loss + ".xlsx")
[53]: