3. Classification Performance Assessment
import pandas as pd
import numpy as np
import math
import re
import getpass

# All input files live in the project's OneDrive data folder for the current
# user. The directory is built once; the original inline f-strings had stray
# spaces around the username that broke the paths.
_DATA_DIR = (
    f"/Users/{getpass.getuser()}/OneDrive - World Justice Project/"
    "EU Subnational/EU-S Data/Automated Qualitative Checks/Data"
)
AI_data = pd.read_parquet(f"{_DATA_DIR}/GPT-vs-Gemini-data.parquet.gzip")
EU_team = pd.read_parquet(f"{_DATA_DIR}/human_labelling.parquet.gzip")
# Horacio's labels are the reference set — keep them out of the team pool.
EU_team = EU_team[EU_team['bucket'] != "hortiz"]
horacio = pd.read_parquet(f"{_DATA_DIR}/hortiz.parquet.gzip")
def calculate_performance(df, predicted_column, truth_column):
    """Compute binary-classification metrics for one predictor vs ground truth.

    Parameters
    ----------
    df : pd.DataFrame
        Data containing both columns, coded as 0/1.
    predicted_column : str
        Name of the column holding the classifier's predictions.
    truth_column : str
        Name of the column holding the reference (ground-truth) labels.

    Returns
    -------
    dict
        accuracy, precision, recall, f1, mcc, tpr, fpr, a 2x2 confusion
        matrix laid out as [[TN, FP], [FN, TP]], and the four raw counts.
        Ratio metrics fall back to 0 when their denominator is 0.
    """
    true_pos = len(df[(df[predicted_column] == 1) & (df[truth_column] == 1)])
    true_neg = len(df[(df[predicted_column] == 0) & (df[truth_column] == 0)])
    false_neg = len(df[(df[predicted_column] == 0) & (df[truth_column] == 1)])
    false_pos = len(df[(df[predicted_column] == 1) & (df[truth_column] == 0)])

    total = true_pos + true_neg + false_neg + false_pos
    # Guard against an empty frame (the original divided unconditionally).
    accuracy = (true_pos + true_neg) / total if total > 0 else 0
    precision = true_pos / (true_pos + false_pos) if (true_pos + false_pos) > 0 else 0
    recall = true_pos / (true_pos + false_neg) if (true_pos + false_neg) > 0 else 0
    # Harmonic mean of precision and recall; 0 when both are 0.
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    tpr = recall  # TPR is recall by definition.
    fpr = false_pos / (false_pos + true_neg) if (false_pos + true_neg) > 0 else 0

    c_matrix = np.array([[true_neg, false_pos], [false_neg, true_pos]])

    # Matthews correlation coefficient; compute the denominator only once
    # (the original evaluated the sqrt twice).
    mcc_denom = math.sqrt(
        (true_pos + false_pos) * (true_pos + false_neg)
        * (true_neg + false_pos) * (true_neg + false_neg)
    )
    mcc = ((true_pos * true_neg) - (false_pos * false_neg)) / mcc_denom if mcc_denom > 0 else 0

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'mcc': mcc,
        'tpr': tpr,
        'fpr': fpr,
        'confusion_matrix': c_matrix,
        'true_pos': true_pos,
        'true_neg': true_neg,
        'false_pos': false_pos,
        'false_neg': false_neg
    }
# Stage-1 (Rule-of-Law relevance) flags for every labeller.
# Horacio is ROL-positive on an article if he assigned it any of the 8 pillars.
_horacio_pillar_cols = [f'horacio_pillar_{n}' for n in range(1, 9)]
AI_data['horacio_rol'] = AI_data[_horacio_pillar_cols].eq(1).any(axis=1).astype(int)
# The models are ROL-positive unless they explicitly said "unrelated".
AI_data['GPT_ROL'] = (AI_data['factor(s)'] != "Not related to Rule of Law").astype(int)
AI_data['Gemini_ROL'] = (AI_data['gemini_stage_1'] != "Unrelated").astype(int)
horacio['horacio_ROL'] = np.where(horacio['factor(s)'] != "Not related to Rule of Law", 1, 0)
EU_team['model_ROL'] = np.where(EU_team['factor(s)'] != "Not related to Rule of Law", 1, 0)
# Derive per-pillar 0/1 flags from the free-text 'factor(s)' field, which
# lists assigned factors as "N: …". The search pattern must be exactly
# f'{pillar}:' — the garbled ' { pillar} :' form would never match.
for df in [horacio, EU_team]:
    df['factor(s)'] = df['factor(s)'].astype(str)
for pillar in range(1, 9):
    horacio[f'horacio_pillar_{pillar}'] = horacio['factor(s)'].apply(
        lambda x: int(bool(re.search(f'{pillar}:', str(x))))
    )
    EU_team[f'model_pillar_{pillar}'] = EU_team['factor(s)'].apply(
        lambda x: int(bool(re.search(f'{pillar}:', str(x))))
    )
# Text columns are no longer needed once the flags exist.
horacio = horacio.drop(columns=['link', 'factor(s)', 'sentiment', 'is_eu_related',
                                'related_country', 'comments'])
EU_team = EU_team.drop(columns=['link', 'factor(s)', 'sentiment', 'is_eu_related',
                                'related_country', 'comments'])
# Pair every team member's labels with Horacio's reference labels on the
# articles they both coded (inner join on article_id).
dataframes = {}
for member in EU_team['bucket'].unique():
    member_labels = EU_team.loc[EU_team['bucket'] == member]
    paired = pd.merge(member_labels, horacio, on="article_id", how="inner")
    dataframes[member] = paired
    n_articles = len(paired)

# Stage-1 performance of the two models against Horacio.
gpt_metrics = calculate_performance(AI_data, 'GPT_ROL', 'horacio_rol')
print('GPT metrics:', gpt_metrics)
gemini_metrics = calculate_performance(AI_data, 'Gemini_ROL', 'horacio_rol')
print('Gemini metrics: ', gemini_metrics)
GPT metrics: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'mcc': 1.0, 'tpr': 1.0, 'fpr': 0.0, 'confusion_matrix': array([[136, 0],
[ 0, 67]]), 'true_pos': 67, 'true_neg': 136, 'false_pos': 0, 'false_neg': 0}
Gemini metrics: {'accuracy': 0.7980295566502463, 'precision': 0.782608695652174, 'recall': 0.5373134328358209, 'f1': 0.6371681415929203, 'mcc': 0.5209474243346167, 'tpr': 0.5373134328358209, 'fpr': 0.07352941176470588, 'confusion_matrix': array([[126, 10],
[ 31, 36]]), 'true_pos': 36, 'true_neg': 126, 'false_pos': 10, 'false_neg': 31}
# Stage-1 performance of every human classifier against Horacio.
# Rows are collected in a plain list and converted to a DataFrame once —
# pd.concat inside the loop (the original approach) is quadratic.
_stage_one_rows = []
for classifier, merged_df in dataframes.items():
    metrics = calculate_performance(merged_df, 'model_ROL', 'horacio_ROL')
    # 'classifier' first, then the metric keys in their natural order —
    # same column layout as the original explicit dict.
    _stage_one_rows.append({'classifier': classifier, **metrics})
classification_performance = pd.DataFrame(_stage_one_rows)
# Stage-1 comparison table: GPT and Gemini vs the (top) human classifiers.
gpt_stage_one = {
    'tpr': gpt_metrics["tpr"],
    "fpr": gpt_metrics["fpr"],
    'confusion_matrix': gpt_metrics["confusion_matrix"],
    'classifier': 'GPT'
}
gem_stage_one = {
    'tpr': gemini_metrics["tpr"],
    "fpr": gemini_metrics["fpr"],
    'confusion_matrix': gemini_metrics["confusion_matrix"],
    'classifier': 'Gemini'
}
# "Top" human classifiers: stage-1 TPR above 0.8.
top = classification_performance.loc[classification_performance['tpr'] > 0.8]
top_eu = {
    'tpr': top['tpr'].mean(),
    'fpr': top['fpr'].mean(),
    'confusion_matrix': top['confusion_matrix'].sum(),
    'classifier': 'Top Human Classifiers'
}
all_of_eu = {
    'tpr': classification_performance['tpr'].mean(),
    'fpr': classification_performance['fpr'].mean(),
    'confusion_matrix': classification_performance['confusion_matrix'].sum(),
    'classifier': 'Human Classifiers'
}
stage_one_summary = [gpt_stage_one, gem_stage_one, top_eu, all_of_eu]
stage_one = pd.DataFrame(stage_one_summary)
# Display-only styling; header typo fixed ("Confussion" -> "Confusion").
stage_one[["classifier", "tpr", "fpr", "confusion_matrix"]].rename(
    columns={
        "classifier": "Classifier",
        "tpr": "TPR",
        "fpr": "FPR",
        "confusion_matrix": "Confusion Matrix"
    }
).style.hide(axis="index").format({
    "TPR": "{:,.2f}",
    "FPR": "{:,.2f}"
})
# Humans that beat Gemini on every stage-1 metric (thresholds taken from
# Gemini's own scores printed above: acc 0.80, prec 0.78, rec 0.54, f1 0.64).
better_than_gemini = classification_performance.loc[
    (classification_performance['accuracy'] >= 0.8)
    & (classification_performance['precision'] >= 0.8)
    & (classification_performance['recall'] >= 0.54)
    & (classification_performance['f1'] >= 0.64)
]
p = len(better_than_gemini) / len(classification_performance)
print(f"{p * 100:.2f}% of the EU team classified articles better than Gemini "
      f"({len(better_than_gemini)} people).")
35.00 % of the EU team classified articles better than Gemini (7 people).
# Stage 2 (pillar assignment) is scored only on articles Horacio marked as
# Rule-of-Law related.
AI_data = AI_data[AI_data['horacio_rol'] == 1]
# Parallel column lists: index i of each list refers to pillar i+1.
gpt_columns = [f'GPT_pillar_{n}' for n in range(1, 9)]
gemini_columns = [f'Gemini_pillar_{n}' for n in range(1, 9)]
horacio_columns = [f'horacio_pillar_{n}' for n in range(1, 9)]
# Per-pillar performance of GPT and Gemini against Horacio.
# NOTE: the original loop variable was named `horacio`, clobbering the
# `horacio` DataFrame global — renamed to `horacio_col`.
performance_data = []
for horacio_col, gpt_col, gemini_col in zip(horacio_columns, gpt_columns, gemini_columns):
    # One row per (model, pillar); GPT first, then Gemini, as before.
    for model_name, model_col in (('GPT', gpt_col), ('Gemini', gemini_col)):
        result = calculate_performance(AI_data, model_col, horacio_col)
        result['Classifier'] = model_name
        result['Model_Column'] = model_col
        result['Horacio'] = horacio_col
        performance_data.append(result)
AI_performance = pd.DataFrame(performance_data)
# Restrict each member's merged frame to articles Horacio marked as ROL —
# stage 2 is only scored on those.
stage_two = {
    name: merged.loc[merged['horacio_ROL'] == 1]
    for name, merged in dataframes.items()
}
# Parallel per-pillar column lists (index i -> pillar i+1).
EU_columns = [f'model_pillar_{n}' for n in range(1, 9)]
horacio_columns = [f'horacio_pillar_{n}' for n in range(1, 9)]
# Stage-2 performance of every human classifier, one row per (person, pillar).
# The pillar label must be exactly 'Pillar N' — the selections further down
# compare with == "Pillar N", so the garbled f-string spacing is repaired.
_stage_two_rows = []
for classifier, classifier_df in stage_two.items():
    for i, (eu_col, horacio_col) in enumerate(zip(EU_columns, horacio_columns)):
        metrics = calculate_performance(classifier_df, eu_col, horacio_col)
        row = {'classifier': classifier, 'pillar': f'Pillar {i + 1}'}
        row.update(metrics)
        # Keep the matrix wrapped in a one-element list — downstream code
        # relies on this storage format for the human rows.
        row['confusion_matrix'] = [metrics['confusion_matrix']]
        _stage_two_rows.append(row)
stage_two_classification_performance = pd.DataFrame(_stage_two_rows)
def map_pillar(column):
    """Translate a '*_pillar_N' column name into the label 'Pillar N'.

    Returns None for names without 'pillar_'. The label must carry no
    stray spaces: later selections compare it with == "Pillar N".
    """
    if 'pillar_' in column:
        pillar_number = column.split('_')[-1]
        return f'Pillar {pillar_number}'
# Label AI rows with their pillar and align column names with the human table,
# then stack both tables into one `performance` frame.
AI_performance['pillar'] = AI_performance['Model_Column'].apply(map_pillar)
AI_performance = AI_performance.rename(
    columns={
        'Classifier': 'classifier',
        # NOTE(review): no column named 'confusion matrix' exists, so this
        # mapping is a no-op — kept for fidelity; verify it can be dropped.
        'confusion matrix': 'confusion_matrix'
    }
)
AI_performance = AI_performance.drop(columns=['Model_Column', 'Horacio'])
performance = pd.concat([AI_performance, stage_two_classification_performance],
                        ignore_index=True)
# --- Pillar 1 summary: GPT, Gemini, top humans, all humans ---
p1 = performance.loc[performance['pillar'] == "Pillar 1"]
# Top humans: drop 'lcleary' and keep TPR >= 0.5 with FPR < 0.4.
# .copy() so the column assignments below don't raise SettingWithCopyWarning.
p1_top = p1.loc[
    (p1['classifier'] != 'lcleary') & (p1['tpr'] >= 0.5) & (p1['fpr'] < .4)
].copy()
p1_top['confusion_matrix'] = np.array(p1_top['confusion_matrix'])
eu_p1 = p1.loc[
    (p1['classifier'] != 'GPT') & (p1['classifier'] != 'Gemini')
].copy()
eu_p1['confusion_matrix'] = np.array(eu_p1['confusion_matrix'])
gem_p1 = {
    'tpr': p1.loc[p1['classifier'] == 'Gemini', 'tpr'].iloc[0],
    'fpr': p1.loc[p1['classifier'] == 'Gemini', 'fpr'].iloc[0],
    'confusion_matrix': np.array(p1.loc[p1['classifier'] == 'Gemini', 'confusion_matrix'].iloc[0]),
    'classifier': 'Gemini',
    'pillar': 'Pillar 1'
}
gpt_p1 = {
    'tpr': p1.loc[p1['classifier'] == 'GPT', 'tpr'].iloc[0],
    'fpr': p1.loc[p1['classifier'] == 'GPT', 'fpr'].iloc[0],
    'confusion_matrix': np.array(p1.loc[p1['classifier'] == 'GPT', 'confusion_matrix'].iloc[0]),
    'classifier': 'GPT',
    'pillar': 'Pillar 1'
}
# Aggregate confusion matrices by summing the raw counts ([[TN, FP], [FN, TP]]).
top_p1 = {
    'tpr': p1_top['tpr'].mean(),
    'fpr': p1_top['fpr'].mean(),
    'confusion_matrix': np.array([[p1_top['true_neg'].sum(), p1_top['false_pos'].sum()],
                                  [p1_top['false_neg'].sum(), p1_top['true_pos'].sum()]]),
    'classifier': 'Top Human Classifiers',
    'pillar': 'Pillar 1'
}
all_eu_p1 = {
    'tpr': eu_p1['tpr'].mean(),
    'fpr': eu_p1['fpr'].mean(),
    'confusion_matrix': np.array([[eu_p1['true_neg'].sum(), eu_p1['false_pos'].sum()],
                                  [eu_p1['false_neg'].sum(), eu_p1['true_pos'].sum()]]),
    'classifier': 'Human Classifiers',
    'pillar': 'Pillar 1'
}
pillar_one = pd.DataFrame([gpt_p1, gem_p1, top_p1, all_eu_p1])
/var/folders/7x/fdwfv0y13yz0y3sjb4mwznqm0000gp/T/ipykernel_47536/406494460.py:7: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
p1_top['confusion_matrix'] = np.array(p1_top['confusion_matrix'])
/var/folders/7x/fdwfv0y13yz0y3sjb4mwznqm0000gp/T/ipykernel_47536/406494460.py:11: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
eu_p1['confusion_matrix'] = np.array(eu_p1['confusion_matrix'])
# --- Pillar 2 summary: GPT, Gemini, top humans, all humans ---
p2 = performance.loc[performance['pillar'] == "Pillar 2"]
p2_top = p2.loc[
    (p2['classifier'] != 'lcleary') & (p2['tpr'] >= 0.5) & (p2['fpr'] < .4)
]
# .copy() so the column assignment below doesn't raise SettingWithCopyWarning.
eu_p2 = p2.loc[
    (p2['classifier'] != 'GPT') & (p2['classifier'] != 'Gemini')
].copy()
eu_p2['confusion_matrix'] = np.array(eu_p2['confusion_matrix'])
gem_p2 = {
    'tpr': p2.loc[p2['classifier'] == 'Gemini', 'tpr'].iloc[0],
    'fpr': p2.loc[p2['classifier'] == 'Gemini', 'fpr'].iloc[0],
    'confusion_matrix': np.array(p2.loc[p2['classifier'] == 'Gemini', 'confusion_matrix'].iloc[0]),
    'classifier': 'Gemini',
    'pillar': 'Pillar 2'
}
gpt_p2 = {
    'tpr': p2.loc[p2['classifier'] == 'GPT', 'tpr'].iloc[0],
    'fpr': p2.loc[p2['classifier'] == 'GPT', 'fpr'].iloc[0],
    'confusion_matrix': np.array(p2.loc[p2['classifier'] == 'GPT', 'confusion_matrix'].iloc[0]),
    'classifier': 'GPT',
    'pillar': 'Pillar 2'
}
top_p2 = {
    'tpr': p2_top['tpr'].mean(),
    'fpr': p2_top['fpr'].mean(),
    'confusion_matrix': np.array([[p2_top['true_neg'].sum(), p2_top['false_pos'].sum()],
                                  [p2_top['false_neg'].sum(), p2_top['true_pos'].sum()]]),
    'classifier': 'Top Human Classifiers',
    'pillar': 'Pillar 2'
}
all_eu_p2 = {
    'tpr': eu_p2['tpr'].mean(),
    'fpr': eu_p2['fpr'].mean(),
    'confusion_matrix': np.array([[eu_p2['true_neg'].sum(), eu_p2['false_pos'].sum()],
                                  [eu_p2['false_neg'].sum(), eu_p2['true_pos'].sum()]]),
    'classifier': 'Human Classifiers',
    'pillar': 'Pillar 2'
}
pillar_two = pd.DataFrame([gpt_p2, gem_p2, top_p2, all_eu_p2])
/var/folders/7x/fdwfv0y13yz0y3sjb4mwznqm0000gp/T/ipykernel_47536/1115769891.py:11: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
eu_p2['confusion_matrix'] = np.array(eu_p2['confusion_matrix'])
# --- Pillar 3 summary: GPT, Gemini, top humans, all humans ---
# BUG FIX: the original ended with `pillar_three = pd.DataFrame(pillar_two)`,
# silently duplicating the Pillar 2 table; it now builds from the p3 dicts.
p3 = performance.loc[performance['pillar'] == "Pillar 3"]
p3_top = p3.loc[
    (p3['classifier'] != 'lcleary') & (p3['tpr'] >= 0.5) & (p3['fpr'] < .4)
]
# .copy() so the column assignment below doesn't raise SettingWithCopyWarning.
eu_p3 = p3.loc[
    (p3['classifier'] != 'GPT') & (p3['classifier'] != 'Gemini')
].copy()
eu_p3['confusion_matrix'] = np.array(eu_p3['confusion_matrix'])
gem_p3 = {
    'tpr': p3.loc[p3['classifier'] == 'Gemini', 'tpr'].iloc[0],
    'fpr': p3.loc[p3['classifier'] == 'Gemini', 'fpr'].iloc[0],
    'confusion_matrix': np.array(p3.loc[p3['classifier'] == 'Gemini', 'confusion_matrix'].iloc[0]),
    'classifier': 'Gemini',
    'pillar': 'Pillar 3'
}
gpt_p3 = {
    'tpr': p3.loc[p3['classifier'] == 'GPT', 'tpr'].iloc[0],
    'fpr': p3.loc[p3['classifier'] == 'GPT', 'fpr'].iloc[0],
    'confusion_matrix': np.array(p3.loc[p3['classifier'] == 'GPT', 'confusion_matrix'].iloc[0]),
    'classifier': 'GPT',
    'pillar': 'Pillar 3'
}
top_p3 = {
    'tpr': p3_top['tpr'].mean(),
    'fpr': p3_top['fpr'].mean(),
    'confusion_matrix': np.array([[p3_top['true_neg'].sum(), p3_top['false_pos'].sum()],
                                  [p3_top['false_neg'].sum(), p3_top['true_pos'].sum()]]),
    'classifier': 'Top Human Classifiers',
    'pillar': 'Pillar 3'
}
all_eu_p3 = {
    'tpr': eu_p3['tpr'].mean(),
    'fpr': eu_p3['fpr'].mean(),
    'confusion_matrix': np.array([[eu_p3['true_neg'].sum(), eu_p3['false_pos'].sum()],
                                  [eu_p3['false_neg'].sum(), eu_p3['true_pos'].sum()]]),
    'classifier': 'Human Classifiers',
    'pillar': 'Pillar 3'
}
pillar_three = pd.DataFrame([gpt_p3, gem_p3, top_p3, all_eu_p3])
/var/folders/7x/fdwfv0y13yz0y3sjb4mwznqm0000gp/T/ipykernel_47536/4165617852.py:11: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
eu_p3['confusion_matrix'] = np.array(eu_p3['confusion_matrix'])
# --- Pillar 4 summary: GPT, Gemini, top humans, all humans ---
p4 = performance.loc[performance['pillar'] == "Pillar 4"]
p4_top = p4.loc[
    (p4['classifier'] != 'lcleary') & (p4['tpr'] >= 0.5) & (p4['fpr'] < .4)
]
# .copy() so the column assignment below doesn't raise SettingWithCopyWarning.
eu_p4 = p4.loc[
    (p4['classifier'] != 'GPT') & (p4['classifier'] != 'Gemini')
].copy()
eu_p4['confusion_matrix'] = np.array(eu_p4['confusion_matrix'])
gem_p4 = {
    'tpr': p4.loc[p4['classifier'] == 'Gemini', 'tpr'].iloc[0],
    'fpr': p4.loc[p4['classifier'] == 'Gemini', 'fpr'].iloc[0],
    'confusion_matrix': np.array(p4.loc[p4['classifier'] == 'Gemini', 'confusion_matrix'].iloc[0]),
    'classifier': 'Gemini',
    'pillar': 'Pillar 4'
}
gpt_p4 = {
    'tpr': p4.loc[p4['classifier'] == 'GPT', 'tpr'].iloc[0],
    'fpr': p4.loc[p4['classifier'] == 'GPT', 'fpr'].iloc[0],
    'confusion_matrix': np.array(p4.loc[p4['classifier'] == 'GPT', 'confusion_matrix'].iloc[0]),
    'classifier': 'GPT',
    'pillar': 'Pillar 4'
}
top_p4 = {
    'tpr': p4_top['tpr'].mean(),
    'fpr': p4_top['fpr'].mean(),
    'confusion_matrix': np.array([[p4_top['true_neg'].sum(), p4_top['false_pos'].sum()],
                                  [p4_top['false_neg'].sum(), p4_top['true_pos'].sum()]]),
    'classifier': 'Top Human Classifiers',
    'pillar': 'Pillar 4'
}
all_eu_p4 = {
    'tpr': eu_p4['tpr'].mean(),
    'fpr': eu_p4['fpr'].mean(),
    'confusion_matrix': np.array([[eu_p4['true_neg'].sum(), eu_p4['false_pos'].sum()],
                                  [eu_p4['false_neg'].sum(), eu_p4['true_pos'].sum()]]),
    'classifier': 'Human Classifiers',
    'pillar': 'Pillar 4'
}
pillar_four = pd.DataFrame([gpt_p4, gem_p4, top_p4, all_eu_p4])
/var/folders/7x/fdwfv0y13yz0y3sjb4mwznqm0000gp/T/ipykernel_47536/2076089035.py:11: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
eu_p4['confusion_matrix'] = np.array(eu_p4['confusion_matrix'])
# --- Pillar 5 summary: GPT, Gemini, top humans, all humans ---
p5 = performance.loc[performance['pillar'] == "Pillar 5"]
p5_top = p5.loc[
    (p5['classifier'] != 'lcleary') & (p5['tpr'] >= 0.5) & (p5['fpr'] < .4)
]
# .copy() so the column assignment below doesn't raise SettingWithCopyWarning.
eu_p5 = p5.loc[
    (p5['classifier'] != 'GPT') & (p5['classifier'] != 'Gemini')
].copy()
eu_p5['confusion_matrix'] = np.array(eu_p5['confusion_matrix'])
gem_p5 = {
    'tpr': p5.loc[p5['classifier'] == 'Gemini', 'tpr'].iloc[0],
    'fpr': p5.loc[p5['classifier'] == 'Gemini', 'fpr'].iloc[0],
    'confusion_matrix': np.array(p5.loc[p5['classifier'] == 'Gemini', 'confusion_matrix'].iloc[0]),
    'classifier': 'Gemini',
    'pillar': 'Pillar 5'
}
gpt_p5 = {
    'tpr': p5.loc[p5['classifier'] == 'GPT', 'tpr'].iloc[0],
    'fpr': p5.loc[p5['classifier'] == 'GPT', 'fpr'].iloc[0],
    'confusion_matrix': np.array(p5.loc[p5['classifier'] == 'GPT', 'confusion_matrix'].iloc[0]),
    'classifier': 'GPT',
    'pillar': 'Pillar 5'
}
top_p5 = {
    'tpr': p5_top['tpr'].mean(),
    'fpr': p5_top['fpr'].mean(),
    'confusion_matrix': np.array([[p5_top['true_neg'].sum(), p5_top['false_pos'].sum()],
                                  [p5_top['false_neg'].sum(), p5_top['true_pos'].sum()]]),
    'classifier': 'Top Human Classifiers',
    'pillar': 'Pillar 5'
}
all_eu_p5 = {
    'tpr': eu_p5['tpr'].mean(),
    'fpr': eu_p5['fpr'].mean(),
    'confusion_matrix': np.array([[eu_p5['true_neg'].sum(), eu_p5['false_pos'].sum()],
                                  [eu_p5['false_neg'].sum(), eu_p5['true_pos'].sum()]]),
    'classifier': 'Human Classifiers',
    'pillar': 'Pillar 5'
}
pillar_five = pd.DataFrame([gpt_p5, gem_p5, top_p5, all_eu_p5])
/var/folders/7x/fdwfv0y13yz0y3sjb4mwznqm0000gp/T/ipykernel_47536/775791368.py:11: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
eu_p5['confusion_matrix'] = np.array(eu_p5['confusion_matrix'])
# --- Pillar 6 summary: GPT, Gemini, top humans, all humans ---
p6 = performance.loc[performance['pillar'] == "Pillar 6"]
p6_top = p6.loc[
    (p6['classifier'] != 'lcleary') & (p6['tpr'] >= 0.5) & (p6['fpr'] < .4)
]
# .copy() so the column assignment below doesn't raise SettingWithCopyWarning.
eu_p6 = p6.loc[
    (p6['classifier'] != 'GPT') & (p6['classifier'] != 'Gemini')
].copy()
eu_p6['confusion_matrix'] = np.array(eu_p6['confusion_matrix'])
gem_p6 = {
    'tpr': p6.loc[p6['classifier'] == 'Gemini', 'tpr'].iloc[0],
    'fpr': p6.loc[p6['classifier'] == 'Gemini', 'fpr'].iloc[0],
    'confusion_matrix': np.array(p6.loc[p6['classifier'] == 'Gemini', 'confusion_matrix'].iloc[0]),
    'classifier': 'Gemini',
    'pillar': 'Pillar 6'
}
gpt_p6 = {
    'tpr': p6.loc[p6['classifier'] == 'GPT', 'tpr'].iloc[0],
    'fpr': p6.loc[p6['classifier'] == 'GPT', 'fpr'].iloc[0],
    'confusion_matrix': np.array(p6.loc[p6['classifier'] == 'GPT', 'confusion_matrix'].iloc[0]),
    'classifier': 'GPT',
    'pillar': 'Pillar 6'
}
top_p6 = {
    'tpr': p6_top['tpr'].mean(),
    'fpr': p6_top['fpr'].mean(),
    'confusion_matrix': np.array([[p6_top['true_neg'].sum(), p6_top['false_pos'].sum()],
                                  [p6_top['false_neg'].sum(), p6_top['true_pos'].sum()]]),
    'classifier': 'Top Human Classifiers',
    'pillar': 'Pillar 6'
}
all_eu_p6 = {
    'tpr': eu_p6['tpr'].mean(),
    'fpr': eu_p6['fpr'].mean(),
    'confusion_matrix': np.array([[eu_p6['true_neg'].sum(), eu_p6['false_pos'].sum()],
                                  [eu_p6['false_neg'].sum(), eu_p6['true_pos'].sum()]]),
    'classifier': 'Human Classifiers',
    'pillar': 'Pillar 6'
}
pillar_six = pd.DataFrame([gpt_p6, gem_p6, top_p6, all_eu_p6])
/var/folders/7x/fdwfv0y13yz0y3sjb4mwznqm0000gp/T/ipykernel_47536/4058969709.py:11: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
eu_p6['confusion_matrix'] = np.array(eu_p6['confusion_matrix'])
# --- Pillar 7 summary: GPT, Gemini, top humans, all humans ---
p7 = performance.loc[performance['pillar'] == "Pillar 7"]
p7_top = p7.loc[
    (p7['classifier'] != 'lcleary') & (p7['tpr'] >= 0.5) & (p7['fpr'] < .4)
]
# .copy() so the column assignment below doesn't raise SettingWithCopyWarning.
eu_p7 = p7.loc[
    (p7['classifier'] != 'GPT') & (p7['classifier'] != 'Gemini')
].copy()
eu_p7['confusion_matrix'] = np.array(eu_p7['confusion_matrix'])
gem_p7 = {
    'tpr': p7.loc[p7['classifier'] == 'Gemini', 'tpr'].iloc[0],
    'fpr': p7.loc[p7['classifier'] == 'Gemini', 'fpr'].iloc[0],
    'confusion_matrix': np.array(p7.loc[p7['classifier'] == 'Gemini', 'confusion_matrix'].iloc[0]),
    'classifier': 'Gemini',
    'pillar': 'Pillar 7'
}
gpt_p7 = {
    'tpr': p7.loc[p7['classifier'] == 'GPT', 'tpr'].iloc[0],
    'fpr': p7.loc[p7['classifier'] == 'GPT', 'fpr'].iloc[0],
    'confusion_matrix': np.array(p7.loc[p7['classifier'] == 'GPT', 'confusion_matrix'].iloc[0]),
    'classifier': 'GPT',
    'pillar': 'Pillar 7'
}
top_p7 = {
    'tpr': p7_top['tpr'].mean(),
    'fpr': p7_top['fpr'].mean(),
    'confusion_matrix': np.array([[p7_top['true_neg'].sum(), p7_top['false_pos'].sum()],
                                  [p7_top['false_neg'].sum(), p7_top['true_pos'].sum()]]),
    'classifier': 'Top Human Classifiers',
    'pillar': 'Pillar 7'
}
all_eu_p7 = {
    'tpr': eu_p7['tpr'].mean(),
    'fpr': eu_p7['fpr'].mean(),
    'confusion_matrix': np.array([[eu_p7['true_neg'].sum(), eu_p7['false_pos'].sum()],
                                  [eu_p7['false_neg'].sum(), eu_p7['true_pos'].sum()]]),
    'classifier': 'Human Classifiers',
    'pillar': 'Pillar 7'
}
pillar_seven = pd.DataFrame([gpt_p7, gem_p7, top_p7, all_eu_p7])
/var/folders/7x/fdwfv0y13yz0y3sjb4mwznqm0000gp/T/ipykernel_47536/3508723912.py:11: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
eu_p7['confusion_matrix'] = np.array(eu_p7['confusion_matrix'])
# --- Pillar 8 summary: GPT, Gemini, top humans, all humans ---
p8 = performance.loc[performance['pillar'] == "Pillar 8"]
p8_top = p8.loc[
    (p8['classifier'] != 'lcleary') & (p8['tpr'] >= 0.5) & (p8['fpr'] < .4)
]
# .copy() so the column assignment below doesn't raise SettingWithCopyWarning.
eu_p8 = p8.loc[
    (p8['classifier'] != 'GPT') & (p8['classifier'] != 'Gemini')
].copy()
eu_p8['confusion_matrix'] = np.array(eu_p8['confusion_matrix'])
gem_p8 = {
    'tpr': p8.loc[p8['classifier'] == 'Gemini', 'tpr'].iloc[0],
    'fpr': p8.loc[p8['classifier'] == 'Gemini', 'fpr'].iloc[0],
    'confusion_matrix': np.array(p8.loc[p8['classifier'] == 'Gemini', 'confusion_matrix'].iloc[0]),
    'classifier': 'Gemini',
    'pillar': 'Pillar 8'
}
gpt_p8 = {
    'tpr': p8.loc[p8['classifier'] == 'GPT', 'tpr'].iloc[0],
    'fpr': p8.loc[p8['classifier'] == 'GPT', 'fpr'].iloc[0],
    'confusion_matrix': np.array(p8.loc[p8['classifier'] == 'GPT', 'confusion_matrix'].iloc[0]),
    'classifier': 'GPT',
    'pillar': 'Pillar 8'
}
top_p8 = {
    'tpr': p8_top['tpr'].mean(),
    'fpr': p8_top['fpr'].mean(),
    'confusion_matrix': np.array([[p8_top['true_neg'].sum(), p8_top['false_pos'].sum()],
                                  [p8_top['false_neg'].sum(), p8_top['true_pos'].sum()]]),
    'classifier': 'Top Human Classifiers',
    'pillar': 'Pillar 8'
}
all_eu_p8 = {
    'tpr': eu_p8['tpr'].mean(),
    'fpr': eu_p8['fpr'].mean(),
    'confusion_matrix': np.array([[eu_p8['true_neg'].sum(), eu_p8['false_pos'].sum()],
                                  [eu_p8['false_neg'].sum(), eu_p8['true_pos'].sum()]]),
    'classifier': 'Human Classifiers',
    'pillar': 'Pillar 8'
}
pillar_eight = pd.DataFrame([gpt_p8, gem_p8, top_p8, all_eu_p8])
/var/folders/7x/fdwfv0y13yz0y3sjb4mwznqm0000gp/T/ipykernel_47536/1783717482.py:11: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
eu_p8['confusion_matrix'] = np.array(eu_p8['confusion_matrix'])
# Final stage-2 display table. Only pillars 1, 4 and 5 are shown.
# NOTE(review): the original reused the names `dataframes` and `stage_two`,
# clobbering the dicts built earlier — renamed to keep those intact.
pillar_tables = [pillar_one, pillar_four, pillar_five]
stage_two_table = pd.concat(pillar_tables)
# Display-only styling; header typo fixed ("Confussion" -> "Confusion").
stage_two_table[["classifier", "pillar", "tpr", "fpr", "confusion_matrix"]].rename(
    columns={
        "classifier": "Classifier",
        "pillar": "Pillar",
        "tpr": "TPR",
        "fpr": "FPR",
        "confusion_matrix": "Confusion Matrix"
    }
).style.hide(axis="index").format({
    "TPR": "{:,.2f}",
    "FPR": "{:,.2f}"
})