# Spaces: Runtime error
import requests | |
import random | |
import time | |
import pandas as pd | |
import gradio as gr | |
import numpy as np | |
from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
from transformers import pipeline | |
import torch | |
def read3(num_selected_former):
    """Pick a random, displayable sample from the dataset file.

    The data file stores every sample on two consecutive lines: a dict
    literal that contains at least a ``'text'`` entry, followed by a list of
    ``(token, score)`` pairs.

    A sample is eligible when
      * its token list, with the first and last entries dropped (presumably
        the tokenizer's special tokens -- confirm against the data file),
        holds more than ``min_len`` tokens, and
      * its text contains neither a backslash nor ``//``.

    Samples are tried in random order and each candidate is validated in
    full.  The previously shown sample is only used as a last resort, when
    no other sample is eligible.

    Args:
        num_selected_former: Index of the sample shown in the previous
            round; pass a value that is not a valid index (e.g. ``-1``) when
            there is none.

    Returns:
        A tuple ``(res, index_selected)``.  ``res`` is a dict with the keys
        ``"original"`` (the sample text) and ``"interpretation"`` (one
        ``(word, 0)`` pair per space-separated word of the text);
        ``index_selected`` is the integer index of the chosen sample.

    Raises:
        ValueError: If the file contains no eligible sample.
    """
    fname = 'data3_convai2_inferred.txt'
    min_len = 5

    with open(fname, encoding='utf-8') as f:
        content = f.readlines()

    # Integer division: random.randint() rejects float bounds on Python 3.12+.
    num_samples = len(content) // 2

    # Visit every sample at most once, in random order.  The stable sort
    # moves the previous selection to the end so it is only a fallback.
    order = random.sample(range(num_samples), num_samples)
    order.sort(key=lambda idx: idx == num_selected_former)

    for index_selected in order:
        # NOTE(security): eval() executes whatever the data file contains.
        # This is only acceptable while the file is a trusted, locally
        # generated artifact; consider ast.literal_eval if it holds plain
        # literals only.
        text = eval(content[index_selected * 2])
        interpretation = eval(content[index_selected * 2 + 1])

        sentence = text['text']
        tokens = [pair[0] for pair in interpretation][1:-1]
        if len(tokens) <= min_len or '\\' in sentence or '//' in sentence:
            continue

        res_tmp = [(word, 0) for word in sentence.split(' ')]
        res = {"original": sentence, "interpretation": res_tmp}
        return res, index_selected

    raise ValueError("no eligible sample found in " + fname)
def func3(num_selected, human_predict, num1, num2, user_important):
    """Score one round of the human-vs-AI guessing game.

    Loads sample ``num_selected`` from the data file, derives the reference
    ("golden") rating from its ``binary_label`` / ``binary_score`` fields,
    runs the gender classifier on the sample text and compares both the
    human guess and the AI guess with the reference.  A guess counts as
    correct when it is within 2 points of the reference.

    Args:
        num_selected: Index of the sample being judged (the data file keeps
            two lines per sample, so line ``num_selected * 2`` is read).
        human_predict: The human player's numeric rating.
        num1: Running human score; incremented by 1 when the human guess is
            correct.
        num2: Running AI score; incremented by 1 when the AI guess is
            correct.
        user_important: Text the user marked as important; an empty string
            means nothing was marked.

    Returns:
        A tuple ``(ai_predict, chatbot, num1, num2, tot_scores)`` where
        ``chatbot`` is a one-element list holding a
        ``(verdict message, follow-up message)`` pair and ``tot_scores`` is
        the scoreboard markup string.
    """
    fname = 'data3_convai2_inferred.txt'
    # Explicit UTF-8, matching how read3() opens the same file; without it
    # the decoding depends on the platform's locale.
    with open(fname, encoding='utf-8') as f:
        content = f.readlines()

    # NOTE(security): eval() executes whatever the data file contains; only
    # acceptable while the file is a trusted, locally generated artifact.
    text = eval(content[int(num_selected * 2)])

    if text['binary_label'] == 1:
        golden_label = int(5 * (1 - text['binary_score']))
    else:
        golden_label = int(5 * (1 + text['binary_score']))

    # Off-the-shelf classifier.  It is built on every call, which makes the
    # first request in particular slow.
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    classifier = pipeline("text-classification", model="padmajabfrl/Gender-Classification", device=device)
    output = classifier([text['text']])
    print(output)
    out = output[0]

    # Map the classifier confidence onto the same scale as golden_label:
    # a confident 'Female' lands near 10, a confident other label near 0.
    if out['label'] == 'Female':
        ai_predict = int(10 * out['score'])
    else:
        ai_predict = 10 - int(10 * out['score'])

    user_select = "You focused on "
    if user_important == "":
        user_select += "nothing. Interesting! "
    else:
        user_select += "'" + user_important + "'. "
    user_select += "Wanna see how the AI made the guess? Click here. ⬅️"

    if golden_label > 6:
        gender = ' (female)'
    elif golden_label < 4:
        gender = ' (male)'
    else:
        gender = ' (neutral)'

    human_correct = abs(golden_label - human_predict) <= 2
    ai_correct = abs(golden_label - ai_predict) <= 2

    if human_correct and ai_correct:
        verdict = ". Congratulations! 🎉 Both of you get the correct answer!"
    elif human_correct:
        verdict = ". Great! 🎉 You are closer to the answer and better than AI!"
    elif ai_correct:
        verdict = ". Sorry.. AI wins in this round."
    else:
        verdict = ". Sorry.. No one gets the correct answer. But nice try! 😉"

    if human_correct:
        num1 += 1
    if ai_correct:
        num2 += 1

    chatbot = [("The correct answer is " + str(golden_label) + gender + verdict, user_select)]

    tot_scores = ''' #### <p style="text-align: center;"> Today's Scores:     🤖 Machine   <span style="color: red;">''' + str(int(num2)) + '''</span>   VS   <span style="color: red;">''' + str(int(num1)) + '''</span>   Human 🙋 </p>'''
    return ai_predict, chatbot, num1, num2, tot_scores
def interpre3(num_selected):
    """Return the stored token attributions of a sample, merged per word.

    Reads the sample text and its list of ``(token, score)`` pairs from the
    data file, tokenizes the text to obtain character offsets, and fuses
    every run of tokens that directly touch each other in the text (a
    token starting exactly where the previous one ended) into one entry.
    The fused entry concatenates the token strings and averages their
    scores.

    Assumes the stored pairs line up one-to-one with the tokenizer output
    for the same text, including the leading and trailing special tokens --
    TODO confirm against how the data file was generated.

    Args:
        num_selected: Index of the sample (the data file keeps two lines per
            sample: the text dict, then the attribution list).

    Returns:
        A dict with the keys ``"original"`` (the sample text) and
        ``"interpretation"`` (the merged ``(text, score)`` entries, with a
        final ``('', 0.0)`` entry standing in for the last stored pair).
    """
    fname = 'data3_convai2_inferred.txt'
    tokenizer = AutoTokenizer.from_pretrained("padmajabfrl/Gender-Classification")

    # Explicit UTF-8, matching how read3() opens the same file; without it
    # the decoding depends on the platform's locale.
    with open(fname, encoding='utf-8') as f:
        content = f.readlines()

    # NOTE(security): eval() executes whatever the data file contains; only
    # acceptable while the file is a trusted, locally generated artifact.
    text = eval(content[int(num_selected * 2)])
    interpretation = eval(content[int(num_selected * 2 + 1)])
    print(interpretation)

    encodings = tokenizer(text['text'], return_offsets_mapping=True)
    offsets = encodings['offset_mapping']
    print(offsets)

    # A token is a continuation when it starts exactly where the previous
    # token ended.  The first two positions are never continuations.
    is_subword = [False, False]
    for i in range(2, len(offsets)):
        is_subword.append(offsets[i][0] == offsets[i - 1][1])
    print(is_subword)

    interpretation_combined = []
    index_tmp = 0
    # The last stored pair is skipped here and replaced by ('', 0.0) below.
    while index_tmp < (len(interpretation) - 1):
        if not is_subword[index_tmp + 1]:
            interpretation_combined.append(interpretation[index_tmp])
            index_tmp += 1
            continue

        # Fuse this token with every continuation that follows it.
        text_combined = interpretation[index_tmp][0]
        score_combined = interpretation[index_tmp][1]
        length = 1
        while is_subword[index_tmp + length]:
            text_combined += interpretation[index_tmp + length][0]
            score_combined += interpretation[index_tmp + length][1]
            length += 1
        interpretation_combined.append((text_combined, score_combined / length))
        index_tmp += length

    interpretation_combined.append(('', 0.0))
    print(interpretation_combined)

    res = {"original": text['text'], "interpretation": interpretation_combined}
    return res
def func3_written(text_written, human_predict):
    """Rate user-written text with the classifier and explain the rating.

    Runs the gender classifier on ``text_written``, turns its confidence
    into a 0-10 style rating, compares it with the human rating (within 2
    points counts as close), and computes SHAP attributions that are merged
    so that tokens directly touching each other in the text form one entry.

    Args:
        text_written: Free text entered by the user.
        human_predict: The user's own numeric rating of the text.

    Returns:
        A tuple ``(res, ai_predict, chatbot)``: ``res`` is a dict with the
        keys ``"original"`` and ``"interpretation"`` (merged
        ``(text, score)`` entries ending with ``('', 0.0)``), ``ai_predict``
        is the AI rating and ``chatbot`` a one-element list holding a pair
        of messages.
    """
    model_name = "padmajabfrl/Gender-Classification"

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    classifier = pipeline("text-classification", model=model_name, device=device)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    output = classifier([text_written])
    print(output)
    out = output[0]

    scaled = int(10 * out['score'])
    ai_predict = scaled if out['label'] == 'Female' else 10 - scaled

    is_close = abs(ai_predict - human_predict) <= 2
    if is_close:
        reply = ("AI gives it a close score! 🎉", "⬅️ Feel free to try another one! This time let’s see if you can trick the AI into giving a wrong rating. ⬅️")
    else:
        reply = ("AI thinks in a different way from human. 😉", "⬅️ Feel free to try another one! ⬅️")
    chatbot = [reply]

    # SHAP is imported lazily, only once an explanation is actually needed.
    import shap
    gender_classifier = pipeline("text-classification", model=model_name, return_all_scores=True, device=device)
    explainer = shap.Explainer(gender_classifier)
    shap_values = explainer([text_written])
    # Pair every token with its attribution for the output at index 1.
    interpretation = list(zip(shap_values.data[0], shap_values.values[0, :, 1]))

    encodings = tokenizer(text_written, return_offsets_mapping=True)
    print(encodings['offset_mapping'])
    offsets = encodings['offset_mapping']

    # A token continues the previous one when it starts exactly where the
    # previous token ended; the first two positions never count.
    is_subword = [False, False] + [
        offsets[pos][0] == offsets[pos - 1][1] for pos in range(2, len(offsets))
    ]
    print(is_subword)

    merged = []
    cursor = 0
    final_index = len(interpretation) - 1
    # The last pair is left out of the walk and replaced by ('', 0.0) below.
    while cursor < final_index:
        if is_subword[cursor + 1]:
            # Fuse the current token with all continuations that follow.
            piece, total = interpretation[cursor]
            span = 1
            while is_subword[cursor + span]:
                piece += interpretation[cursor + span][0]
                total += interpretation[cursor + span][1]
                span += 1
            merged.append((piece, total / span))
            cursor += span
        else:
            merged.append(interpretation[cursor])
            cursor += 1
    merged.append(('', 0.0))
    print(merged)

    res = {"original": text_written, "interpretation": merged}
    print(res)
    return res, ai_predict, chatbot