Spaces: Build error

Commit 10f417b
Committed by Jordan
1 Parent(s): e16fb05

Unbias - Version one push

Files changed:
- app.py                 +27 -4
- bias_check.py          +47 -0
- combine_modules.py     +17 -0
- img2txt.py             +19 -0
- interpret_model_pt.py   +8 -0
- load_model_pt.py       +15 -0
- video2txt.py           +22 -0
app.py
CHANGED
@@ -1,7 +1,30 @@
+import combine_modules
 import gradio as gr
 
-
-
+with gr.Blocks() as demo:
+    gr.Markdown("Welcome to Bias Checking Demo !")
+    with gr.Tab("Text"):
+        text_input = gr.Textbox()
+        text_output = gr.Textbox()
+        text_output_interpret = gr.TextArea()
+        text_button = gr.Button("Check Bias in your Text")
+
+    with gr.Tab("Video"):
+        vid_input = gr.Video()
+        vid_output = gr.Textbox()
+        vid_output_interpret = gr.TextArea()
+        vid_button = gr.Button("Check Bias in your Video")
 
-
-
+    with gr.Tab("Image"):
+        img_input = gr.Image()
+        img_output = gr.Textbox()
+        img_output_interpret = gr.TextArea()
+        img_button = gr.Button("Check Bias in your Image")
+
+    text_button.click(combine_modules.app_nlp_start, inputs=text_input, outputs=[text_output, text_output_interpret])
+    vid_button.click(combine_modules.app_video_start, inputs=vid_input, outputs=[vid_output, vid_output_interpret])
+    img_button.click(combine_modules.app_image_start, inputs=img_input, outputs=[img_output, img_output_interpret])
+
+
+if __name__=="__main__":
+    demo.launch()
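For orientation, a minimal sketch of the pattern app.py relies on: a Blocks click handler returns a tuple, and Gradio unpacks it onto the components listed in outputs, in order. The echo function and labels below are hypothetical stand-ins, not part of the commit.

    import gradio as gr

    # Hypothetical two-output handler: the returned tuple is unpacked onto
    # the two components listed in `outputs`, in order.
    def echo(text):
        return text, "interpretation of: " + text

    with gr.Blocks() as sketch:
        inp = gr.Textbox(label="Input")
        out = gr.Textbox(label="Result")
        out_detail = gr.TextArea(label="Details")
        btn = gr.Button("Run")
        btn.click(echo, inputs=inp, outputs=[out, out_detail])

    if __name__ == "__main__":
        sketch.launch()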
bias_check.py
ADDED
@@ -0,0 +1,47 @@
+import load_model_pt
+import interpret_model_pt
+
+
+def sub_pipeline(raw_input, pretrained_model):
+    tokenizer, model = load_model_pt.load_models_from_pretrained(pretrained_model)
+    output_ = load_model_pt.load_pipeline(raw_input, pretrained_model)
+    words_weightages = interpret_model_pt.explainer(raw_input, model, tokenizer)
+    return output_, words_weightages
+
+def bias_checker(input_statement):
+    pretrained_model_basic_check = "valurank/distilroberta-bias"
+    pretrained_model_political = "valurank/distilroberta-mbfc-bias"
+    pretrained_model_gender = "monologg/koelectra-base-v3-gender-bias"
+
+    raw_input = input_statement
+    # print("Checking if the input has any primary bias ?..")
+    output_stmt_zero, words_interpreted = sub_pipeline(raw_input, pretrained_model_basic_check)
+    print(output_stmt_zero)
+    return_var = " "
+    interpret_var = " "
+
+    if (output_stmt_zero["label"] == "BIASED" and output_stmt_zero["score"] >= 0.7) or (output_stmt_zero["label"] == "NEUTRAL" and output_stmt_zero["score"] < 0.6):
+        # print(output_stmt_zero)
+        # print("\n The statement seems biased, lets investigate ! \n")
+        # print(words_interpreted)
+        # print("\n Checking for political propaganda... \n")
+        output_stmt_political, words_interpreted_political = sub_pipeline(raw_input, pretrained_model_political)
+        # print(output_stmt_political, "\n")
+        # print(words_interpreted_political, "\n")
+        # print("\n Let's check for gender bias, shall we ? \n")
+        output_stmt_gender, words_interpreted_gender = sub_pipeline(raw_input, pretrained_model_gender)
+        # print(output_stmt_gender, "\n")
+        # print(words_interpreted_gender, "\n")
+        return_var = ("Generic:", output_stmt_zero, "\n", "Gender:", output_stmt_gender, "\n", "Political:", output_stmt_political)
+        interpret_var = ("Generic:", words_interpreted, "\n", "Gender:", words_interpreted_gender, "\n", "Political:", words_interpreted_political)
+    else:
+        # print("The statement seems ok as of now, please input another statement!")
+        return_var = "The statement seems ok as of now, please input another statement!"
+        interpret_var = " "
+
+    return return_var, interpret_var
+
+
+if __name__=="__main__":
+    input_stmt = "Nevertheless, Trump and other Republicans have tarred the protests as havens for terrorists intent on destroying property."
+    bias_checker(input_stmt)
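The thresholds in bias_checker assume the first value returned by sub_pipeline is a single classification dict with label and score keys. A small illustration of how that branch behaves, using made-up values rather than real model output:

    # Illustrative only: the shape bias_checker expects from sub_pipeline's
    # first return value (one dict from a text-classification pipeline).
    example_output = {"label": "BIASED", "score": 0.93}

    # Mirrors the condition in bias_checker: a confident BIASED label, or a
    # weak NEUTRAL label, sends the input on to the political/gender checks.
    is_suspect = (
        (example_output["label"] == "BIASED" and example_output["score"] >= 0.7)
        or (example_output["label"] == "NEUTRAL" and example_output["score"] < 0.6)
    )
    print(is_suspect)  # True for the made-up values above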
combine_modules.py
ADDED
@@ -0,0 +1,17 @@
+from bias_check import bias_checker
+from video2txt import read_video_file_and_return_text as rtxt
+from img2txt import load_image_pipeline as img_pipe
+
+def app_nlp_start(statement):
+    output_txt, interpreted_txt = bias_checker(statement)
+    return output_txt, interpreted_txt
+
+def app_video_start(video_path):
+    return_text = rtxt(video_path)
+    output_txt = bias_checker(return_text)
+    return output_txt
+
+def app_image_start(image_path):
+    text_generated = img_pipe(image_path)
+    output_txt = bias_checker(text_generated)
+    return output_txt
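A sketch of calling these wrappers directly, assuming the checkpoints can be downloaded. Note that app_video_start and app_image_start pass bias_checker's (result, interpretation) tuple through as a single return value, which Gradio then unpacks across the two output components wired up in app.py. The inputs below are stand-ins.

    import numpy as np
    import combine_modules

    # Text path: returns (classification summary, word attributions).
    text_result, text_interpretation = combine_modules.app_nlp_start(
        "This statement might be biased."
    )
    print(text_result)

    # Image path: gr.Image hands the function a pixel array, so a blank
    # 64x64 RGB array stands in here purely for illustration.
    blank_image = np.zeros((64, 64, 3), dtype=np.uint8)
    image_result = combine_modules.app_image_start(blank_image)
    print(image_result)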
img2txt.py
ADDED
@@ -0,0 +1,19 @@
+from transformers import pipeline
+from PIL import Image
+import os
+pretrained_img_model = "nlpconnect/vit-gpt2-image-captioning"
+
+
+def load_image_pipeline(img_path):
+    img_path_read = Image.fromarray(img_path)
+    img_path_read.save("temp_img.jpg")
+    image_to_text = pipeline("image-to-text", model=pretrained_img_model, framework="pt")
+    generated_text = image_to_text("temp_img.jpg")[0]["generated_text"]
+    os.remove("temp_img.jpg")
+    return generated_text
+
+
+if __name__=="__main__":
+    imgpath = r"C:\Users\Shringar\Pictures\ar.jpg"
+    img_text_generated = load_image_pipeline(imgpath)
+    print(img_text_generated)
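Because load_image_pipeline calls Image.fromarray, it expects a pixel array (which is what gr.Image passes in) rather than a file path, so the __main__ block above would need the file read into an array first. A sketch of array-based usage with a hypothetical local file:

    import numpy as np
    from PIL import Image
    import img2txt

    # Hypothetical local image; gr.Image would supply an equivalent array.
    pixels = np.array(Image.open("ar.jpg").convert("RGB"))
    caption = img2txt.load_image_pipeline(pixels)
    print(caption)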
interpret_model_pt.py
ADDED
@@ -0,0 +1,8 @@
+from transformers_interpret import SequenceClassificationExplainer, MultiLabelClassificationExplainer
+
+def explainer(input_statement, model, tokenizer):
+    # cls_explainer = SequenceClassificationExplainer(model, tokenizer)
+    cls_explainer = MultiLabelClassificationExplainer(model, tokenizer)
+    word_attributions = cls_explainer(input_statement)
+    return dict(word_attributions)
+
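A standalone sketch of the explainer against one of the checkpoints used in bias_check.py. MultiLabelClassificationExplainer returns word attributions keyed by class label, which is why the result converts cleanly with dict(); the example keys and scores shown are illustrative.

    from transformers import AutoModelForSequenceClassification, AutoTokenizer
    import interpret_model_pt

    # Same checkpoint bias_check.py uses for the basic check.
    checkpoint = "valurank/distilroberta-bias"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

    attributions = interpret_model_pt.explainer("The protests were havens for terrorists.", model, tokenizer)
    # Illustrative shape: {"BIASED": [("The", 0.01), ...], "NEUTRAL": [...]}
    for label, word_scores in attributions.items():
        print(label, word_scores[:3])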
load_model_pt.py
ADDED
@@ -0,0 +1,15 @@
+from transformers import pipeline
+from transformers import AutoTokenizer
+from transformers import AutoModelForSequenceClassification
+
+
+def load_pipeline(input_statement, pretrained_model_name):
+    classifier_ = pipeline("text-classification", model=pretrained_model_name, framework="pt")
+    cls_output = classifier_(input_statement)[0]
+    return cls_output
+
+def load_models_from_pretrained(checkpoint):
+    checkpoint_local = checkpoint
+    tokenizer = AutoTokenizer.from_pretrained(checkpoint_local)
+    model = AutoModelForSequenceClassification.from_pretrained(checkpoint_local)
+    return tokenizer, model
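A quick sketch of the two helpers: a text-classification pipeline returns a list of dicts, and load_pipeline hands back the first one, which is what bias_check.py indexes with ["label"] and ["score"]. The printed values are illustrative only.

    import load_model_pt

    checkpoint = "valurank/distilroberta-bias"  # same checkpoint bias_check.py uses
    tokenizer, model = load_model_pt.load_models_from_pretrained(checkpoint)

    result = load_model_pt.load_pipeline("Some possibly loaded statement.", checkpoint)
    # Illustrative shape: {'label': 'BIASED', 'score': 0.87}
    print(result["label"], result["score"])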
video2txt.py
ADDED
@@ -0,0 +1,22 @@
+import speech_recognition as sr
+import moviepy.editor as mp
+import os
+
+
+def convert_video_to_audio(file_path):
+    vid_clip = mp.VideoFileClip(file_path)
+    vid_clip.audio.write_audiofile("temp_converted_mp3.wav")
+
+def convert_audio_to_text():
+    recogniser = sr.Recognizer()
+    audio_file = sr.AudioFile(r"temp_converted_mp3.wav")
+    with audio_file as source:
+        audio_source = recogniser.record(source)
+    audio_result = recogniser.recognize_google(audio_source)
+    os.remove("temp_converted_mp3.wav")
+    return audio_result
+
+def read_video_file_and_return_text(filepath=r"C:\Users\Shringar\Documents\Python Scripts\hface\course\emma_1.mp4"):
+    convert_video_to_audio(filepath)
+    converted_text = convert_audio_to_text()
+    return converted_text
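A usage sketch, assuming a short local clip with an audio track (the filename is a stand-in). recognize_google calls the free Google Web Speech API, so it needs network access and can raise sr.UnknownValueError or sr.RequestError when transcription fails.

    import speech_recognition as sr
    import video2txt

    try:
        # Hypothetical local clip; any video with an audio track would do.
        transcript = video2txt.read_video_file_and_return_text("sample_clip.mp4")
        print(transcript)
    except (sr.UnknownValueError, sr.RequestError) as err:
        print("Transcription failed:", err)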