Jordan committed on
Commit 10f417b
1 Parent(s): e16fb05

Unbias - Version one push

Files changed (7)
  1. app.py +27 -4
  2. bias_check.py +47 -0
  3. combine_modules.py +17 -0
  4. img2txt.py +19 -0
  5. interpret_model_pt.py +8 -0
  6. load_model_pt.py +15 -0
  7. video2txt.py +22 -0
app.py CHANGED
@@ -1,7 +1,30 @@
+import combine_modules
 import gradio as gr
 
-def greet(name):
-    return "Hello " + name + "!!"
+with gr.Blocks() as demo:
+    gr.Markdown("Welcome to Bias Checking Demo !")
+    with gr.Tab("Text"):
+        text_input = gr.Textbox()
+        text_output = gr.Textbox()
+        text_output_interpret = gr.TextArea()
+        text_button = gr.Button("Check Bias in your Text")
+
+    with gr.Tab("Video"):
+        vid_input = gr.Video()
+        vid_output = gr.Textbox()
+        vid_output_interpret = gr.TextArea()
+        vid_button = gr.Button("Check Bias in your Video")
 
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()
+    with gr.Tab("Image"):
+        img_input = gr.Image()
+        img_output = gr.Textbox()
+        img_output_interpret = gr.TextArea()
+        img_button = gr.Button("Check Bias in your Image")
+
+    text_button.click(combine_modules.app_nlp_start, inputs=text_input, outputs=[text_output, text_output_interpret])
+    vid_button.click(combine_modules.app_video_start, inputs=vid_input, outputs=[vid_output, vid_output_interpret])
+    img_button.click(combine_modules.app_image_start, inputs=img_input, outputs=[img_output, img_output_interpret])
+
+
+if __name__=="__main__":
+    demo.launch()
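
The three click handlers route each tab's input through combine_modules. As a minimal sketch of what the Text tab's button triggers behind the UI (assuming the modules in this commit are importable and their dependencies are installed):

    import combine_modules

    result, interpretation = combine_modules.app_nlp_start("Some statement to check for bias.")
    print(result)
    print(interpretation)
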
bias_check.py ADDED
@@ -0,0 +1,47 @@
+import load_model_pt
+import interpret_model_pt
+
+
+def sub_pipeline(raw_input, pretrained_model):
+    tokenizer, model = load_model_pt.load_models_from_pretrained(pretrained_model)
+    output_ = load_model_pt.load_pipeline(raw_input, pretrained_model)
+    words_weightages = interpret_model_pt.explainer(raw_input, model, tokenizer)
+    return output_, words_weightages
+
+def bias_checker(input_statement):
+    pretrained_model_basic_check = "valurank/distilroberta-bias"
+    pretrained_model_political = "valurank/distilroberta-mbfc-bias"
+    pretrained_model_gender = "monologg/koelectra-base-v3-gender-bias"
+
+    raw_input = input_statement
+    # print("Checking if the input has any primary bias ?..")
+    output_stmt_zero, words_interpreted = sub_pipeline(raw_input, pretrained_model_basic_check)
+    print(output_stmt_zero)
+    return_var = " "
+    interpret_var = " "
+
+    if (output_stmt_zero["label"] == "BIASED" and output_stmt_zero["score"] >= 0.7) or (output_stmt_zero["label"] == "NEUTRAL" and output_stmt_zero["score"] < 0.6):
+        # print(output_stmt_zero)
+        # print("\n The statement seems biased, lets investigate ! \n")
+        # print(words_interpreted)
+        # print("\n Checking for political propaganda... \n")
+        output_stmt_political, words_interpreted_political = sub_pipeline(raw_input, pretrained_model_political)
+        # print(output_stmt_political, "\n")
+        # print(words_interpreted_political, "\n")
+        # print("\n Let's check for gender bias, shall we ? \n")
+        output_stmt_gender, words_interpreted_gender = sub_pipeline(raw_input, pretrained_model_gender)
+        # print(output_stmt_gender, "\n")
+        # print(words_interpreted_gender, "\n")
+        return_var = ("Generic:", output_stmt_zero, "\n", "Gender:", output_stmt_gender, "\n", "Political:", output_stmt_political)
+        interpret_var = ("Generic:", words_interpreted, "\n", "Gender:", words_interpreted_gender, "\n", "Political:", words_interpreted_political)
+    else:
+        # print("The statement seems ok as of now, please input another statement!")
+        return_var = "The statement seems ok as of now, please input another statement!"
+        interpret_var = " "
+
+    return return_var, interpret_var
+
+
+if __name__=="__main__":
+    input_stmt = "Nevertheless, Trump and other Republicans have tarred the protests as havens for terrorists intent on destroying property."
+    bias_checker(input_stmt)
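
For reference, load_model_pt.load_pipeline returns the first entry of a transformers text-classification result, a dict with "label" and "score" keys, which is what the threshold check above consumes. An illustration with made-up values:

    # illustrative values only; real scores come from valurank/distilroberta-bias
    output_stmt_zero = {"label": "BIASED", "score": 0.85}
    flagged = (output_stmt_zero["label"] == "BIASED" and output_stmt_zero["score"] >= 0.7) \
        or (output_stmt_zero["label"] == "NEUTRAL" and output_stmt_zero["score"] < 0.6)
    print(flagged)  # True for these values, so the political and gender checks would run
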
combine_modules.py ADDED
@@ -0,0 +1,17 @@
+from bias_check import bias_checker
+from video2txt import read_video_file_and_return_text as rtxt
+from img2txt import load_image_pipeline as img_pipe
+
+def app_nlp_start(statement):
+    output_txt, interpreted_txt = bias_checker(statement)
+    return output_txt, interpreted_txt
+
+def app_video_start(video_path):
+    return_text = rtxt(video_path)
+    output_txt = bias_checker(return_text)
+    return output_txt
+
+def app_image_start(image_path):
+    text_generated = img_pipe(image_path)
+    output_txt = bias_checker(text_generated)
+    return output_txt
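
bias_checker returns a (result, interpretation) pair, so app_video_start and app_image_start hand that tuple back unchanged and Gradio spreads it across the two output components wired up in app.py; only app_nlp_start unpacks it explicitly. A short sketch of the equivalence, using a hypothetical input statement:

    from bias_check import bias_checker

    output_txt = bias_checker("A hypothetical statement.")  # a (result, interpretation) tuple
    result, interpretation = output_txt                     # the unpacking app_nlp_start does explicitly
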
img2txt.py ADDED
@@ -0,0 +1,19 @@
+from transformers import pipeline
+from PIL import Image
+import os
+pretrained_img_model = "nlpconnect/vit-gpt2-image-captioning"
+
+
+def load_image_pipeline(img_path):
+    img_path_read = Image.fromarray(img_path)
+    img_path_read.save("temp_img.jpg")
+    image_to_text = pipeline("image-to-text", model=pretrained_img_model, framework="pt")
+    generated_text = image_to_text("temp_img.jpg")[0]["generated_text"]
+    os.remove("temp_img.jpg")
+    return generated_text
+
+
+if __name__=="__main__":
+    imgpath = r"C:\Users\Shringar\Pictures\ar.jpg"
+    img_text_generated = load_image_pipeline(imgpath)
+    print(img_text_generated)
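
Gradio's Image component passes the uploaded image as a numpy array by default, hence the Image.fromarray call and the temporary JPEG. As a possible simplification (a sketch, not the committed implementation), the transformers image-to-text pipeline also accepts a PIL image directly, which avoids the temp-file round trip:

    import numpy as np
    from PIL import Image
    from transformers import pipeline

    img_array = np.zeros((224, 224, 3), dtype=np.uint8)  # placeholder for the array Gradio passes in
    image_to_text = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning", framework="pt")
    caption = image_to_text(Image.fromarray(img_array))[0]["generated_text"]
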
interpret_model_pt.py ADDED
@@ -0,0 +1,8 @@
+from transformers_interpret import SequenceClassificationExplainer, MultiLabelClassificationExplainer
+
+def explainer(input_statement, model, tokenizer):
+    # cls_explainer = SequenceClassificationExplainer(model, tokenizer)
+    cls_explainer = MultiLabelClassificationExplainer(model, tokenizer)
+    word_attributions = cls_explainer(input_statement)
+    return dict(word_attributions)
+
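
If the multi-label explainer behaves as documented for transformers-interpret, calling it yields per-label (token, attribution) pairs, and dict() turns the result into a plain mapping that the Gradio TextArea can render as text. A hedged usage sketch with illustrative output:

    import load_model_pt

    tokenizer, model = load_model_pt.load_models_from_pretrained("valurank/distilroberta-bias")
    attributions = explainer("An example sentence.", model, tokenizer)
    print(attributions)  # e.g. {"BIASED": [("example", 0.41), ...]} -- illustrative values only
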
load_model_pt.py ADDED
@@ -0,0 +1,15 @@
+from transformers import pipeline
+from transformers import AutoTokenizer
+from transformers import AutoModelForSequenceClassification
+
+
+def load_pipeline(input_statement, pretrained_model_name):
+    classifier_ = pipeline("text-classification", model=pretrained_model_name, framework="pt")
+    cls_output = classifier_(input_statement)[0]
+    return cls_output
+
+def load_models_from_pretrained(checkpoint):
+    checkpoint_local = checkpoint
+    tokenizer = AutoTokenizer.from_pretrained(checkpoint_local)
+    model = AutoModelForSequenceClassification.from_pretrained(checkpoint_local)
+    return tokenizer, model
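
A hedged usage sketch of the two helpers together (this pairing is what sub_pipeline in bias_check.py does); the underlying classifier returns a list of {"label", "score"} dicts and load_pipeline keeps only the first entry:

    import load_model_pt

    cls_output = load_model_pt.load_pipeline("An example sentence.", "valurank/distilroberta-bias")
    print(cls_output["label"], cls_output["score"])  # e.g. NEUTRAL 0.97 -- illustrative values
    tokenizer, model = load_model_pt.load_models_from_pretrained("valurank/distilroberta-bias")
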
video2txt.py ADDED
@@ -0,0 +1,22 @@
+import speech_recognition as sr
+import moviepy.editor as mp
+import os
+
+
+def convert_video_to_audio(file_path):
+    vid_clip = mp.VideoFileClip(file_path)
+    vid_clip.audio.write_audiofile("temp_converted_mp3.wav")
+
+def convert_audio_to_text():
+    recogniser = sr.Recognizer()
+    audio_file = sr.AudioFile(r"temp_converted_mp3.wav")
+    with audio_file as source:
+        audio_source = recogniser.record(source)
+    audio_result = recogniser.recognize_google(audio_source)
+    os.remove("temp_converted_mp3.wav")
+    return audio_result
+
+def read_video_file_and_return_text(filepath=r"C:\Users\Shringar\Documents\Python Scripts\hface\course\emma_1.mp4"):
+    convert_video_to_audio(filepath)
+    converted_text = convert_audio_to_text()
+    return converted_text
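
Two run-time notes: moviepy needs an ffmpeg binary to decode the video, and recognize_google sends the extracted audio to Google's free Web Speech API, so an internet connection is required. A minimal usage sketch with a hypothetical local file:

    import video2txt

    transcript = video2txt.read_video_file_and_return_text("example_clip.mp4")  # hypothetical file path
    print(transcript)
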