Spaces:
Sleeping
Sleeping
File size: 3,314 Bytes
b94d9cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
# """
# Author: Amir Hossein Kargaran
# Date: August, 2023
# Description: This code applies LIME (Local Interpretable Model-Agnostic Explanations) to fastText language identification.
# MIT License
# Parts of this code are adapted from: https://gist.github.com/ageitgey/60a8b556a9047a4ca91d6034376e5980
# """
import gradio as gr
from io import BytesIO
import base64
from fasttext.FastText import _FastText
import re
import lime.lime_text
import numpy as np
from pathlib import Path
from huggingface_hub import hf_hub_download
# Fetch the pretrained fastText language-identification weights from the
# Hugging Face Hub and keep the value returned by the download call.
model_path = hf_hub_download(
    filename="model.bin",
    repo_id="facebook/fasttext-language-identification",
)
# Build the fastText classifier object from the downloaded model.
classifier = _FastText(model_path)
def remove_label_prefix(item):
    """
    Remove the leading '__label__' prefix from a single label string.

    Only a prefix at the very start of the string is stripped; any later
    occurrence of '__label__' inside the label is left untouched, and a
    string without the prefix is returned unchanged.
    """
    prefix = '__label__'
    if item.startswith(prefix):
        return item[len(prefix):]
    return item
def remove_label_prefix_list(input_list):
    """
    Remove the label prefix from a flat sequence of labels or from a
    sequence of label sequences.

    Returns a list (flat input) or a list of lists (nested input) with
    remove_label_prefix applied to every label. An empty input yields an
    empty list.
    """
    # An empty input has no first element to inspect, so return early
    # instead of failing with IndexError.
    if len(input_list) == 0:
        return []
    # The first element decides whether the input is nested; tuples are
    # treated like lists so nested tuples are handled too.
    if isinstance(input_list[0], (list, tuple)):
        return [[remove_label_prefix(item) for item in inner_list] for inner_list in input_list]
    # Otherwise it is a flat sequence of label strings.
    return [remove_label_prefix(item) for item in input_list]
# Strip the label prefix from the classifier's labels and keep them as a
# sorted array; num_class is the number of entries in that array.
class_names = np.sort(remove_label_prefix_list(classifier.labels))
num_class = len(class_names)
def tokenize_string(string):
    """
    Break the text into whitespace-separated tokens, approximating the way
    FastText splits words.
    """
    tokens = string.split()
    return tokens
# LIME text explainer shared by every request: it tokenizes with
# tokenize_string, is created with bow=False, and reports results using
# the sorted class names.
explainer = lime.lime_text.LimeTextExplainer(
    class_names=class_names,
    bow=False,
    split_expression=tokenize_string,
)
def fasttext_prediction_in_sklearn_format(classifier, texts):
    """
    Converts FastText predictions into Scikit-Learn format predictions.

    Args:
        classifier: object whose predict(texts, k) returns a pair
            (labels, probabilities) with one entry per input text.
        texts: iterable of strings to classify.

    Returns:
        A numpy array with one row per text. Within each row the
        probabilities are reordered to follow the sorted order of that
        text's (prefix-free) labels, so columns line up with the sorted
        class names instead of being ordered by likelihood.
    """
    # fastText's predict refuses any text containing a newline (it handles
    # one line at a time), so flatten line breaks to spaces before predicting.
    single_line_texts = [text.replace('\n', ' ') for text in texts]
    # Request num_class predictions per text so every label gets a score.
    labels, probabilities = classifier.predict(single_line_texts, num_class)
    # Remove label prefix
    labels = remove_label_prefix_list(labels)
    # argsort over the label names gives the permutation that puts each
    # row's probabilities into sorted-label order.
    res = [
        probs[np.argsort(np.array(label))]
        for label, probs in zip(labels, probabilities)
    ]
    return np.array(res)
def generate_explanation_html(input_sentence):
    """
    Generates an explanation HTML file using LIME for the input sentence.

    Args:
        input_sentence: the text to explain; must contain at least one
            non-whitespace character.

    Returns:
        The name of the HTML file the explanation was saved to.

    Raises:
        ValueError: if the input is empty or only whitespace.
    """
    # A blank input yields no tokens for the explainer to work with, so
    # reject it up front with a clear message instead of failing deeper
    # inside the explainer.
    if not input_sentence or not input_sentence.strip():
        raise ValueError("Input sentence must contain at least one word.")
    exp = explainer.explain_instance(
        input_sentence,
        # Adapt the module-level classifier to the callable the explainer expects.
        classifier_fn=lambda x: fasttext_prediction_in_sklearn_format(classifier, x),
        top_labels=2,
        num_features=20,
    )
    output_html_filename = "explanation.html"
    exp.save_to_file(output_html_filename)
    return output_html_filename
def download_html_file(html_filename):
    """
    Read the given HTML file and return its raw bytes.
    """
    return Path(html_filename).read_bytes()
# Build the UI components with the top-level Gradio component classes;
# the older gr.inputs / gr.outputs namespaces are deprecated and absent
# from newer Gradio releases.
input_sentence = gr.Textbox(label="Input Sentence")  # Change the label if needed
output_explanation = gr.File(label="Download Explanation HTML")
# Wire the explanation function to a one-input / one-output interface with
# flagging disabled, then start the app.
demo = gr.Interface(
    fn=generate_explanation_html,
    inputs=input_sentence,
    outputs=output_explanation,
    allow_flagging='never'
)
demo.launch()
|