File size: 3,314 Bytes
b94d9cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# """
# Author: Amir Hossein Kargaran
# Date: August, 2023

# Description: This code applies LIME (Local Interpretable Model-Agnostic Explanations) to fastText language identification.

# MIT License

# Parts of this code are adapted from: https://gist.github.com/ageitgey/60a8b556a9047a4ca91d6034376e5980
# """

import base64
import re
import tempfile
from io import BytesIO
from pathlib import Path

import gradio as gr
import lime.lime_text
import numpy as np
from fasttext.FastText import _FastText
from huggingface_hub import hf_hub_download

# Fetch model.bin of the FastText language identification repository from the Hugging Face Hub
model_path = hf_hub_download(
    filename="model.bin",
    repo_id="facebook/fasttext-language-identification",
)

# Build the FastText classifier from the fetched model file
classifier = _FastText(model_path)

def remove_label_prefix(item):
    """
    Remove the leading '__label__' prefix from a single label string.

    Only a prefix at the very start of the string is stripped; a later
    occurrence of '__label__' inside the label is left untouched. A string
    that does not start with the prefix is returned unchanged.
    """
    prefix = '__label__'
    if item.startswith(prefix):
        return item[len(prefix):]
    return item

def remove_label_prefix_list(input_list):
    """
    Remove label prefix from list or list of list

    The nesting level is decided by looking at the first element only: if it
    is a list (or tuple), every element is treated as an inner sequence of
    labels; otherwise the input is treated as a flat sequence of labels.
    An empty input returns an empty list.
    """
    # len() rather than truthiness so that array-like inputs are accepted too.
    if len(input_list) == 0:
        return []

    if isinstance(input_list[0], (list, tuple)):
        # If the first element is a sequence, it's a list of lists
        return [[remove_label_prefix(item) for item in inner_list] for inner_list in input_list]

    # Otherwise, it's a simple list
    return [remove_label_prefix(item) for item in input_list]


# Class names: the classifier's labels without their prefix, in sorted order.
# fasttext_prediction_in_sklearn_format orders each probability row by sorted
# label name, so its columns line up with this array.
class_names = np.sort(remove_label_prefix_list(classifier.labels))

# Number of classes; also used as the number of labels requested per prediction
num_class = len(class_names)


def tokenize_string(string):
    """
    Break the string into whitespace-separated words, similar to FastText's method.

    Runs of whitespace count as a single separator, and leading/trailing
    whitespace produces no empty tokens.
    """
    words = string.split(None)
    return words

# LIME text explainer shared by every request.
# - class_names: sorted label names used to title the explained classes
# - bow=False: bag-of-words mode is switched off
# - split_expression: tokenize_string is used to split the text into tokens
explainer = lime.lime_text.LimeTextExplainer(
    class_names=class_names,
    bow=False,
    split_expression=tokenize_string,
)

def fasttext_prediction_in_sklearn_format(classifier, texts):
    """
    Converts FastText predictions into Scikit-Learn format predictions.

    Args:
        classifier: Object whose ``predict(texts, k)`` returns a pair
            ``(labels, probabilities)`` holding one entry per input text.
        texts: List of strings to classify.

    Returns:
        A NumPy array with one row per text. Each row contains that text's
        predicted probabilities reordered by sorted label name, which is the
        same order as the module-level ``class_names``.
    """
    # Nothing to classify: return an empty matrix with the expected width.
    if len(texts) == 0:
        return np.empty((0, num_class))

    # FastText's predict() processes one line at a time and raises ValueError
    # for any text containing a newline, so fold line breaks into spaces.
    single_line_texts = [text.replace('\n', ' ') for text in texts]

    labels, probabilities = classifier.predict(single_line_texts, num_class)

    # Remove label prefix
    labels = remove_label_prefix_list(labels)

    # predict() returns labels in its own per-text order; argsort over the
    # label names gives the permutation that puts each row in sorted order.
    return np.array([
        np.asarray(probs)[np.argsort(np.array(label))]
        for label, probs in zip(labels, probabilities)
    ])

def generate_explanation_html(input_sentence):
    """
    Generates an explanation HTML file using LIME for the input sentence.

    Args:
        input_sentence: Text whose language prediction should be explained.

    Returns:
        Path (as a string) of the written ``explanation.html`` file. Every
        call writes into its own newly created temporary directory.

    Raises:
        ValueError: If the input is empty or contains only whitespace.
    """
    # A text without any token cannot be perturbed by LIME; fail early with a
    # clear message instead of an obscure error from inside the explainer.
    if not input_sentence or not input_sentence.strip():
        raise ValueError("Input sentence must contain at least one word.")

    exp = explainer.explain_instance(
        input_sentence,
        classifier_fn=lambda x: fasttext_prediction_in_sklearn_format(classifier, x),
        top_labels=2,
        num_features=20,
    )

    # Use a per-call directory so concurrent requests never overwrite each
    # other's output, while the file keeps the name "explanation.html".
    # The directory is not removed here; it is left to the OS temp cleanup.
    output_dir = tempfile.mkdtemp(prefix="lime_explanation_")
    output_html_filename = str(Path(output_dir) / "explanation.html")
    exp.save_to_file(output_html_filename)

    return output_html_filename

def download_html_file(html_filename):
    """
    Read the given HTML file in binary mode and return its raw bytes.
    """
    with open(html_filename, mode="rb") as html_file:
        return html_file.read()

# Input widget: a text box for the sentence to explain (adjust the label if needed)
input_sentence = gr.inputs.Textbox(label="Input Sentence")
# Output widget: offers the generated explanation HTML as a downloadable file
output_explanation = gr.outputs.File(label="Download Explanation HTML")

# NOTE(review): the gr.inputs / gr.outputs namespaces appear to be deprecated in
# newer Gradio releases - confirm against the installed version before upgrading.
demo = gr.Interface(
    fn=generate_explanation_html,
    inputs=input_sentence,
    outputs=output_explanation,
    allow_flagging='never',
)
demo.launch()