# """
# Author: Amir Hossein Kargaran
# Date: August, 2023
# Description: This code applies LIME (Local Interpretable Model-Agnostic Explanations) on fasttext language identification.
# MIT License
# Some part of the code is adopted from here: https://gist.github.com/ageitgey/60a8b556a9047a4ca91d6034376e5980
# """

import base64
import re
import tempfile
from io import BytesIO
from pathlib import Path

import gradio as gr
import lime.lime_text
import numpy as np
from fasttext.FastText import _FastText
from huggingface_hub import hf_hub_download

# Load the FastText language identification model from Hugging Face Hub
model_path = hf_hub_download(repo_id="facebook/fasttext-language-identification", filename="model.bin")

# Create the FastText classifier
classifier = _FastText(model_path)


def remove_label_prefix(item):
    """
    Remove the '__label__' marker from a single label string.
    """
    return item.replace('__label__', '')


def remove_label_prefix_list(input_list):
    """
    Remove the '__label__' marker from a list of labels or a list of label lists.

    An empty input yields an empty list instead of failing on the
    first-element probe used to tell the two shapes apart.
    """
    # len() rather than truthiness so array-like inputs keep working.
    if len(input_list) == 0:
        return []
    if isinstance(input_list[0], list):
        # If the first element is a list, it's a list of lists
        return [[remove_label_prefix(item) for item in inner_list] for inner_list in input_list]
    # Otherwise, it's a simple list
    return [remove_label_prefix(item) for item in input_list]


# Get the sorted class names from the classifier
class_names = remove_label_prefix_list(classifier.labels)
class_names = np.sort(class_names)
num_class = len(class_names)


def tokenize_string(string):
    """
    Splits the string into words similar to FastText's method.
    """
    return string.split()


explainer = lime.lime_text.LimeTextExplainer(
    split_expression=tokenize_string,
    bow=False,
    class_names=class_names
)


def fasttext_prediction_in_sklearn_format(classifier, texts):
    """
    Converts FastText predictions into Scikit-Learn format predictions.

    Args:
        classifier: FastText model exposing ``predict(texts, k)``.
        texts: list of strings to classify.

    Returns:
        A NumPy array with one row per text. Within each row the
        probabilities are reordered so that column ``j`` belongs to the
        ``j``-th label in alphabetical order, which is the same ordering
        used to build ``class_names``.
    """
    # fastText rejects any entry that contains a newline, and the perturbed
    # samples passed in here keep the whitespace of the user's original text.
    # Newlines are only whitespace separators for our purposes, so flatten them.
    single_line_texts = [text.replace("\n", " ") for text in texts]

    labels, probabilities = classifier.predict(single_line_texts, num_class)

    # Remove label prefix
    labels = remove_label_prefix_list(labels)

    res = []
    for label, probs in zip(labels, probabilities):
        # fastText returns labels ranked by score; sort by label name so every
        # row shares the same column layout.
        order = np.argsort(np.array(label))
        res.append(np.asarray(probs)[order])

    return np.array(res)


def generate_explanation_html(input_sentence):
    """
    Generates an explanation HTML file using LIME for the input sentence.

    Args:
        input_sentence: the text whose language prediction should be explained.

    Returns:
        Path (as a string) of the generated ``explanation.html`` file.

    Raises:
        ValueError: if the input holds no whitespace-delimited token.
    """
    # Nothing can be explained without at least one token to perturb; fail
    # with a readable message rather than an error from deep inside LIME.
    if not input_sentence or not input_sentence.strip():
        raise ValueError("Input sentence must contain at least one word.")

    exp = explainer.explain_instance(
        input_sentence,
        classifier_fn=lambda x: fasttext_prediction_in_sklearn_format(classifier, x),
        top_labels=2,
        num_features=20,
    )

    # Write into a fresh directory per call so simultaneous requests cannot
    # overwrite each other's output, while keeping the original file name.
    output_html_filename = str(Path(tempfile.mkdtemp()) / "explanation.html")
    exp.save_to_file(output_html_filename)

    return output_html_filename


def download_html_file(html_filename):
    """
    Reads the given HTML file and returns its raw bytes.
    """
    with open(html_filename, "rb") as file:
        html_content = file.read()
    return html_content


# NOTE(review): gr.inputs / gr.outputs are the legacy Gradio component
# namespaces; newer Gradio releases expose gr.Textbox / gr.File instead.
# Confirm against the Gradio version this app is pinned to before upgrading.
input_sentence = gr.inputs.Textbox(label="Input Sentence")  # Change the label if needed
output_explanation = gr.outputs.File(label="Download Explanation HTML")

gr.Interface(
    fn=generate_explanation_html,
    inputs=input_sentence,
    outputs=output_explanation,
    allow_flagging='never'
).launch()