Spaces:

kargaranamir
/

LangID-LIME

Sleeping

App Files Files Community

LangID-LIME / app_legacy.py

kargaranamir

add image output

b94d9cd about 1 year ago

raw

history blame

No virus

3.31 kB

	# """
	# Author: Amir Hossein Kargaran
	# Date: August, 2023

	# Description: This code applies LIME (Local Interpretable Model-Agnostic Explanations) to fastText language identification.

	# MIT License

	# Parts of this code are adapted from: https://gist.github.com/ageitgey/60a8b556a9047a4ca91d6034376e5980
	# """

	import gradio as gr
	from io import BytesIO
	import base64
	from fasttext.FastText import _FastText
	import re
	import lime.lime_text
	import numpy as np
	from pathlib import Path
	from huggingface_hub import hf_hub_download

	# Fetch the pretrained fastText language-identification weights from the
	# Hugging Face Hub; the call yields the path that is handed to the model below.
	model_path = hf_hub_download(
	    repo_id="facebook/fasttext-language-identification",
	    filename="model.bin",
	)

	# Wrap the downloaded binary in the fastText model object used for prediction.
	classifier = _FastText(model_path=model_path)

	def remove_label_prefix(item):
	    """
	    Strip every occurrence of the '__label__' marker from a label string.
	    """
	    marker = '__label__'
	    # Splitting on the marker and re-joining with nothing drops the marker.
	    return ''.join(item.split(marker))

	def remove_label_prefix_list(input_list):
	    """
	    Remove the label prefix from a list of labels or a list of label lists.

	    Args:
	        input_list: Either a flat sequence of label strings, or a sequence
	            whose elements are lists/tuples of label strings. The layout is
	            decided by inspecting the first element only.

	    Returns:
	        A list (or list of lists) laid out like the input, with the prefix
	        stripped from every label. An empty input yields an empty list.
	    """
	    # Guard first: indexing [0] on an empty input would raise IndexError.
	    # len() is used instead of truthiness so array-like inputs also work.
	    if len(input_list) == 0:
	        return []

	    if isinstance(input_list[0], (list, tuple)):
	        # Nested case: one inner sequence of labels per input text.
	        return [[remove_label_prefix(item) for item in inner_list] for inner_list in input_list]

	    # Flat case: a simple sequence of label strings.
	    return [remove_label_prefix(item) for item in input_list]


	# Sorted array of the classifier's label names with the label prefix removed;
	# its length is the number of predictions requested per text later on.
	class_names = np.sort(remove_label_prefix_list(classifier.labels))
	num_class = len(class_names)


	def tokenize_string(string):
	    """
	    Break the text into whitespace-delimited words, similar to FastText's method.
	    """
	    words = string.split(sep=None)
	    return words

	# LIME text explainer shared by every request. Tokens come from
	# tokenize_string; bow=False disables LIME's bag-of-words mode (per the LIME
	# docs, words are then explained per position rather than per distinct word).
	explainer = lime.lime_text.LimeTextExplainer(
	    class_names=class_names,
	    bow=False,
	    split_expression=tokenize_string,
	)

	def fasttext_prediction_in_sklearn_format(classifier, texts):
	    """
	    Converts FastText predictions into Scikit-Learn format predictions.

	    Args:
	        classifier: Model object whose ``predict(texts, k)`` returns a
	            ``(labels, probabilities)`` pair with one entry per text.
	        texts: Iterable of strings to classify.

	    Returns:
	        A NumPy array with one row per text. Within a row, probabilities are
	        reordered so that their labels are in ascending sorted order.
	    """
	    # fastText's predict() refuses text containing '\n' (it raises
	    # ValueError), and LIME's perturbed samples keep the original whitespace,
	    # so newlines are flattened to spaces here. Building a real list also
	    # ensures the batch (list) code path of predict() is taken.
	    texts = [text.replace('\n', ' ') for text in texts]

	    # Nothing to classify: return an empty (0, num_class) matrix.
	    if not texts:
	        return np.zeros((0, num_class))

	    # Ask for num_class predictions per text so every row has the same width.
	    labels, probabilities = classifier.predict(texts, num_class)

	    # Remove label prefix
	    labels = remove_label_prefix_list(labels)

	    # argsort over the label names gives the permutation that puts each
	    # row's probabilities into sorted-label order.
	    return np.array([
	        np.asarray(probs)[np.argsort(np.array(label))]
	        for label, probs in zip(labels, probabilities)
	    ])

	def generate_explanation_html(input_sentence):
	    """
	    Generates an explanation HTML file using LIME for the input sentence.

	    Args:
	        input_sentence: The text whose language prediction is explained.

	    Returns:
	        Path (str) of the written ``explanation.html`` file. Each call writes
	        into its own freshly created temporary directory.

	    Raises:
	        ValueError: If the input is empty or contains only whitespace.
	    """
	    import tempfile  # local import keeps this block self-contained

	    # Fail early with a clear message instead of an obscure error from the
	    # explainer when there are no tokens to perturb.
	    if not input_sentence or not input_sentence.strip():
	        raise ValueError("Input sentence must contain at least one word.")

	    exp = explainer.explain_instance(
	        input_sentence,
	        classifier_fn=lambda x: fasttext_prediction_in_sklearn_format(classifier, x),
	        top_labels=2,
	        num_features=20,
	    )

	    # A fixed path in the working directory would be overwritten by
	    # concurrent requests; a per-call directory keeps the same file name
	    # while avoiding collisions. The directory is not cleaned up here.
	    output_dir = Path(tempfile.mkdtemp(prefix="lime_explanation_"))
	    output_html_filename = str(output_dir / "explanation.html")
	    exp.save_to_file(output_html_filename)

	    return output_html_filename

	def download_html_file(html_filename):
	    """
	    Read the given HTML file from disk and return its raw bytes.
	    """
	    with open(html_filename, mode="rb") as handle:
	        return handle.read()

	# Gradio UI: one text box in, one downloadable file out.
	input_sentence = gr.inputs.Textbox(label="Input Sentence")  # Change the label if needed
	output_explanation = gr.outputs.File(label="Download Explanation HTML")

	# Build the interface first, then start serving it.
	demo = gr.Interface(
	    allow_flagging='never',
	    fn=generate_explanation_html,
	    inputs=input_sentence,
	    outputs=output_explanation,
	)
	demo.launch()