Spaces:

Mohamed-Maher
/

Hadith_Classification

Sleeping

App Files Files Community

Hadith_Classification / app.py

Mohamed-Maher

Update app.py

39ccf9b verified 5 months ago

raw

history blame

2.2 kB

	import os
	import re
	import pickle
	import numpy as np
	import pandas as pd
	import nltk
	import gradio as gr
	from sklearn.metrics.pairwise import cosine_similarity

	class HadithClassificationApp:
	    """Grade a Hadith text by nearest-neighbour lookup in a labelled dataset.

	    The input is normalised, vectorised with a pre-fitted vectorizer, and
	    compared by cosine similarity against a pre-computed reference matrix.
	    The grade of the most similar reference row is returned when the
	    similarity reaches a threshold.

	    Attributes:
	        dataset: DataFrame read from ``dataset_path``.
	        labels: The ``label_column`` column of ``dataset``.
	        vectorizer: Object unpickled from ``vectorizer_path``; must expose
	            ``transform``. Presumably a fitted TF-IDF vectorizer, judging by
	            the default file name -- confirm against the training code.
	        X: Object unpickled from ``matrix_path``. ``predict_label`` uses the
	            index of the best-matching row of ``X`` to index ``labels``, so
	            the rows of ``X`` must line up one-to-one with the dataset rows.
	    """

	    # Message returned when nothing in the dataset is similar enough.
	    NO_MATCH_MESSAGE = "No similar text found in dataset"

	    # Arabic diacritical marks (U+0617-U+061A and U+064B-U+0652). Compiled
	    # once here instead of on every remove_tashkeel() call.
	    _TASHKEEL_PATTERN = re.compile(r'[\u0617-\u061A\u064B-\u0652]')

	    def __init__(
	        self,
	        dataset_path="Preprocess_LK_Hadith_dataset.csv",
	        vectorizer_path="tfidf_vectorizer.pkl",
	        matrix_path="cosine_similarity_model.pkl",
	        label_column="Arabic_Grade",
	    ):
	        """Download tokenizer data and load the dataset and pickled models.

	        Args:
	            dataset_path: CSV file holding the labelled dataset.
	            vectorizer_path: Pickle file holding the fitted vectorizer.
	            matrix_path: Pickle file holding the reference matrix.
	            label_column: Name of the dataset column holding the grades.
	        """
	        # Newer NLTK releases look up "punkt_tab" for word_tokenize while
	        # older ones use "punkt"; request both so either version works.
	        # nltk.download reports an unknown id without raising.
	        for resource in ("punkt", "punkt_tab"):
	            nltk.download(resource)

	        self.dataset = pd.read_csv(dataset_path)
	        self.labels = self.dataset[label_column]

	        self.vectorizer = self._load_pickle(vectorizer_path)
	        self.X = self._load_pickle(matrix_path)

	    @staticmethod
	    def _load_pickle(path):
	        """Return the object stored in the pickle file at ``path``."""
	        # SECURITY: unpickling executes arbitrary code. Only point this at
	        # model files shipped with the application, never at uploads.
	        with open(path, "rb") as f:
	            return pickle.load(f)

	    @staticmethod
	    def remove_tashkeel(text):
	        """Return ``text`` with Arabic diacritical marks stripped."""
	        return HadithClassificationApp._TASHKEEL_PATTERN.sub('', text)

	    def preprocess_arabic_text(self, text):
	        """Normalise ``text`` into a space-joined string of clean tokens.

	        Diacritics are removed, the text is tokenised with NLTK, tokens that
	        are not purely alphanumeric (e.g. punctuation) are dropped, and the
	        remaining tokens are lower-cased.
	        """
	        tokens = nltk.word_tokenize(self.remove_tashkeel(text))
	        return " ".join(token.lower() for token in tokens if token.isalnum())

	    def predict_label(self, input_text, threshold=0.5):
	        """Return the grade of the dataset entry most similar to ``input_text``.

	        Args:
	            input_text: Raw Hadith text; ``None`` is treated as empty.
	            threshold: Minimum cosine similarity required to accept the
	                best match.

	        Returns:
	            The label of the best-matching row, or ``NO_MATCH_MESSAGE`` when
	            the input is empty, contains no usable tokens, or no row reaches
	            ``threshold``.
	        """
	        # Guard before tokenising: None would crash in the regex, and blank
	        # input cannot match anything.
	        if input_text is None or not input_text.strip():
	            return self.NO_MATCH_MESSAGE

	        processed = self.preprocess_arabic_text(input_text)
	        # Input made only of punctuation/diacritics yields an all-zero
	        # vector; with a non-positive threshold that would otherwise return
	        # the arbitrary label of row 0.
	        if not processed:
	            return self.NO_MATCH_MESSAGE

	        input_vector = self.vectorizer.transform([processed])
	        similarities = cosine_similarity(input_vector, self.X).flatten()
	        # np.argmax raises on an empty array (reference matrix with no rows).
	        if similarities.size == 0:
	            return self.NO_MATCH_MESSAGE

	        max_index = int(np.argmax(similarities))
	        if similarities[max_index] >= threshold:
	            return self.labels.iloc[max_index]
	        return self.NO_MATCH_MESSAGE

	    def classify_hadith(self, input_text):
	        """Classify ``input_text`` using the default similarity threshold."""
	        return self.predict_label(input_text)

	if __name__ == "__main__":
	    # Build the classifier once at startup: the constructor reads the
	    # dataset and the pickled models, so it should not run per request.
	    hadith_classification_app = HadithClassificationApp()

	    # Plain text in, plain text out: the entered Hadith is passed to
	    # classify_hadith and its return value is displayed as the result.
	    iface = gr.Interface(
	        hadith_classification_app.classify_hadith,
	        "text",
	        "text",
	        description="Classify Hadith text based on pre-trained model.",
	        title="Hadith Classification App",
	    )

	    # Start serving the web UI.
	    iface.launch()