# Spaces: Sleeping
import os | |
import re | |
import pickle | |
import numpy as np | |
import pandas as pd | |
import nltk | |
import gradio as gr | |
from sklearn.metrics.pairwise import cosine_similarity | |
class HadithClassificationApp:
    """Grade a Hadith text by finding its most similar entry in a reference set.

    The input text is normalised, vectorised with a pre-fitted vectorizer and
    compared by cosine similarity against a pre-computed matrix. The
    ``Arabic_Grade`` value of the best-matching row is returned when the
    similarity reaches a threshold.
    """

    # Arabic diacritic (tashkeel) code points: U+0617-U+061A and U+064B-U+0652.
    # Compiled once at class level instead of on every call.
    _TASHKEEL_PATTERN = re.compile(r'[\u0617-\u061A\u064B-\u0652]')

    # Message returned when no row is similar enough to the input.
    _NO_MATCH_MESSAGE = "No similar text found in dataset"

    def __init__(self):
        """Download tokenizer data and load the dataset, vectorizer and matrix.

        Reads ``Preprocess_LK_Hadith_dataset.csv``, ``tfidf_vectorizer.pkl``
        and ``cosine_similarity_model.pkl`` from the current working
        directory.
        """
        # Download NLTK resources if needed
        nltk.download('punkt')

        # Load the dataset and labels
        self.dataset = pd.read_csv("Preprocess_LK_Hadith_dataset.csv")
        self.labels = self.dataset['Arabic_Grade']

        # Load the models.
        # SECURITY NOTE: pickle.load can execute arbitrary code embedded in
        # the file; these artifacts must come from a trusted source.
        with open("tfidf_vectorizer.pkl", "rb") as f:
            self.vectorizer = pickle.load(f)
        # NOTE(review): presumably the vectorised dataset texts, with row i
        # matching row i of ``self.labels`` -- confirm against the training
        # script that produced this file.
        with open("cosine_similarity_model.pkl", "rb") as f:
            self.X = pickle.load(f)

    @staticmethod
    def remove_tashkeel(text):
        """Return *text* with Arabic diacritic marks (tashkeel) removed.

        Declared as a static method so that both ``self.remove_tashkeel(t)``
        and ``HadithClassificationApp.remove_tashkeel(t)`` work; the function
        never needed instance state.
        """
        return HadithClassificationApp._TASHKEEL_PATTERN.sub('', text)

    def preprocess_arabic_text(self, text):
        """Normalise *text* into a space-joined string of alphanumeric tokens.

        Steps: strip tashkeel, tokenise with NLTK, drop tokens that are not
        purely alphanumeric (e.g. punctuation), and lowercase the remainder.
        """
        text = self.remove_tashkeel(text)
        tokens = nltk.word_tokenize(text)
        return " ".join(token.lower() for token in tokens if token.isalnum())

    def predict_label(self, input_text, threshold=0.5):
        """Return the grade of the stored row most similar to *input_text*.

        Args:
            input_text: Raw Hadith text; ``None`` is treated as an empty
                string.
            threshold: Minimum cosine similarity required to accept the best
                match.

        Returns:
            The ``Arabic_Grade`` value of the best-matching row, or the
            string "No similar text found in dataset" when the best
            similarity is below *threshold*.
        """
        if input_text is None:
            # Avoid a TypeError inside the regex substitution.
            input_text = ""

        input_text = self.preprocess_arabic_text(input_text)
        input_vector = self.vectorizer.transform([input_text])

        # One similarity score per row of the stored matrix.
        similarities = cosine_similarity(input_vector, self.X).flatten()
        max_index = np.argmax(similarities)
        max_similarity = similarities[max_index]

        if max_similarity >= threshold:
            return self.labels.iloc[max_index]
        return self._NO_MATCH_MESSAGE

    def classify_hadith(self, input_text):
        """Classify *input_text* using the default similarity threshold."""
        return self.predict_label(input_text)
if __name__ == "__main__":
    # Build the classifier once; its constructor loads the dataset and models.
    classifier = HadithClassificationApp()

    # Describe the text-in / text-out Gradio interface around the classifier.
    interface_options = {
        "fn": classifier.classify_hadith,
        "inputs": "text",
        "outputs": "text",
        "title": "Hadith Classification App",
        "description": "Classify Hadith text based on pre-trained model.",
    }
    demo = gr.Interface(**interface_options)

    # Start serving the interface.
    demo.launch()