Spaces:
Sleeping
Sleeping
File size: 2,200 Bytes
39ccf9b 18feff1 39ccf9b 18feff1 39ccf9b 18feff1 39ccf9b 18feff1 39ccf9b 18feff1 39ccf9b 18feff1 39ccf9b 18feff1 39ccf9b 18feff1 39ccf9b 18feff1 39ccf9b 18feff1 39ccf9b 18feff1 39ccf9b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import os
import re
import pickle
import numpy as np
import pandas as pd
import nltk
import gradio as gr
from sklearn.metrics.pairwise import cosine_similarity
class HadithClassificationApp:
    """Grade a hadith text by its most similar entry in a stored dataset.

    The input is normalised, projected with a pickled vectorizer and compared
    by cosine similarity against a pickled matrix ``X``. The ``Arabic_Grade``
    value at the best-matching position is returned when the similarity
    reaches the threshold.
    """

    # Compiled once instead of on every call. Matches the Arabic diacritic
    # code points U+0617-U+061A and U+064B-U+0652.
    _TASHKEEL_PATTERN = re.compile(r'[\u0617-\u061A\u064B-\u0652]')

    # Reply returned whenever no dataset entry is similar enough.
    _NO_MATCH_MESSAGE = "No similar text found in dataset"

    def __init__(self):
        """Fetch tokenizer data and load the dataset, vectorizer and matrix.

        All files are opened by relative path, i.e. resolved against the
        current working directory.
        """
        # word_tokenize needs the Punkt tokenizer data. Recent NLTK releases
        # look it up under "punkt_tab" rather than "punkt", so request both;
        # quiet=True suppresses the notice for whichever id the installed
        # version does not know.
        for resource in ("punkt", "punkt_tab"):
            nltk.download(resource, quiet=True)

        # Load the dataset and the grade column used as the label source.
        self.dataset = pd.read_csv("Preprocess_LK_Hadith_dataset.csv")
        self.labels = self.dataset['Arabic_Grade']

        # NOTE(security): pickle.load can execute arbitrary code contained in
        # the file. Only ship artifacts that come from a trusted source.
        with open("tfidf_vectorizer.pkl", "rb") as f:
            self.vectorizer = pickle.load(f)
        # Presumably the vectorized dataset texts, row-aligned with the CSV
        # (predict_label indexes self.labels by the best row) -- TODO confirm.
        with open("cosine_similarity_model.pkl", "rb") as f:
            self.X = pickle.load(f)

    @staticmethod
    def remove_tashkeel(text):
        """Return ``text`` with Arabic diacritics (tashkeel) removed."""
        return HadithClassificationApp._TASHKEEL_PATTERN.sub('', text)

    def preprocess_arabic_text(self, text):
        """Normalise ``text`` into a space-joined string of clean tokens.

        Diacritics are stripped, the text is tokenized with NLTK, tokens that
        are not purely alphanumeric (punctuation, mixed symbols) are dropped,
        and the remaining tokens are lower-cased.
        """
        stripped = self.remove_tashkeel(text)
        tokens = nltk.word_tokenize(stripped)
        return " ".join(token.lower() for token in tokens if token.isalnum())

    def predict_label(self, input_text, threshold=0.5):
        """Return the grade of the dataset entry most similar to ``input_text``.

        Args:
            input_text: Raw hadith text. ``None`` and blank strings are
                treated as "nothing to classify".
            threshold: Minimum cosine similarity required to accept the best
                match.

        Returns:
            The ``Arabic_Grade`` value of the best match, or the string
            "No similar text found in dataset" when the input is blank or the
            best similarity is below ``threshold``.
        """
        # Blank input cannot match anything. Answering here avoids a crash on
        # None and avoids returning the first row's label when threshold <= 0.
        if input_text is None or not input_text.strip():
            return self._NO_MATCH_MESSAGE

        query = self.preprocess_arabic_text(input_text)
        query_vector = self.vectorizer.transform([query])
        similarities = cosine_similarity(query_vector, self.X).flatten()

        best_index = np.argmax(similarities)
        if similarities[best_index] >= threshold:
            return self.labels.iloc[best_index]
        return self._NO_MATCH_MESSAGE

    def classify_hadith(self, input_text):
        """Classify ``input_text`` with the default similarity threshold."""
        return self.predict_label(input_text)
def main():
    """Build the classifier and serve it through a Gradio text interface."""
    # Initialize the app (loads the dataset and pickled artifacts from disk).
    hadith_classification_app = HadithClassificationApp()

    # Set up the Gradio interface: one text box in, one text box out.
    iface = gr.Interface(
        fn=hadith_classification_app.classify_hadith,
        inputs="text",
        outputs="text",
        title="Hadith Classification App",
        description="Classify Hadith text based on pre-trained model.",
    )

    # Launch the Gradio interface.
    iface.launch()


if __name__ == "__main__":
    main()