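"""Streamlit app: Intelligent Spell & Grammar Checker.

Combines a corpus-based spell checker (Jaccard similarity over character
bigrams plus word frequency) with a fine-tuned T5 model for grammatical
error correction.
"""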
import streamlit as st
import pandas as pd
import textdistance
import re
from collections import Counter
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
# Set the page configuration as the first Streamlit command
st.set_page_config(page_title="Spell & Grammar Checker", layout="wide")
# Load the grammar correction model
@st.cache_resource
def load_grammar_model():
    model_name = 'abhinavsarkar/Google-T5-base-Grammatical_Error_Correction-Finetuned-C4-200M-550k'
    torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
    tokenizer = T5Tokenizer.from_pretrained(model_name)
    model = T5ForConditionalGeneration.from_pretrained(model_name).to(torch_device)
    return tokenizer, model, torch_device
tokenizer, model, torch_device = load_grammar_model()
# Load vocabulary for spell checking (optimized loading)
@st.cache_resource
def load_vocabulary():
    file_paths = ['Vocabulary/book.txt', 'Vocabulary/alice_in_wonderland.txt', 'Vocabulary/big.txt', 'Vocabulary/shakespeare.txt']
    words = []
    for file_path in file_paths:
        with open(file_path, 'r') as f:
            file_name_data = f.read().lower()
            words += re.findall(r'\w+', file_name_data)
    V = set(words)
    word_freq = Counter(words)
    total_count = sum(word_freq.values())  # compute the total once instead of inside the comprehension
    probs = {k: freq / total_count for k, freq in word_freq.items()}
    return V, word_freq, probs
V, word_freq, probs = load_vocabulary()
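# Spell-correction strategy: any token not found in the vocabulary V is treated
# as a potential misspelling. Candidate corrections are ranked first by Jaccard
# similarity over character bigrams (qval=2), then by relative word frequency.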
# Precompute Jaccard similarity scores for spell check
def precompute_similarities(input_word):
    input_word = input_word.lower()
    jaccard = textdistance.Jaccard(qval=2)  # bigram-based Jaccard measure, created once per call
    sim = [1 - jaccard.distance(v, input_word) for v in word_freq.keys()]
    return sim
def my_autocorrect(input_paragraph, top_n=5):
    input_paragraph = input_paragraph.lower()
    words_in_paragraph = re.findall(r'\w+', input_paragraph)
    incorrect_words = []
    corrected_words = []
    for word in words_in_paragraph:
        if word not in V:
            sim = precompute_similarities(word)
            df = pd.DataFrame.from_dict(probs, orient='index').reset_index()
            df = df.rename(columns={'index': 'Word', 0: 'Prob'})
            df['Similarity'] = sim
            output = df.sort_values(['Similarity', 'Prob'], ascending=False).head(top_n)
            output = output[['Word', 'Similarity', 'Prob']].reset_index(drop=True)
            output.index = output.index + 1
            incorrect_words.append(word)
            corrected_words.append(output)
    return incorrect_words, corrected_words
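# Grammar-correction strategy: the input paragraph is split into sentences in
# main(), and each sentence is rewritten by the fine-tuned T5 model using beam
# search, returning the top candidate corrections for the user to review.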
# Function for grammar correction
def correct_grammar(input_text, num_return_sequences=2):
    batch = tokenizer([input_text], truncation=True, padding='max_length', max_length=64, return_tensors="pt").to(torch_device)
    # Beam search returns the top `num_return_sequences` candidate corrections.
    # Note: `temperature` only applies when do_sample=True, so it is omitted here.
    translated = model.generate(**batch, max_length=64, num_beams=4, num_return_sequences=num_return_sequences)
    tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
    return tgt_text
# Streamlit app layout
def main():
    st.title("📚 Intelligent Spell & Grammar Checker")
    st.markdown("""
Welcome to the **Spell & Grammar Checker**! This app is designed to help you improve your writing by detecting and correcting spelling and grammar errors. Simply enter a paragraph below and let the app do the rest. Each section provides its own suggestions to refine your text.
""")
    paragraph = st.text_area("✨ Enter a paragraph to check for spelling and grammar issues:", height=200)
    # Two side-by-side sections
    col1, col2 = st.columns(2)
    # Initialize session state for storing results
    if 'spelling_results' not in st.session_state:
        st.session_state.spelling_results = None
    if 'grammar_results' not in st.session_state:
        st.session_state.grammar_results = None
    with col1:
        st.header("🔍 Spell Checker")
        st.markdown("""
**About the Spell Checker:**
Our spell checker uses a vocabulary built from multiple literary texts to detect potential misspellings. It offers suggestions ranked by similarity and probability, helping you identify and correct errors with ease.

**How to use:**
Enter a paragraph and click **Check Spelling** to see any misspelled words along with suggestions.
""")
        if st.button("Check Spelling"):
            if paragraph:
                with st.spinner("Checking spelling..."):
                    incorrect_words, corrected_words = my_autocorrect(paragraph)
                    if incorrect_words:
                        st.session_state.spelling_results = (incorrect_words, corrected_words)
                    else:
                        st.session_state.spelling_results = ("✅ No spelling errors detected!", [])
            else:
                st.warning("Please enter a paragraph to check for spelling.")
        if st.session_state.spelling_results:
            incorrect_words, corrected_words = st.session_state.spelling_results
            if isinstance(incorrect_words, str):
                st.success(incorrect_words)
            else:
                st.subheader("🔴 Spelling Errors & Suggestions:")
                for i, word in enumerate(incorrect_words):
                    st.write(f"**Misspelled Word**: `{word}`")
                    with st.expander(f"Suggestions for `{word}`"):
                        suggestions_df = corrected_words[i]
                        st.table(suggestions_df[['Word', 'Similarity', 'Prob']])
    with col2:
        st.header("📝 Grammar Checker")
        st.markdown("""
**About the Grammar Checker:**
Powered by a fine-tuned T5 model, our grammar checker analyzes each sentence for potential errors in structure, tense, and word choice. It offers refined suggestions to enhance readability and grammatical accuracy.

**How to use:**
Enter a paragraph and click **Check Grammar** to review each sentence with suggested improvements.
""")
        if st.button("Check Grammar"):
            if paragraph:
                with st.spinner("Checking grammar..."):
                    sentences = re.split(r'(?<=[.!?]) +', paragraph)
                    grammar_results = []
                    for sentence in sentences:
                        if sentence.strip():
                            corrected_sentences = correct_grammar(sentence, num_return_sequences=2)
                            grammar_results.append((sentence, corrected_sentences))
                    st.session_state.grammar_results = grammar_results
            else:
                st.warning("Please enter a paragraph to check for grammar.")
        if st.session_state.grammar_results:
            st.subheader("🔵 Grammar Corrections:")
            for sentence, corrected_sentences in st.session_state.grammar_results:
                with st.expander(f"**Original Sentence:** {sentence}", expanded=True):
                    st.write("### Suggestions:")
                    for corrected_sentence in corrected_sentences:
                        st.write(f"- {corrected_sentence}")
    # Model details section
    st.markdown("---")
    st.header("📘 Grammar Checker Information")
    st.markdown("""
### Grammar Checker Model
The Grammar Checker model, fine-tuned for grammatical error correction (GEC), is ideal for enhancing writing quality across various domains. Below, you'll find relevant resources related to this model's development and usage.

- 🔗 **[Finetuned Model on Hugging Face](https://huggingface.co/abhinavsarkar/Google-T5-base-Grammatical_Error_Correction-Finetuned-C4-200M-550k)**
  Access the model details, fine-tuning specifics, and download options on Hugging Face.
- 📊 **[Used Dataset on Hugging Face](https://huggingface.co/datasets/abhinavsarkar/C4-200m-550k-Determiner)**
  Explore the pre-processed dataset used to train this model.
- 📂 **[Original Dataset URL](https://www.kaggle.com/datasets/felixstahlberg/the-c4-200m-dataset-for-gec)**
  This dataset contains 200 million sentences with diverse structures, hosted on Kaggle.
- 🛠️ **[GitHub Repository](https://github.com/AbhinavSarkarr/Spell-and-Grammer-Checker)**
  Access the code repository for dataset preparation, model training, and additional development resources.
""")
    # Spell Checker Information
    st.markdown("---")
    st.header("🔍 Spell Checker Information")
    st.markdown("""
### Spell Checker
The Spell Checker builds its vocabulary from a corpus of multiple text resources and suggests corrections for spelling errors. It ranks candidates using **Jaccard similarity** (over character bigrams) and **relative word probability** to find the closest matches to each input word.

- 📂 **[Corpus Resource](https://drive.google.com/drive/u/0/folders/1WsvpWHKUv3OI2mRce-NPg4HsVPyhfk0e)**
  The vocabulary for this checker is based on a collection of literary works and publicly available texts.
""")
# Run the app
if __name__ == "__main__":
    main()