Spaces:

HalalFoodNLP
/

halalnlp

Running

App Files Files Community

halalnlp / app.py

aisyahhrazak

Update app.py

535c2fb verified about 1 month ago

raw

history blame

16.4 kB

	import gradio as gr
	from transformers import pipeline, AutoTokenizer
	from classifier import MistralForSequenceClassification
	import torch
	import nltk
	import json
	import pandas as pd
	import plotly.graph_objects as go
	from wordcloud import WordCloud
	import matplotlib.pyplot as plt
	import io
	import base64
	from PIL import Image
	from nltk import bigrams
	import malaya
	from collections import Counter
	import os

	# Hugging Face access token, read from the `hf_token` environment variable.
	# os.getenv returns None when the variable is not set.
	HF_TOKEN = os.getenv('hf_token')

	# Flagging callback built from the token and the dataset name
	# 'aisyahhrazak/tpb-crowdsourced-dataset'; it is set up and invoked in the
	# "User View" tab further down.
	hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN,'aisyahhrazak/tpb-crowdsourced-dataset')

	# Build the stopword list used to filter tokens before bigram counting:
	# Malaya's stopwords + the list stored in en.json + hand-picked extra tokens.
	# The file is opened as UTF-8 explicitly so loading does not depend on the
	# platform's default encoding.
	with open('en.json', encoding='utf-8') as fopen:
	    en = json.load(fopen)

	stopwords = malaya.text.function.get_stopwords()
	stopwords = stopwords + en + ['lor', 'quote','Quote','QUOTE','pm', 'long', 'jer', 'time', 'feel', 'liao', 'wow', 'https', 'http', 've', 'ko', 'kena', 'post', 'ni', 'tu', 'don', 'je', 'jeh', 'la', 'tau', 'haha', 'hahaha', 'hahahaha']
	# NOTE(review): generate_bigrams compares single, lower-cased alphanumeric
	# tokens against this list, so the multi-word entries below (and the
	# capitalised 'Quote'/'QUOTE' above) cannot match there.
	stopwords += ['for me', 'to be', 'in the', 'me to', 'for me to']

	# Fetch the NLTK resources, without progress output.
	for _nltk_resource in ('punkt', 'punkt_tab', 'stopwords', 'vader_lexicon'):
	    nltk.download(_nltk_resource, quiet=True)

	# One tokenizer is shared by both classification pipelines below.
	tokenizer_tpb = AutoTokenizer.from_pretrained(
	    'mesolitica/malaysian-mistral-191M-MLM-512'
	)

	# TPB-factor classifier and sentiment classifier, both loaded in bfloat16.
	model_tpb = MistralForSequenceClassification.from_pretrained(
	    'HalalFoodNLP/tpb-model-halal',
	    torch_dtype=torch.bfloat16,
	)
	model_sentiment = MistralForSequenceClassification.from_pretrained(
	    'malaysia-ai/sentiment-mistral-191M-MLM',
	    torch_dtype=torch.bfloat16,
	)

	pipeline_tpb = pipeline(
	    task="text-classification",
	    model=model_tpb,
	    tokenizer=tokenizer_tpb,
	)
	sentiment_pipeline = pipeline(
	    task="sentiment-analysis",
	    model=model_sentiment,
	    tokenizer=tokenizer_tpb,
	)

	# Load the pre-labelled comments (one JSON object per line) into a DataFrame.
	# The file is read as UTF-8 explicitly, and blank lines (e.g. a trailing
	# newline) are skipped instead of crashing json.loads.
	with open('sentiment-tpb-dataset.jsonl', 'r', encoding='utf-8') as file:
	    data = [json.loads(line) for line in file if line.strip()]

	df = pd.DataFrame(data)

	def generate_wordcloud(text):
	    """Render a word cloud for *text* and return it as a PIL image.

	    Args:
	        text: The raw text to visualise.

	    Returns:
	        A PIL ``Image`` holding the rendered PNG, or ``None`` when *text* is
	        empty/whitespace or WordCloud finds no word to plot.
	    """
	    # Nothing to draw: return None (an empty image slot) instead of raising.
	    if not text or not text.strip():
	        return None

	    try:
	        wordcloud = WordCloud(width=300, height=200, background_color='white').generate(text)
	    except ValueError:
	        # WordCloud raises ValueError when no word survives its own filtering.
	        return None

	    # Draw on a dedicated figure and always close it, so a failure while
	    # plotting or saving does not leave an open figure behind.
	    fig = plt.figure(figsize=(10, 5))
	    try:
	        plt.imshow(wordcloud, interpolation='bilinear')
	        plt.axis('off')
	        plt.tight_layout(pad=0)

	        # Save the plot to an in-memory PNG
	        buf = io.BytesIO()
	        plt.savefig(buf, format='png')
	    finally:
	        plt.close(fig)

	    # Rewind so PIL reads the PNG from the start of the buffer
	    buf.seek(0)
	    return Image.open(buf)

	def generate_bigrams(text):
	    """Return the ten most common adjacent word pairs in *text*.

	    The text is lower-cased and tokenised with NLTK; tokens that are not
	    alphanumeric or that appear in the module-level ``stopwords`` list are
	    dropped before pairs are formed.

	    Args:
	        text: The raw text to analyse.

	    Returns:
	        A list of ``((word1, word2), count)`` tuples, most frequent first,
	        with at most ten entries.
	    """
	    # Build the lookup set once per call: membership tests against the
	    # stopword list would otherwise scan the whole list for every token.
	    stopword_set = set(stopwords)
	    words = [
	        word
	        for word in nltk.word_tokenize(text.lower())
	        if word.isalnum() and word not in stopword_set
	    ]
	    return Counter(bigrams(words)).most_common(10)

	def predict_decision(sentiment_label):
	    """Map a sentiment label to a purchase-likelihood statement.

	    The label is compared case-insensitively and ignoring surrounding
	    whitespace, so 'Positive' or 'POSITIVE' is not silently treated as a
	    non-positive label.

	    Args:
	        sentiment_label: Sentiment label such as 'positive', 'neutral' or
	            'negative'.

	    Returns:
	        "High likelihood of purchase" for positive, "Moderate likelihood of
	        purchase" for neutral, and "Low likelihood of purchase" for anything
	        else (including non-string values).
	    """
	    if isinstance(sentiment_label, str):
	        sentiment_label = sentiment_label.strip().lower()

	    if sentiment_label == 'positive':
	        return "High likelihood of purchase"
	    if sentiment_label == 'neutral':
	        return "Moderate likelihood of purchase"
	    return "Low likelihood of purchase"

	# Recommendation text per TPB factor, keyed by the value of 'tpb_label'.
	_TPB_RECOMMENDATIONS = {
	    "attitude": """
	Current Issues:
	- High negative perception regarding product quality
	- Concerns about halal certification and its authenticity
	- Pricing issues in comparison to perceived value

	Recommended Actions:

	1. Quality Control Improvements
	- Implement enhanced product quality measures
	- Obtain globally recognized halal certifications
	- Conduct regular quality audits

	2. Educational Campaigns
	- Educate customers on halal certification processes
	- Raise awareness about the health benefits of halal products
	- Highlight ethical and sustainable sourcing

	3. Pricing Strategy Adjustment
	- Reassess pricing to align with customer expectations
	- Introduce discount programs or loyalty initiatives
	""",
	    "religious knowledge": """
	Current Issues:
	- Lack of awareness and understanding about the halal process
	- Customers may be unsure of the religious guidelines followed

	Recommended Actions:

	1. Religious Knowledge Enhancement
	- Provide clear educational materials on the halal process
	- Collaborate with religious scholars to endorse products
	- Ensure transparent labeling and certification

	2. Community Engagement
	- Host webinars or community events about halal
	- Partner with local religious organizations for outreach
	- Share customer testimonials emphasizing trust in your certification
	""",
	    "subjective norms": """
	Current Issues:
	- Social influence or peer pressure regarding halal compliance is weak
	- Lack of community-driven recommendations for the product

	Recommended Actions:

	1. Influence Social Circles
	- Engage community leaders or influencers to endorse products
	- Create social campaigns around the halal certification to enhance peer recommendations

	2. Referral Programs
	- Introduce referral programs where existing customers can promote the product
	- Offer incentives for customers who share their experiences with others

	3. Testimonials and Success Stories
	- Use customer testimonials and success stories to strengthen social trust
	""",
	    "perceived behavioural control": """
	Current Issues:
	- Perceived difficulty in understanding or accessing halal-certified products
	- Concerns about control over product quality and sourcing transparency

	Recommended Actions:

	1. Improve Accessibility
	- Make halal products more accessible through multiple platforms (e-commerce, retail stores)
	- Ensure ease of purchase and fast delivery options

	2. Enhance Transparency
	- Provide detailed information about sourcing and production processes
	- Use blockchain or similar technology to enhance transparency in halal certification

	3. Customer Empowerment
	- Offer customer feedback channels to empower users to voice concerns and suggestions
	- Ensure that concerns are addressed promptly to build trust and satisfaction
	""",
	}


	def generate_report(tpb_sentiment_df, negative_threshold=70):
	    """Build a Markdown recommendations report from per-factor sentiment shares.

	    Args:
	        tpb_sentiment_df: DataFrame with a 'tpb_label' column and one
	            percentage column per sentiment. Only 'negative' is read; if that
	            column is absent (no negative comments at all) it counts as 0.
	        negative_threshold: A factor gets a recommendations section only when
	            its negative percentage is strictly greater than this value.

	    Returns:
	        The report as a Markdown string. It always starts with the report
	        title; one section is appended per known TPB factor above the
	        threshold.
	    """
	    report = "## TPB Factor Analysis and Recommendations Report\n\n"

	    for _, row in tpb_sentiment_df.iterrows():
	        tpb_label = row['tpb_label']
	        # .get avoids a KeyError when no comment carried the 'negative' label
	        # and the column therefore does not exist.
	        negative_percentage = row.get('negative', 0)

	        # Only factors with negative sentiment above the threshold get advice.
	        if not negative_percentage > negative_threshold:
	            continue

	        recommendations = _TPB_RECOMMENDATIONS.get(tpb_label)
	        if recommendations is None:
	            # Unknown factor: nothing to recommend.
	            continue

	        report += f"### {tpb_label.capitalize()} ({negative_percentage:.1f}% Negative)\n"
	        report += recommendations

	    return report


	def search_company(keyword):
	    """Build the Company View dashboard for comments that mention *keyword*.

	    The module-level ``df`` is filtered to rows whose 'text' contains the
	    keyword (literal, case-insensitive match), and charts, a sample table, a
	    recommendations report, word clouds and a bigram table are derived from
	    the matching rows.

	    Args:
	        keyword: Search term typed by the user.

	    Returns:
	        A 10-tuple: (sentiment figure, TPB figure, stacked TPB/sentiment
	        figure, up to five matching rows shorter than 300 characters, report
	        Markdown, attitude word cloud, religious-knowledge word cloud,
	        subjective-norms word cloud, perceived-behavioural-control word
	        cloud, bigram DataFrame). Every slot is None when the keyword is
	        empty or nothing matches.
	    """
	    # The Search button is wired to ten output components, so the "nothing to
	    # show" result must also carry ten values.
	    no_results = (None,) * 10

	    if not keyword:
	        return no_results

	    # regex=False: treat user input literally (characters like "(" would
	    # otherwise raise). na=False: rows with missing text never match.
	    matches = df['text'].str.contains(keyword, case=False, regex=False, na=False)
	    filtered_df = df[matches]

	    if filtered_df.empty:
	        return no_results

	    # Colour used for each sentiment label in the charts
	    color_map = {
	        'negative': 'red',
	        'neutral': 'gray',
	        'positive': 'blue',
	    }

	    # Overall sentiment distribution (percentages)
	    sentiment_counts = filtered_df['label'].value_counts(normalize=True) * 100
	    sentiment_fig = go.Figure(data=[go.Bar(
	        x=sentiment_counts.index,
	        y=sentiment_counts.values,
	        text=[f'{val:.1f}%' for val in sentiment_counts.values],
	        textposition='auto',
	        # Any label other than negative/neutral is drawn in blue here
	        marker_color=[color_map.get(sentiment, 'blue') for sentiment in sentiment_counts.index],
	    )])
	    sentiment_fig.update_layout(
	        title='Overall Sentiment Distribution',
	        xaxis_title='Sentiment',
	        yaxis_title='Percentage',
	    )

	    # Overall TPB factor distribution (percentages)
	    tpb_counts = filtered_df['tpb_label'].value_counts(normalize=True) * 100
	    tpb_fig = go.Figure(data=[go.Bar(
	        x=tpb_counts.index,
	        y=tpb_counts.values,
	        text=[f'{val:.1f}%' for val in tpb_counts.values],
	        textposition='auto',
	    )])
	    tpb_fig.update_layout(title='Overall TPB Factor Distribution', xaxis_title='TPB Factor', yaxis_title='Percentage')

	    # Sentiment share within each TPB factor (each row sums to 100)
	    tpb_sentiment_df = filtered_df.groupby(['tpb_label', 'label']).size().unstack(fill_value=0)
	    tpb_sentiment_df = tpb_sentiment_df.div(tpb_sentiment_df.sum(axis=1), axis=0) * 100

	    tpb_sentiment_fig = go.Figure()
	    for sentiment in tpb_sentiment_df.columns:
	        tpb_sentiment_fig.add_trace(go.Bar(
	            name=sentiment,
	            x=tpb_sentiment_df.index,
	            y=tpb_sentiment_df[sentiment],
	            text=[f'{val:.1f}%' for val in tpb_sentiment_df[sentiment]],
	            textposition='auto',
	            marker_color=color_map.get(sentiment, 'gray'),
	        ))
	    tpb_sentiment_fig.update_layout(
	        barmode='stack',
	        title='Sentiment Distribution within TPB Factors',
	        xaxis_title='TPB Factor',
	        yaxis_title='Percentage',
	    )

	    report = generate_report(tpb_sentiment_df.reset_index())

	    # One word cloud and one bigram ranking per TPB factor present in the data
	    wordclouds = {}
	    bigrams_data = {}
	    for label in filtered_df['tpb_label'].unique():
	        # Strip the quote marker and the topic word itself so they do not
	        # dominate the word cloud and the bigrams.
	        text = ' '.join(filtered_df[filtered_df['tpb_label'] == label]['text']).replace('QUOTE','').replace('quote','').replace('sijil halal','').replace('halal','')
	        wordclouds[label] = generate_wordcloud(text)
	        bigrams_data[label] = generate_bigrams(text)

	    # Keep only the word pairs (drop the counts). Wrapping each column in a
	    # Series lets factors with fewer bigrams than others coexist in one table;
	    # plain lists of unequal length make the DataFrame constructor raise.
	    bigram_df = pd.DataFrame({
	        label: pd.Series([word_pair for word_pair, _ in counts], dtype=object)
	        for label, counts in bigrams_data.items()
	    })
	    bigram_df.index = [f"Top {i+1}" for i in range(len(bigram_df))]

	    # Sample table: first five matching comments shorter than 300 characters
	    sample_rows = filtered_df[filtered_df['text'].str.len() < 300].head(5)

	    return (sentiment_fig, tpb_fig, tpb_sentiment_fig, sample_rows,
	            report, wordclouds.get('attitude'), wordclouds.get('religious knowledge'),
	            wordclouds.get('subjective norms'), wordclouds.get('perceived behavioural control'), bigram_df)



	def text_classification_and_sentiment(text, keywords_df=None):
	    """Classify *text* by TPB factor and sentiment and derive a purchase decision.

	    Args:
	        text: The user comment to analyse.
	        keywords_df: Optional DataFrame whose first column lists keywords that
	            force a negative sentiment. When omitted (as the Analyze button
	            does, since it passes only the comment), the keywords are read
	            from 'IMG_8137.xlsx'.

	    Returns:
	        A tuple of three display strings: TPB label, sentiment with its
	        probability, and the purchase decision.
	    """
	    result_tpb = pipeline_tpb(text)
	    tpb_label = result_tpb[0]['label']

	    result_sentiment = sentiment_pipeline(text)
	    sentiment_label = result_sentiment[0]['label']
	    sentiment_score = result_sentiment[0]['score']

	    if keywords_df is None:
	        keywords_df = pd.read_excel('IMG_8137.xlsx')

	    # Keywords live in the first column. Drop empty cells and coerce to str so
	    # a blank or numeric cell cannot crash the comparison.
	    keywords = keywords_df.iloc[:, 0].dropna().astype(str).tolist()
	    lowered_text = text.lower()
	    # Blank keywords are skipped: an empty string is contained in every text
	    # and would force every comment to negative.
	    if any(keyword.strip() and keyword.lower() in lowered_text for keyword in keywords):
	        sentiment_label = 'negative'
	        sentiment_score = 1.0

	    decision = predict_decision(sentiment_label)

	    tpb_output = f"TPB Label: {tpb_label}"
	    sentiment_output = f"Sentiment: {sentiment_label}\nProbability: {sentiment_score * 100:.2f}%"
	    decision_output = f"Decision: {decision}"

	    return tpb_output, sentiment_output, decision_output


	# Sample comments passed to gr.Examples for the "Input Comment" box.
	examples = [
	"Alhamdulillah, hari ni dapat makan dekat restoran halal baru. Rasa puas hati dan tenang bila tau makanan yang kita makan dijamin halal.",
	"Semua orang cakap kena check logo halal sebelum beli makanan. Dah jadi macam second nature dah sekarang. Korang pun sama kan?"
	]

	# Custom stylesheet passed to gr.Blocks.
	# The background override uses the variable name `--body-background-fill`;
	# the previous four-dash spelling declared a different custom property.
	css = """
	:root {
	--bg: #FFFFFF; /* Set the background color to white */
	--col: #191919; /* Define primary text color */
	--bg-dark: #000000; /* Define dark background color if needed */
	--col-dark: #ECF2F7; /* Define dark text color if needed */
	--body-background-fill: #FFFFFF;
	}

	html, body {
	background-color: var(--bg); /* Set the background color to white for the entire page */
	margin: 0; /* Remove default body margin */
	padding: 0; /* Remove default body padding */
	}

	.container {
	max-width: 1000px;
	margin: auto;
	padding: 20px;
	}

	.title {
	text-align: center;
	margin-bottom: 20px;
	}

	.nav-buttons {
	display: flex;
	justify-content: center;
	gap: 10px;
	margin-bottom: 20px;
	}

	#recommendation_report {
	background-color: #f9f9f9; /* Keep this background light for the report section */
	padding: 20px;
	border: 2px solid #e0e0e0;
	border-radius: 10px;
	margin-top: 20px;
	font-family: Arial, sans-serif;
	font-size: 14px;
	}

	.wrap-text {
	white-space: normal !important;
	word-wrap: break-word;
	}

	.footer {visibility: hidden}

	"""

	# Gradio UI: a "User View" tab that analyses one comment and a "Company View"
	# tab that summarises the labelled dataset for a search keyword.
	with gr.Blocks(css=css + """
	body, .gradio-container, .root, .wrap, #root .background .container {
	background-color: white !important;
	background-image: none !important;
	background-fill: white !important;
	}

	""", theme='aisyahhrazak/miku-aisyah@=1.2.2') as demo:

	    with gr.Tabs() as tabs:
	        with gr.TabItem("User View", id=0):
	            gr.Markdown("## Text Classification and Sentiment Analysis Based on User Input About Halal Food Acquisition")
	            gr.Markdown("Enter a text to see TPB classification, sentiment analysis, and purchase prediction results!")
	            input_text = gr.Textbox(lines=2, label="Input Comment", placeholder="Model can make mistakes, we are striving to improve the model.")
	            with gr.Row():
	                tpb_output = gr.Textbox(lines=3, label="TPB Classification")
	                sentiment_output = gr.Textbox(lines=3, label="Sentiment Analysis")
	                decision_output = gr.Textbox(lines=3, label="Purchase Prediction")
	            # This needs to be called at some point prior to the first call to callback.flag()
	            hf_writer.setup([input_text,tpb_output, sentiment_output], "flagged_data_points")
	            classify_button = gr.Button("Analyze")
	            # Run the analysis first. Passing the flagging lambda positionally
	            # together with fn=... handed `fn` to click() twice (TypeError).
	            analyze_event = classify_button.click(
	                fn=text_classification_and_sentiment,
	                inputs=input_text,
	                outputs=[tpb_output, sentiment_output, decision_output],
	            )
	            # Then, only if the analysis succeeded, log the comment and the two
	            # outputs registered in hf_writer.setup above.
	            analyze_event.success(
	                lambda *args: hf_writer.flag(list(args)),
	                inputs=[input_text, tpb_output, sentiment_output],
	                outputs=None,
	                preprocess=False,
	            )
	            gr.Examples(examples=examples, inputs=input_text)

	        with gr.TabItem("Company View", id=1):
	            gr.Markdown("# Sentiment Analysis and Purchase Decision Factor for Halal Food Acquisition")

	            # NOTE: rebinds `input_text`; the User View handlers above already
	            # hold their own reference to the first textbox.
	            input_text = gr.Textbox(lines=1, label="Search Keyword", placeholder="Enter keyword")
	            search_button = gr.Button("Search")

	            with gr.Row():
	                sentiment_chart = gr.Plot(label="Sentiment Distribution")
	                tpb_chart = gr.Plot(label="TPB Factor Distribution")

	            tpb_sentiment_chart = gr.Plot(label="Sentiment Distribution within TPB Factors")
	            # Word cloud outputs share a single row
	            gr.Markdown("### Word Clouds by TPB Label")

	            with gr.Row():
	                attitude_wc = gr.Image(label="Attitude Word Cloud", height=200, width=300)
	                religious_knowledge_wc = gr.Image(label="Religious Knowledge Word Cloud", height=200, width=300)
	                subjective_norms_wc = gr.Image(label="Subjective Norms Word Cloud",height=200, width=300)
	                perceived_behavioural_control_wc = gr.Image(label="Perceived Behavioural Control Word Cloud", height=200, width=300)

	            with gr.Accordion("See Recommendation Details"):
	                report_output = gr.Markdown(label="Recommendation Report", elem_id="recommendation_report")

	            gr.Markdown("### Top Bigrams by TPB Label")
	            bigram_table = gr.Dataframe(label="Top Bigrams for Each TPB Label")

	            output_table = gr.Dataframe(
	                headers=["text", "tpb_label", "sentiment", "score"],
	                label="Company Analysis Results",
	                wrap=True
	            )

	            # The order of `outputs` must match the tuple returned by search_company.
	            search_button.click(
	                fn=search_company,
	                inputs=input_text,
	                outputs=[
	                    sentiment_chart, tpb_chart, tpb_sentiment_chart, output_table, report_output,
	                    attitude_wc, religious_knowledge_wc, subjective_norms_wc, perceived_behavioural_control_wc,bigram_table
	                ]
	            )

	demo.launch()