Spaces:

eliolio
/

yelp-reviews

Build error

yelp-reviews / app.py

EliottZemour

aaa

c0cf912 about 2 years ago

4.62 kB

	import torch
	import transformers
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification
	import gradio as gr
	import os

	# Hub repository ids: the first is loaded as a seq2seq model, the second
	# as a sequence-classification model further down in this file.
	model_name = 'eliolio/bart-finetuned-yelpreviews'
	bert_model_name = 'eliolio/bert-correlation-yelpreviews'

	# Token forwarded to every from_pretrained() call; None when the
	# 'private_token' environment variable is not set.
	access_token = os.getenv('private_token')

	# Review generator: seq2seq model and its tokenizer, both loaded from
	# `model_name` with the same access token.
	model = AutoModelForSeq2SeqLM.from_pretrained(model_name, use_auth_token=access_token)
	tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=access_token)

	# Scorer: tokenizer and sequence-classification model, both loaded from
	# `bert_model_name`.
	bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name, use_auth_token=access_token)
	bert_model = AutoModelForSequenceClassification.from_pretrained(bert_model_name, use_auth_token=access_token)


	def correlation_score(table, review):
	    """Score a (table, review) text pair with the classification model.

	    Args:
	        table: First text segment handed to ``bert_tokenizer``.
	        review: Second text segment handed to ``bert_tokenizer``.

	    Returns:
	        A dict ``{"correlation": p}`` where ``p`` is the softmax
	        probability of class index 1, as a Python float.
	    """
	    # The two texts are encoded as one sentence pair, truncated to 128 tokens.
	    inputs = bert_tokenizer(
	        table,
	        review,
	        padding=True,
	        max_length=128,
	        truncation=True,
	        return_tensors="pt",
	    )
	    # Pure inference: disable autograd so no graph is built or retained
	    # for a forward pass that is never back-propagated.
	    with torch.no_grad():
	        logits = bert_model(**inputs).logits
	    probs = logits.softmax(dim=-1)
	    # .item() requires exactly one element, i.e. a single input pair.
	    return {"correlation": probs[:, 1].item()}

	def create_prompt(stars, useful, funny, cool):
	    """Build the text prompt that encodes the four review attributes.

	    The result is ``"Generate review: "`` followed by ``name: value``
	    pairs for stars, useful, funny and cool, joined by ``", "``.
	    """
	    fields = (
	        ("stars", stars),
	        ("useful", useful),
	        ("funny", funny),
	        ("cool", cool),
	    )
	    attributes = ", ".join(f"{name}: {value}" for name, value in fields)
	    return "Generate review: " + attributes


	def postprocess(review):
	    """Trim a generated review so it ends on a complete sentence.

	    Everything after the last sentence terminator (``.``, ``!`` or ``?``)
	    is dropped. When the text contains no terminator at all it is returned
	    unchanged rather than being reduced to an empty string.

	    Args:
	        review: Decoded review text.

	    Returns:
	        The review cut after its last sentence terminator, or the original
	        text when there is none.
	    """
	    # rfind() yields -1 for a missing mark, so max() is -1 only if all are absent.
	    cut = max(review.rfind(mark) for mark in ".!?")
	    if cut == -1:
	        return review
	    return review[:cut + 1]


	def generate_reviews(stars, useful, funny, cool):
	    """Sample three reviews for the given attributes and score each one.

	    Returns:
	        A 6-tuple: the three post-processed review texts followed by the
	        three results of ``correlation_score`` for those reviews, in the
	        same order.
	    """
	    prompt = create_prompt(stars, useful, funny, cool)
	    encoded = tokenizer(prompt, return_tensors='pt')
	    sequences = model.generate(
	        input_ids=encoded.input_ids,
	        attention_mask=encoded.attention_mask,
	        do_sample=True,
	        num_return_sequences=3,
	        temperature=1.2,
	        top_p=0.9,
	    )
	    reviews = [
	        postprocess(tokenizer.decode(sequence, skip_special_tokens=True))
	        for sequence in sequences
	    ]
	    # 17 == len("Generate review: "): drop the instruction prefix written
	    # by create_prompt so only the attribute list is scored.
	    table = prompt[17:]
	    scores = [correlation_score(table, review) for review in reviews]

	    first_three = range(3)
	    return tuple(reviews[i] for i in first_three) + tuple(scores[i] for i in first_three)


	# Custom stylesheet passed to gr.Blocks(css=...). '#btn' matches the
	# button created with elem_id='btn'; no element in this file uses the
	# 'ctr' id.
	css = """
	#ctr {text-align: center;}
	#btn {color: white; background: linear-gradient( 90deg, rgba(255,166,0,1) 14.7%, rgba(255,99,97,1) 73% );}
	"""


	# Introductory Markdown rendered in the first row of the page: title,
	# a sample dataset row and a description of the model's input/output format.
	# Table pipes are written unescaped so the table renders (and so the literal
	# holds no invalid "\|" escape sequences); the model link uses huggingface.co.
	md_text = """<h1 style='text-align: center; margin-bottom: 1rem'>Generating Yelp reviews with BART-base ⭐⭐⭐</h1>

	This space demonstrates how synthetic data generation can be performed on natural language columns, as found in the Yelp reviews dataset.

	| review id | stars | useful | funny | cool | text |
	|:---:|:---:|:---:|:---:|:---:|:---:|
	| 0 | 5 | 1 | 0 | 1 | "Wow! Yummy, different, delicious. Our favorite is the lamb curry and korma. With 10 different kinds of naan!!! Don't let the outside deter you (because we almost changed our minds)...go in and try something new! You'll be glad you did!" |




	The model is a fine-tuned version of [facebook/bart-base](https://huggingface.co/facebook/bart-base) on Yelp reviews with the following input-output pairs:

	- Input: "Generate review: stars: 5, useful: 1, funny: 0, cool: 1"
	- Output: "Wow! Yummy, different, delicious. Our favorite is the lamb curry and korma. With 10 different kinds of naan!!! Don't let the outside deter you (because we almost changed our minds)...go in and try something new! You'll be glad you did!"
	"""

	# Markdown rendered in the last row of the page; links to the dataset download.
	resources = """## Resources
	- The Yelp reviews dataset can be found in json format [here](https://www.yelp.com/dataset)."""

	# Page layout: intro text, four attribute sliders, a generate button,
	# three review boxes, three score labels and a resources footer.
	with gr.Blocks(css=css) as demo:
	    with gr.Row():
	        gr.Markdown(md_text)

	    with gr.Row():
	        # gr.Slider(value=...) is the component API; the legacy
	        # gr.inputs.Slider(default=...) namespace is no longer available
	        # in current Gradio releases.
	        stars = gr.Slider(minimum=0, maximum=5, step=1, value=0, label="stars")
	        useful = gr.Slider(minimum=0, maximum=5, step=1, value=0, label="useful")
	        funny = gr.Slider(minimum=0, maximum=5, step=1, value=0, label="funny")
	        cool = gr.Slider(minimum=0, maximum=5, step=1, value=0, label="cool")

	    with gr.Row():
	        button = gr.Button("Generate reviews !", elem_id='btn')

	    with gr.Row():
	        output1 = gr.Textbox(label="Review #1")
	        output2 = gr.Textbox(label="Review #2")
	        output3 = gr.Textbox(label="Review #3")

	    with gr.Row():
	        score1 = gr.Label(label="Correlation score #1")
	        score2 = gr.Label(label="Correlation score #2")
	        score3 = gr.Label(label="Correlation score #3")

	    with gr.Row():
	        gr.Markdown(resources)

	    # Output order must match the 6-tuple returned by generate_reviews:
	    # three review texts, then their three scores.
	    button.click(
	        fn=generate_reviews,
	        inputs=[stars, useful, funny, cool],
	        outputs=[output1, output2, output3, score1, score2, score3],
	    )

	demo.launch()