# DeBerta / app.py
import gradio as gr
import torch
import numpy as np
from transformers import AutoModelForMultipleChoice, AutoTokenizer
model_id = "microsoft/deberta-v2-xlarge"
# Load the model and tokenizer
model = AutoModelForMultipleChoice.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
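# Note: "microsoft/deberta-v2-xlarge" is a base checkpoint, so the
# multiple-choice head that AutoModelForMultipleChoice attaches is randomly
# initialized (Transformers warns about newly initialized weights). For
# meaningful predictions, a checkpoint fine-tuned on a multiple-choice task
# would be loaded instead; this file does not load such weights.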
# Define the preprocessing function
def preprocess(text):
    # Split the input text into lines
    lines = text.strip().split("\n")
    samples = []
    # Loop through each line and create a sample
    for line in lines:
        parts = line.split("\t")
        if len(parts) >= 6:
            sample = {
                "prompt": parts[0],
                "A": parts[1],
                "B": parts[2],
                "C": parts[3],
                "D": parts[4],
                "E": parts[5],
            }
            samples.append(sample)
    return samples
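# A minimal sketch of the expected input format (hypothetical question text):
#   preprocess("What is H2O?\tWater\tSalt\tSugar\tSand\tIron")
# returns
#   [{"prompt": "What is H2O?", "A": "Water", "B": "Salt",
#     "C": "Sugar", "D": "Sand", "E": "Iron"}]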
# Define the prediction function
def predict(text):
    # The Gradio textbox passes raw text, so parse it into samples first
    samples = preprocess(text)
    results = []
    for sample in samples:
        first_sentences = [sample["prompt"]] * 5
        second_sentences = [sample[option] for option in "ABCDE"]
        tokenized_sentences = tokenizer(first_sentences, second_sentences,
                                        truncation=True, padding=True, return_tensors="pt")
        # The multiple-choice head expects (batch_size, num_choices, seq_len),
        # so add a batch dimension in front of the five encoded option pairs
        inputs = tokenized_sentences["input_ids"].unsqueeze(0)
        masks = tokenized_sentences["attention_mask"].unsqueeze(0)
        with torch.no_grad():
            logits = model(input_ids=inputs, attention_mask=masks).logits
        # Sort the five options from highest to lowest logit
        predictions_as_ids = torch.argsort(-logits, dim=1)
        answers = np.array(list("ABCDE"))[predictions_as_ids.tolist()]
        # Keep the three top-ranked option letters, e.g. "BAC"
        results.append("".join(answers[0, :3]))
    # One line of ranked choices per input question
    return "\n".join(results)
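# Quick smoke test without the UI (hypothetical input; the ranking depends
# entirely on the loaded weights):
#   predict("What is H2O?\tWater\tSalt\tSugar\tSand\tIron")
# returns a string of the three top-ranked option letters, such as "ACB".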
# Create the Gradio interface
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(
        lines=5,
        placeholder="Paste multiple-choice questions (prompt and options separated by tabs, one question per line) ...",
    ),
    outputs=gr.Textbox(label="Top-3 answer choices per question"),
    live=True,
    title="LLM Science Exam Demo",
    description="Enter multiple-choice questions (prompt and options) below and get predictions.",
)
# Run the interface
iface.launch()
# Optional: log the demo to Weights & Biases. This requires `import wandb`
# and an active run (`wandb.init(...)`), and must run before `launch()`,
# which blocks; disabled here because wandb is not configured above.
# iface.integrate(wandb=wandb)