import gradio as gr
import numpy as np
import torch
import wandb
from transformers import AutoModelForMultipleChoice, AutoTokenizer

model_id = "microsoft/deberta-v2-xlarge"

# Load the model and tokenizer.
# Note: this base checkpoint has no fine-tuned multiple-choice head, so the
# head weights are newly initialized; swap in a checkpoint fine-tuned for
# multiple choice to get meaningful predictions.
model = AutoModelForMultipleChoice.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)


# Parse the raw textbox contents into one sample per line
def preprocess(text):
    lines = text.strip().split("\n")
    samples = []
    # Each line holds a prompt and five options, separated by tabs
    for line in lines:
        parts = line.split("\t")
        if len(parts) >= 6:
            samples.append({
                "prompt": parts[0],
                "A": parts[1],
                "B": parts[2],
                "C": parts[3],
                "D": parts[4],
                "E": parts[5],
            })
    return samples


# Score the five options for each question and return the top three
def predict(text):
    samples = preprocess(text)
    results = []
    for sample in samples:
        # Pair the prompt with each of the five candidate answers
        first_sentences = [sample["prompt"]] * 5
        second_sentences = [sample[option] for option in "ABCDE"]
        tokenized = tokenizer(
            first_sentences,
            second_sentences,
            truncation=True,
            padding=True,
            return_tensors="pt",
        )
        # The multiple-choice head expects (batch_size, num_choices, seq_len),
        # so add a batch dimension in front of the five tokenized pairs
        inputs = tokenized["input_ids"].unsqueeze(0)
        masks = tokenized["attention_mask"].unsqueeze(0)
        with torch.no_grad():
            logits = model(input_ids=inputs, attention_mask=masks).logits
        # Rank the options from most to least likely
        predictions_as_ids = torch.argsort(-logits, dim=-1)
        answers = np.array(list("ABCDE"))[predictions_as_ids.cpu().numpy()]
        results.append("".join(answers[0, :3]))
    # One top-3 ranking (e.g. "BAC") per input line
    return "\n".join(results)


# Create the Gradio interface
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(
        lines=5,
        placeholder="Paste multiple-choice questions (prompt and options separated by tabs, one question per line) ...",
    ),
    outputs=gr.Textbox(label="Top 3 options per question"),
    live=True,
    title="LLM Science Exam Demo",
    description="Enter multiple-choice questions (prompt and options) below and get predictions.",
)

# Start a W&B run so the demo can be logged there
wandb.init(project="llm-science-exam-demo")  # example project name

# Run the interface; the W&B integration needs a public share link
iface.launch(share=True)
iface.integrate(wandb=wandb)
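
# For reference, a single input line looks like the hypothetical example
# below (a prompt followed by five tab-separated options). Calling predict()
# on it directly is a quick sanity check that skips the UI; it is left
# commented out here because launch() above blocks the script:
#
#     example = (
#         "Which planet is known as the Red Planet?\t"
#         "Venus\tMars\tJupiter\tSaturn\tMercury"
#     )
#     print(predict(example))  # prints a 3-letter ranking such as "BAC"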