File size: 2,097 Bytes
003e0e3
debd5f8
a7a53ff
5e0d651
 
227e7c3
d95bc39
 
5e0d651
 
 
 
a7a53ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e0d651
 
 
a7a53ff
 
 
 
 
 
 
 
 
 
 
 
 
 
5e0d651
 
 
a7a53ff
 
5e0d651
 
a7a53ff
5e0d651
 
 
d95bc39
5033ff2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import gradio as gr
import torch
import numpy as np
from transformers import AutoModelForMultipleChoice, AutoTokenizer

# Hugging Face checkpoint used for both the model and its tokenizer.
model_id = "microsoft/deberta-v2-xlarge"

# Load the model and tokenizer
# NOTE(review): AutoModelForMultipleChoice attaches a multiple-choice
# classification head on top of the base encoder; for a checkpoint that
# was not fine-tuned for multiple choice that head is randomly
# initialized, so scores may be near-random — confirm this is the
# intended (fine-tuned) checkpoint.
model = AutoModelForMultipleChoice.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Parsing helper: raw textbox contents -> list of question dicts.
def preprocess(text):
    """Parse tab-separated multiple-choice questions.

    Each non-empty line should contain a prompt followed by five answer
    options, all separated by tabs. Lines with fewer than six fields are
    silently skipped; fields beyond the sixth are ignored.

    Returns a list of dicts with keys "prompt" and "A".."E".
    """
    parsed = []
    for raw_line in text.strip().split("\n"):
        fields = raw_line.split("\t")
        if len(fields) < 6:
            continue  # malformed line: not enough tab-separated fields
        prompt, a, b, c, d, e = fields[:6]
        parsed.append(
            {"prompt": prompt, "A": a, "B": b, "C": c, "D": d, "E": e}
        )
    return parsed

# Define the prediction function
def predict(data):
    """Rank answer options A-E for each parsed question.

    Args:
        data: a list of sample dicts as produced by ``preprocess`` (keys
            "prompt" and "A".."E"). A raw string is also accepted and is
            run through ``preprocess`` first, so this function can serve
            directly as the Gradio callback.

    Returns:
        One entry per question: a single-element list holding the three
        top-scored option letters concatenated in descending order of
        model score, e.g. ["BAC"].
    """
    # Gradio hands the callback the textbox contents as a plain string;
    # the original pipeline never parsed it, so the loop below iterated
    # over characters and crashed. Normalize here.
    if isinstance(data, str):
        data = preprocess(data)
    results = []
    for sample in data:
        # One (prompt, option) pair per candidate answer.
        first_sentences = [sample["prompt"]] * 5
        second_sentences = [sample[option] for option in "ABCDE"]
        tokenized = tokenizer(
            first_sentences,
            second_sentences,
            truncation=True,
            padding=True,
            return_tensors="pt",
        )
        # Multiple-choice models expect (batch, num_choices, seq_len);
        # the tokenizer produces (num_choices, seq_len), so add the
        # batch dimension explicitly.
        inputs = tokenized["input_ids"].unsqueeze(0)
        masks = tokenized["attention_mask"].unsqueeze(0)
        with torch.no_grad():  # inference only; no gradients needed
            logits = model(inputs, attention_mask=masks).logits
        # Option indices sorted by descending score.
        predictions_as_ids = torch.argsort(-logits, dim=1)
        answers = np.array(list("ABCDE"))[predictions_as_ids.tolist()]
        # Keep the top-3 letters per question, joined into one string.
        results.append(["".join(row) for row in answers[:, :3]])
    return results

# Create the Gradio interface.
# Fixes over the original wiring:
#  * fn now parses the textbox text with preprocess() before calling
#    predict(); previously the raw string was passed straight through,
#    so predict() iterated over characters and crashed.
#  * gr.inputs.Textbox / gr.outputs.Label are the deprecated pre-3.x
#    namespaces (removed in Gradio 4); use the top-level components.
#  * predict() returns ranked-letter strings, not class/confidence
#    mappings, so JSON — not Label — can render the result.
#  * live=True re-ran the (very large) model on every keystroke; an
#    explicit submit is required instead.
iface = gr.Interface(
    fn=lambda text: predict(preprocess(text)),
    inputs=gr.Textbox(placeholder="Paste multiple-choice questions (prompt and options separated by tabs, one question per line) ..."),
    outputs=gr.JSON(),
    title="LLM Science Exam Demo",
    description="Enter multiple-choice questions (prompt and options) below and get predictions.",
)

# Run the interface.
# NOTE: the original also called iface.integrate(wandb=wandb), but
# `wandb` was never imported, so that line raised NameError at startup;
# it is dropped until Weights & Biases is actually configured.
iface.launch()