File size: 1,205 Bytes
6d26209
 
1b7edf3
 
 
 
6d26209
1b7edf3
 
6d26209
 
 
1b7edf3
6d26209
 
1b7edf3
6d26209
 
 
1b7edf3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6d26209
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import gradio as gr
from transformers import pipeline
from datasets import Dataset, DatasetDict
import pandas as pd
import numpy as np
from transformers import RobertaTokenizerFast, RobertaForSequenceClassification,Trainer, TrainingArguments

# Load the sequence-classification model and its matching fast tokenizer from
# the same checkpoint identifier. Both names are module-level globals that the
# functions and the Trainer defined further down in this file rely on.
# NOTE(review): from_pretrained presumably resolves 'Prakhar618/Gptdetect' on
# the Hugging Face Hub (or a local cache) at import time -- confirm the
# deployment environment allows that.
model = RobertaForSequenceClassification.from_pretrained('Prakhar618/Gptdetect')
# max_length is also passed explicitly at call time in tokenize_function.
tokenizer = RobertaTokenizerFast.from_pretrained('Prakhar618/Gptdetect', max_length = 256)

def predict(text):
    """Classify one or more texts with the module-level ``trainer``.

    Args:
        text: Either a single string (which is what the Gradio ``"text"``
            input component passes) or an iterable of strings.

    Returns:
        A 1-D NumPy array holding, for each input text, the index of the
        highest-scoring class in the model output. The meaning of each
        index is defined by the checkpoint and is not visible in this file.
    """
    # A bare string cannot be handed to pd.DataFrame(..., columns=['text']):
    # pandas rejects scalar data with a ValueError. Normalise to a list so a
    # single string becomes a one-row dataset.
    texts = [text] if isinstance(text, str) else list(text)

    # Nothing to classify: skip tokenization and the model call entirely.
    if not texts:
        return np.empty(0, dtype=np.intp)

    # Wrap the inputs in a Hugging Face dataset with a single 'text' column.
    test_dataset = Dataset.from_pandas(pd.DataFrame({'text': texts}))

    # Tokenize the inference dataset in batches.
    tokenized_dataset = test_dataset.map(tokenize_function, batched=True)

    # Trainer.predict returns a (predictions, label_ids, metrics) named
    # tuple; only the raw model outputs are needed here.
    output = trainer.predict(tokenized_dataset)
    y_pred = np.argmax(output.predictions, axis=1)
    return y_pred
    

def tokenize_function(examples):
    """Tokenize the ``'text'`` entry of *examples* with the module tokenizer.

    Args:
        examples: A mapping with a ``'text'`` key, as supplied by
            ``Dataset.map`` (called with ``batched=True`` in this file).

    Returns:
        Whatever the module-level ``tokenizer`` returns for that text when
        called with padding and truncation enabled and ``max_length=256``.
    """
    encoding_options = {
        'padding': True,
        'truncation': True,
        'max_length': 256,
    }
    raw_text = examples['text']
    return tokenizer(raw_text, **encoding_options)

# Inference-only configuration: training is switched off, prediction is
# switched on, and the per-device evaluation batch size is set to 2.
# NOTE(review): no output_dir is given; whether TrainingArguments accepts
# that depends on the installed transformers version -- confirm against the
# pinned version.
test_args = TrainingArguments(
    do_train=False,
    do_predict=True,
    per_device_eval_batch_size = 2

)
# Module-level Trainer wrapping the loaded model; predict() calls
# trainer.predict() on it to run the forward passes.
trainer = Trainer(
    model=model,
    args=test_args,
)

# Build a Gradio interface with a text input and a text output, using
# predict() as the handler, then launch it. This runs at import time.
iface = gr.Interface(fn=predict, inputs="text", outputs="text")
iface.launch()