File size: 2,517 Bytes
3bc5cbd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import gradio as gr
import pandas as pd


def auth(username, password):
    """Return True only for the single accepted username/password pair.

    Args:
        username: Login name supplied by the caller.
        password: Password supplied by the caller.

    Returns:
        bool: True when both values match the expected credentials,
        False otherwise (including when either value is not a string).
    """
    # Function-scope import so the block does not depend on file-level imports.
    import hmac

    # NOTE(review): the credentials are hard-coded in source; consider
    # loading them from the environment or a secret store instead.
    expected_username = "SIGMOID"
    expected_password = "2A4S39H7E7GR1172"

    if not isinstance(username, str) or not isinstance(password, str):
        return False

    # compare_digest only accepts ASCII-only str, so compare encoded bytes;
    # this keeps non-ASCII input from raising TypeError.
    username_ok = hmac.compare_digest(
        username.encode("utf-8", "surrogatepass"),
        expected_username.encode("utf-8"),
    )
    password_ok = hmac.compare_digest(
        password.encode("utf-8", "surrogatepass"),
        expected_password.encode("utf-8"),
    )
    # Both comparisons are always evaluated before being combined, so a
    # wrong username does not short-circuit the password check.
    return username_ok and password_ok


def predict(df):
    """Classify each row's text and write the labels back onto ``df``.

    Loads a 6-label ``BertForSequenceClassification`` from the local
    "sentiment_model" path and the "dbmdz/bert-base-turkish-cased"
    tokenizer, runs ``Trainer.predict`` over the frame's "text" column and
    stores the arg-max class of every row as two columns.

    Args:
        df: pandas DataFrame with at least an "id" and a "text" column.
            It is modified in place: "id" is removed before prediction and
            re-attached afterwards (so it ends up as the last column), and
            the "offansive" and "target" columns are added.

    Returns:
        The same DataFrame object, with "offansive" (0 or 1) and "target"
        (label name) filled in for every row.
    """
    # Imported at call time, as in the original implementation.
    import numpy as np
    from datasets import Dataset
    from transformers import AutoTokenizer, BertForSequenceClassification, Trainer

    # LOAD MODEL AND TOKENIZER AND TOKENIZE DATA
    model = BertForSequenceClassification.from_pretrained("sentiment_model", num_labels=6)
    tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")

    # "id" is set aside so it is not part of the dataset fed to the model.
    df_ids = df.pop('id')
    test_dataset = Dataset.from_dict(df)

    def tokenize_function(examples):
        return tokenizer(examples["text"], padding="max_length", truncation=True)

    tokenized_test_datasets = test_dataset.map(tokenize_function, batched=True)

    # The Trainer is used purely as an inference wrapper here.
    trainer = Trainer(model=model)

    # PREDICT TEXT VALUES USING LOADED MODEL AND FILL THE OFFANSIVE AND TARGET COLUMNS
    preds = trainer.predict(tokenized_test_datasets)
    max_indices = np.argmax(preds.predictions, axis=1)

    # Lookup tables indexed by predicted class id (0..5). Class 4 is the
    # only non-offensive class; classes 4 and 5 share the 'OTHER' target.
    offensive_by_class = np.array([1, 1, 1, 1, 0, 1])
    target_by_class = np.array(
        ['INSULT', 'RACIST', 'SEXIST', 'PROFANITY', 'OTHER', 'OTHER'],
        dtype=object,
    )

    # Whole-column assignment instead of chained ``df[col][i] = ...``:
    # chained assignment is unreliable (and a no-op under pandas
    # copy-on-write) and depended on the index being 0..n-1.
    df['offansive'] = offensive_by_class[max_indices]
    df['target'] = target_by_class[max_indices]

    df['id'] = df_ids
    # *********** END ***********
    return df

def get_file(file):
    """Run predictions on an uploaded "|"-separated CSV and save the result.

    Args:
        file: Uploaded-file object exposing the path of the file on disk
            through its ``name`` attribute.

    Returns:
        str: Path of the written result file, "output_SIGMOID.csv".
    """
    output_file = "output_SIGMOID.csv"

    # For windows users, replace path separator
    file_name = file.name.replace("\\", "/")

    df = pd.read_csv(file_name, sep="|")

    # Write the frame that predict() returns rather than relying on it
    # having mutated ``df`` in place.
    labelled_df = predict(df)
    labelled_df.to_csv(output_file, index=False, sep="|")
    return output_file



# Gradio UI: one uploaded file in, one generated file out, handled by get_file.
iface = gr.Interface(fn=get_file, inputs="file", outputs="file")

if __name__ == "__main__":
    # Start the app with a share link; logins are checked by auth().
    iface.launch(auth=auth, share=True)