File size: 4,207 Bytes
8b51029
 
 
 
 
 
 
 
 
2f9bbf9
 
 
8b51029
2f9bbf9
8b51029
2f9bbf9
 
8b51029
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f9bbf9
8b51029
 
 
a863b73
4fd1c71
 
a863b73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e20b8ee
 
 
 
a863b73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b51029
a863b73
8b51029
a863b73
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# -*- coding: utf-8 -*-
"""Meena_A_Multilingual_Chatbot (1).ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1-IfUcnDUppyMArHonc_iesEcN2gSKU-j
"""

#!pip3 install transformers
#!pip install -q translate
#!pip install polyglot

#!pip install Pyicu

#!pip install Morfessor
#!pip install pycld2

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from translate import Translator
from polyglot.detect import Detector

# Conversational model selection. DialoGPT-large is used; the commented-out
# "-small" alternative trades response quality for lower memory/latency.
# model_name = "microsoft/DialoGPT-large"
model_name = "microsoft/DialoGPT-large"
# model_name = "microsoft/DialoGPT-small"
# Load the tokenizer and causal-LM weights (downloaded from the Hugging Face
# hub on first run, then served from the local cache).
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# # chatting 5 times with nucleus sampling & tweaking temperature
# step=-1
# while(True):
#     step+=1
#     # take user input
#     text = input(">> You:>")
#     detected_language=Detector(text,quiet=True).language.code
#     translator=Translator(from_lang=detected_language,to_lang="en")
#     translated_input=translator.translate(text)
#     print(translated_input)
#     if text.lower().find("bye")!=-1:
#       print(f">> Meena:> Bye Bye!")
#       break;
#     # encode the input and add end of string token
#     input_ids = tokenizer.encode(translated_input+tokenizer.eos_token, return_tensors="pt")
#     # concatenate new user input with chat history (if there is)
#     bot_input_ids = torch.cat([chat_history_ids, input_ids], dim=-1) if step > 0 else input_ids
#     # generate a bot response
#     chat_history_ids = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id,do_sample=True,top_p=0.9,top_k=50,temperature=0.7,num_beams=5,no_repeat_ngram_size=2)
#     #print the output
#     output = tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)
#     print(output)
#     translator=Translator(from_lang="en",to_lang=detected_language)
#     translated_output=translator.translate(output)
  
#     print(f">> Meena:> {translated_output}")

#!pip install gradio

import gradio as gr

with gr.Blocks() as meena:
    chatbot = gr.Chatbot(label="Meena- A Multilingual Chatbot")
    msg = gr.Textbox(label="You")
    clear = gr.Button("Clear")

    # NOTE(review): conversation state (token history + turn counter) lives in
    # module-level globals, so it is shared by every connected user. That is
    # fine for a single-user demo; a multi-user deployment should hold this in
    # per-session gr.State instead.
    def _set_history(history_ids):
        """Store the model's accumulated token history (None resets it)."""
        global chat_history_ids
        chat_history_ids = history_ids

    def _get_history():
        """Return the stored token history tensor."""
        return chat_history_ids

    def _set_step(value):
        """Store the zero-based turn counter."""
        global step
        step = value

    def _get_step():
        """Return the stored turn counter."""
        return step

    def generate_text(text, chat_history):
        """Handle one chat turn: detect the user's language, translate to
        English, generate a DialoGPT reply conditioned on the running token
        history, translate the reply back, and append the pair to the visible
        chat. Returns ("", updated_history) to clear the textbox.
        """
        # Resume the turn counter unless the visible chat is empty (first turn
        # or just cleared). Truthiness also covers gradio handing us None.
        current = -1 if not chat_history else _get_step()
        current += 1
        _set_step(current)

        if current != 0:
            chat_history_ids = _get_history()

        # polyglot cannot classify purely numeric input; assume English.
        if text.isdigit():
            detected_language = 'en'
        else:
            detected_language = Detector(text, quiet=True).language.code

        # Translate the user's message into English for the model.
        to_english = Translator(from_lang=detected_language, to_lang="en")
        translated_input = to_english.translate(text)

        # Encode the input and append the end-of-string token.
        input_ids = tokenizer.encode(
            translated_input + tokenizer.eos_token, return_tensors="pt"
        )
        # Concatenate with the running history when this is not the first turn.
        bot_input_ids = (
            torch.cat([chat_history_ids, input_ids], dim=-1)
            if current > 0
            else input_ids
        )
        # Generate a reply with nucleus sampling + beam search settings.
        chat_history_ids = model.generate(
            bot_input_ids,
            max_length=1000,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            top_p=0.9,
            top_k=50,
            temperature=0.7,
            num_beams=5,
            no_repeat_ngram_size=2,
        )
        _set_history(chat_history_ids)

        # Decode only the newly generated tokens (everything after the prompt).
        output = tokenizer.decode(
            chat_history_ids[:, bot_input_ids.shape[-1]:][0],
            skip_special_tokens=True,
        )
        # Translate the reply back into the user's language.
        to_user = Translator(from_lang="en", to_lang=detected_language)
        translated_output = to_user.translate(output)
        chat_history.append((text, translated_output))

        # After 6 turns, drop the context so it doesn't grow without bound.
        # BUG FIX: the original stored the int -1 as the token history here;
        # reset to None instead (the `current > 0` guard above ensures a reset
        # history is never read before being replaced).
        if current == 5:
            _set_history(None)
            _set_step(-1)

        return "", chat_history

    msg.submit(generate_text, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

meena.queue().launch()