# -*- coding: utf-8 -*-
"""Flan-t5-xl_with_GPU.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/15P4GWaUNFBqJf5_I58DxSiusjPiHUeU6
"""
# Earlier Gradio prototype, kept for reference:
#import gradio as gr
#def greet(name):
#    return "Hello " + name + "!"
#iface = gr.Interface(fn=greet, inputs="text", outputs="text")
#iface.launch()

#theme = gr.themes.Soft().set(
#    body_background_fill='*background_fill_secondary',
#    body_text_color_subdued='*body_text_color',
#    body_text_color_subdued_dark='*chatbot_code_background_color'
#)
#app = gr.Interface(
#    fn=qa_result,
#    btn=gr.UploadButton("📁", file_types=[".pdf", ".csv", ".doc"]),
#    inputs=['textbox', 'text', 'file'],
#    outputs='textbox',
#    title='Բարև՛, ինչպե՞ս ես։',        # "Hello, how are you?"
#    theme=theme,
#    description='Ի՞նչ հարցեր ունես։'   # "What questions do you have?"
#)
import os

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
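# max_split_size_mb:512 caps how large a block the CUDA caching allocator will split;
# this can reduce fragmentation-related out-of-memory errors when loading a large model.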
from IPython.display import HTML, display

def set_css():
    display(HTML('''
    <style>
    pre {
        white-space: pre-wrap;
    }
    </style>
    '''))

#get_ipython().events.register('pre_run_cell', set_css)
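# set_css makes <pre> output wrap in Colab instead of scrolling horizontally;
# the commented hook above would apply it before every cell run.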
import multiprocessing
import torch

torch.cuda.empty_cache()

from deep_translator import GoogleTranslator
# Use any translator you like; in this example, GoogleTranslator.
#translated = GoogleTranslator(source='hy', target='en').translate("Բարև, ո՞նց ես։")  # output -> Hello, how are you?
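# Note: GoogleTranslator from deep_translator talks to the public Google Translate endpoint,
# so it needs an internet connection but no API key.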
#device = "cuda:0" if torch.cuda.is_available() else "cpu"
#device
import streamlit as st

# Leftover Streamlit experiments; note that query parameters do not control GPU allocation.
#st.experimental_set_query_params(gpu='true')
#x = st.slider('Select a value')
#st.write(x, 'squared is', x * x)
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xl")
# device_map="auto" lets accelerate place the weights on the available GPU (spilling to CPU if
# needed), so an explicit model.to('cuda') is redundant and can raise an error for a dispatched model.
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl", device_map="auto")
# We are running FP32!
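# flan-t5-xl has roughly 3B parameters, so FP32 weights alone take on the order of 11 GB of GPU
# memory. A lower-memory alternative (a sketch, not used above; assumes a transformers version
# that supports torch_dtype in from_pretrained):
# model = T5ForConditionalGeneration.from_pretrained(
#     "google/flan-t5-xl", device_map="auto", torch_dtype=torch.float16
# )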
#my_text = "Summarize: \ | |
#Science can ignite new discoveries for society, \ | |
#Society has the tendency to refer to old, familiar ways of doing. \ | |
#Chaos is also a part of our society, although increasingly often so.\ | |
#Innovative ways lead to new growthin businesses and under certain conditions all of society participates." | |
#my_text = "Write an essay with 100 words about Quantum Physics and it's problems regarding our understanding of laws of Physics." | |
#my_text = "Q: Can Geoffrey Hinton have a conversation with George Washington? Give the rationale before answering." | |
# my_text = "A short explanation of machine learning for medical applications." | |
def process():
    ##translated = st.text_input("Գրեք ձեր հարցը: ")  # "Write your question:"
    ##my_text = GoogleTranslator(source='hy', target='en').translate(translated)
    ##input_ids = tokenizer(my_text, return_tensors="pt").input_ids.to("cuda")

    # User input (prompt text: "Write your question...")
    user_input = st.text_input("Գրեք ձեր հարցը...", "")
    if user_input:
        # Translate the Armenian question to English before feeding it to the model
        my_text = GoogleTranslator(source='hy', target='en').translate(user_input)

        # Tokenize the input and move it to the GPU
        input_ids = tokenizer.encode(my_text, return_tensors="pt").to('cuda')

        # Generate text
        with torch.no_grad():
            outputs = model.generate(input_ids)
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Alternative generation settings kept from earlier experiments:
        #outputs = model.generate(input_ids,
        #                         min_length=20,
        #                         max_new_tokens=600,
        #                         length_penalty=1.0,   # values < 1.0 encourage shorter answers
        #                         num_beams=10,
        #                         no_repeat_ngram_size=3,
        #                         temperature=0,
        #                         top_k=150,            # default 50
        #                         top_p=0.92,
        #                         repetition_penalty=2.1)
        #generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Translate the answer back to Armenian and show it
        st.write(GoogleTranslator(source='en', target='hy').translate(generated_text))
process()
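# To try this interface outside Colab, save the script (e.g. as app.py) and run:
#   streamlit run app.py
# Streamlit re-executes the whole script on every interaction, so in a real app the model load
# above should be cached (e.g. with @st.cache_resource in recent Streamlit versions).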