import os

import spaces  # required for the @spaces.GPU decorator on ZeroGPU Spaces
import streamlit as st
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

print(f"Is CUDA available: {torch.cuda.is_available()}")
# True
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
# Tesla T4
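# On a ZeroGPU Space, uncommenting the decorator below requests a GPU for the
# duration of each call to main(); on a Space with a dedicated GPU it is not needed.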
# @spaces.GPU
def main():
    def get_tokens_as_list(word_list):
        """Convert a sequence of words into a list of token-id lists."""
        tokens_list = []
        for word in word_list:
            tokenized_word = tokenizer_with_prefix_space([word], add_special_tokens=False).input_ids[0]
            tokens_list.append(tokenized_word)
        return tokens_list
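    # Usage sketch (the token ids shown are made up for illustration):
    #   get_tokens_as_list(["\n", "Hello"])  ->  [[1023], [12483]]
    # Each inner list holds the ids of one banned word, which is the shape
    # that generate()'s bad_words_ids argument expects.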
    def translate(text, tokenizer, model, do_sample, max_new_tokens, temperature, top_k, top_p, bad_words_ids):
        # Prepare the prompt and render it with the model's chat template
        prompt = f"Translate from Korean to English: {text}"
        messages = [{"role": "user", "content": prompt}]
        input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
        input_ids = input_ids.to("cuda")
        prompt_padded_len = len(input_ids[0])
        # Generate the translation
        gen_tokens = model.generate(input_ids, do_sample=do_sample, max_new_tokens=max_new_tokens, temperature=temperature, top_k=top_k, top_p=top_p, bad_words_ids=bad_words_ids)
        # Drop the prompt tokens so only newly generated text is decoded
        gen_tokens = [gt[prompt_padded_len:] for gt in gen_tokens]
        translation = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)
        return translation
    st.title("LLM Translate for ko->eng")

    # Default text shown in the input box
text_default = """ | |
๊ทธ๋ฅผ ์ค์ฌ์ผ๋ก ํ๋ชฐ์์น๋ ๋ง๋ํ๋ง๋. ํ๊ณต์์ ํผ์ด์ค๋ฅธ ๋ค์ฏ ๊ฐ์๋ถ๊ฝ์ด ํฌ๊ธฐ๋ฅผ ๋ถํ๋ฆฌ๊ณ , ์ด๋ด ํฌํ์ฒ๋ผ ์์์ก๋ค. | |
ํ์ฐ์ฐ์ฐ์ , ๊น์! | |
์๋ง์ ๋ชฌ์คํฐ๋ก ์ด๋ฃจ์ด์ง ๊ฒ์ ํ๋๊ฐ ๊ฐ๋ผ์ก๋ค. ์ด๊ณ ์จ์ ์ด๊ธฐ๊ฐ ์ด๊ณผ ๋ผ๋ฅผ ํ์ฐ๊ณ ์ง๋ฉด์ ๋ น์๋ค.""" | |
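    # HF_ACCESS_TOKEN is expected as a secret/environment variable on the
    # Space; it gates access to the (presumably private) adapter repo below.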
    hf_token = os.getenv("HF_ACCESS_TOKEN")

    # attn_implementation = None
    # USE_FLASH_ATTENTION = False
    # if USE_FLASH_ATTENTION:
    #     attn_implementation = "flash_attention_2"

    model_id = "r1208/c4ai-command-r-v01-4bit_32r"
    model = AutoPeftModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, token=hf_token)
    model = model.to("cuda")
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
    # Separate tokenizer with add_prefix_space=True so single words tokenize
    # the same way they would mid-sentence
    tokenizer_with_prefix_space = AutoTokenizer.from_pretrained(model_id, add_prefix_space=True, token=hf_token)
    # Token sequences the model may not generate, used to keep stray
    # newlines and backslashes out of the translation
    bad_words_ids = get_tokens_as_list(word_list=["\n", "\n\n", "\ ", " \ ", "\\", "'\n'"])
    max_new_tokens = st.sidebar.slider("Max New Tokens", value=400, min_value=10, max_value=1000)
    temperature = st.sidebar.slider("Temperature", value=0.3, min_value=0.0, max_value=1.0, step=0.05)
    top_k = st.sidebar.slider("Top-k", min_value=0, max_value=50, value=0)  # 0 disables top-k filtering
    top_p = st.sidebar.slider("Top-p", min_value=0.75, max_value=1.0, step=0.05, value=0.9)
    do_sample = st.selectbox("do_sample:", [True, False])  # booleans, not strings, so the flag actually toggles sampling
    st.subheader("Enter text to translate")
    input_text = st.text_area("Text to Translate", value=text_default, height=300)

    if st.button("Translate"):
        if input_text:
            translation = translate(input_text, tokenizer, model, do_sample=do_sample, max_new_tokens=max_new_tokens, temperature=temperature, top_k=top_k, top_p=top_p, bad_words_ids=bad_words_ids)
            translation = translation[0]  # batch_decode returns a list; only one prompt was sent
            st.text_area("Translated Text", value=translation, height=300)
        else:
            st.error("Please enter some text to translate.")
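# Launch locally (assuming this file is saved as app.py and a token is set):
#   HF_ACCESS_TOKEN=hf_xxx streamlit run app.py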
if __name__ == "__main__":
    main()