# lora_bnk / app.py
# Author: FINGU-AI
# Last commit: "Update app.py" (839c227, verified)
import gradio as gr
import spaces
import os
import spaces
import torch
import random
import time
import re
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextStreamer
import transformers
# Read the Hugging Face access token from the environment (None when unset).
# NOTE(review): HF_TOKEN is not referenced anywhere else in the visible file.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
# Create a tensor on CUDA at import time and print where it lives. The inline
# note below is the original author's observation of the printed device.
# NOTE(review): this looks like the ZeroGPU Spaces boilerplate - confirm.
zero = torch.Tensor([0]).cuda()
print(zero.device) # <-- 'cpu' 🤔
# Previously used base model / adapter pair, kept for reference:
# model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
# peft_model_id = "Imran1/Llama3.1_8b_Qlora_bnk"
# Base checkpoint and the adapter repository that is loaded on top of it.
model_id = "Qwen/Qwen2.5-14B-Instruct"
peft_model_id = "Imran1/Qwen2.5-14b-bnk-lora-11"
# Alternative attention backend that was tried: attn_implementation="flash_attention_2"
# Load the base model in bfloat16 with the SDPA attention implementation.
model = AutoModelForCausalLM.from_pretrained(model_id, attn_implementation="sdpa", torch_dtype= torch.bfloat16)
# Load the adapter weights from peft_model_id into the model and enable them.
model.load_adapter(peft_model_id)
model.enable_adapters()
# The tokenizer is taken from the adapter repository, not from the base model.
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
# Streaming output is currently disabled:
# streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
model.to('cuda')
# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
# Llama-3 style terminators, unused with the current model:
# terminators = [
# tokenizer.eos_token_id,
# tokenizer.convert_tokens_to_ids("<|eot_id|>")
# ]
# Keyword arguments forwarded to model.generate() on every request:
# sampling (temperature 0.7, top-p 0.9) with at most 2000 new tokens.
generation_params = {
    'max_new_tokens': 2000,
    'use_cache': True,
    'do_sample': True,
    'temperature': 0.7,
    'top_p': 0.9,
    # 'top_k': 50,
    # 'pad_token_id': tokenizer.pad_token_id,
    # 'eos_token_id': terminators,
}
@spaces.GPU
def inference(query):
messages = [
{"role": "system", "content": """You are a highly skilled multilingual AI assistant specializing in banking and finance translations, with a focus on BNK Bank's products and services. Your task is to create and translate banking-related conversations with perfect accuracy, cultural sensitivity, and natural language use across multiple languages: Korean, English, Simplified Chinese, Traditional Chinese, Russian, Uzbek, Japanese, and Vietnamese.
1. Language Expertise:
- Demonstrate native-level fluency in all eight languages.
- language = ["English", "Korean", "Simplified Chinese", "Traditional Chinese", "Russian", "Uzbek", "Japanese", "Vietnamese"]
- Apply nuances, idioms, and cultural contexts specific to each language with precision.
- Ensure that each translation reads as if it were originally written in that language.
2. Banking and Finance Knowledge:
- Exhibit expert-level understanding of banking terminology, products, and services, especially those specific to BNK Bank.
- Maintain perfect consistency in translating specialized banking terms across all languages.
- Pay meticulous attention to Korean banking terms, ensuring they are accurately represented in other languages.
3. BNK Bank-Specific Terminology:
- The following BNK Bank product names and categories MUST BE TRANSLATED CORRECTLY in each target language:
a) ์™ธ๊ตญ์ธ ์ž…์ถœ๊ธˆ ํ†ต์žฅ ์ข…๋ฅ˜: Only one ํ†ต์žฅ, Only one ์ฃผ๋‹ˆ์–ด ํ†ต์žฅ, ๋ณดํ†ต์˜ˆ๊ธˆ, ์ž์œ ์ €์ถ•์˜ˆ๊ธˆ, ๋ฑ…ํฌ๋ผ์ธ ํ†ต์žฅ, ๋ชจ์ž„ํ†ต์žฅ
b) ์˜ˆ์ ๊ธˆ ์ข…๋ฅ˜: BNK๊ฐ€์„์•ผ๊ตฌ์ •๊ธฐ์˜ˆ๊ธˆ, LIVE์ •๊ธฐ์˜ˆ๊ธˆ, ์ €ํƒ„์†Œ ์‹ค์ฒœ ์˜ˆ๊ธˆ, BNK๋‚ด๋ง˜๋Œ€๋กœ ์˜ˆ๊ธˆ, ๊ฐ€๊ณ„์šฐ๋Œ€ ์ •๊ธฐ ์ ๊ธˆ, BNK์ง€์—ญ์‚ฌ๋ž‘ ์ ๊ธˆ, ๊ฟˆ์ด๋ฃธ ์ ๊ธˆ, ๋ฐฑ์„ธ์ฒญ์ถ˜์‹ค๋ฒ„ ์ ๊ธˆ, ํŽซ์ ๊ธˆ, ์ €ํƒ„์†Œ ์‹ค์ฒœ ์ ๊ธˆ, ์ฃผํƒ์ฒญ์•ฝ์ข…ํ•ฉ์ €์ถ•, ๋”(The) ํŠนํŒ ์ •๊ธฐ์˜ˆ๊ธˆ
c) ์ฒดํฌ ์นด๋“œ ์ข…๋ฅ˜: ZIPL์ฒดํฌ, ์–ด๋””๋กœ๋“ ๊ทธ๋ฆฐ์ฒดํฌ, ๋™๋ฐฑ์ „์ฒดํฌ์นด๋“œ(ํ›„๋ถˆ๊ตํ†ต๋„๊ฐ€๋Šฅ), 2030์–ธํƒํŠธ์ฒดํฌ(ํ›„๋ถˆ๊ตํ†ต์นด๋“œ์ž„), ๊ตญ๋ฏผํ–‰๋ณต์ฒดํฌ, ์นด์นด์˜คํŽ˜์ด์ฒดํฌ, ๋”ฉ๋”ฉ์ฒดํฌ, ํ•ดํ”ผํฌ์ธํŠธ์ฒดํฌ, ๋งˆ์ด์กด๊ทธ๋ฆฐ์ฒดํฌ, ๋งˆ์ด์กด์ฒดํฌ
d) ์‹ ์šฉ ์นด๋“œ ์ข…๋ฅ˜: (ํผํ“ธ)์บ์‰ฌ๋ฐฑ์นด๋“œ, B Smart(oh point)์นด๋“œ, BNK 2030ํ”Œ๋ž˜ํ‹ฐ๋Š„(Platinum)์นด๋“œ, BNK ๋ถ€์ž๋˜์„ธ์š” ์•„ํŒŒํŠธ์นด๋“œ, BNK ๋ถ€์ž๋˜์„ธ์š” ํ™ˆ์‡ผํ•‘์นด๋“œ, Y์นด๋“œ, ๊ตญ๋ฏผํ–‰๋ณต์นด๋“œ, ๊ทธ๋ฆฐ์นด๋“œ, ๊ทธ๋ฆฐ์นด๋“œv2, ๊ธ€๋กœ๋ฒŒ์นด๋“œ ์„œ๋น„์Šค, ๋‹ค๋ฌธํ™”์‚ฌ๋ž‘์นด๋“œ, ๋‹ค์ด๋ ‰ํŠธ ์˜คํ† ํ”Œ๋Ÿฌ์Šค ์„œ๋น„์Šค, ๋Œ€ํ•œํ•ญ๊ณต(Sky-pass) ์ œํœด์นด๋“œ, ๋”ฉ๋”ฉ(DingDing)์‹ ์šฉ์นด๋“œ, ๋ ˆํฌ์ธ ์นด๋“œ, ๋งค์งํŒจ์Šค์นด๋“œ, ๋ช…์ž‘์นด๋“œ, ๋ฌธํ™”์‚ฌ๋ž‘์นด๋“œ, ๋ถ€๋น…์Šค์นด๋“œ, ๋น„์”จTOP์นด๋“œ, ์Šน์šฉ์ฐจ์š”์ผ์ œ์นด๋“œ, ์‹ ์šฉ์นด๋“œ๊ฒธ์šฉ๋งˆ์ด๋น„(Mybi)์นด๋“œ, ์•„์‹œ์•„๋‚˜ํด๋Ÿฝ์นด๋“œ(Asiana Club), ์šธ์‚ฐ๊ด‘์—ญ์‹œ ์Šน์šฉ์ฐจ์š”์ผ์ œ์นด๋“œ, ์šธ์‚ฐ์‚ฌ๋ž‘์นด๋“œ, ํ”Œ๋ž˜ํ‹ฐ๋Š„(Platinum) ์นด๋“œ, ํ•ดํ”ผ์˜คํ† ์นด๋“œ์„œ๋น„์Šค, ํ›„๋ถˆ๊ตํ†ต์นด๋“œ, BNK ํ”„๋ Œ์ฆˆ ์‹ ์šฉ์นด๋“œ, BNK ๋ถ€์ž๋˜์„ธ์š” ๋”์˜ค์ผ์นด๋“œ, ํ›„๋ถˆํ•˜์ดํŒจ์Šค์นด๋“œ, ํƒ‘๋ชจ์•„์‹ ์šฉ์นด๋“œ, ๋ฉ”๊ฐ€์‡ผํ•‘ ์‹ ์šฉ์นด๋“œ, ์˜ค๋Š˜์€e์‹ ์šฉ์นด๋“œ, ํŽซ(PET)์นด๋“œ, ๋‹ค์ด์•„๋ชฌ๋“œ(Diamond) ์นด๋“œ, ์นด๋“œํ˜• ์˜จ๋ˆ„๋ฆฌ์ƒํ’ˆ๊ถŒ, SK OIL&LPG์นด๋“œ, ํŒŸ(pod)์‹ ์šฉ์นด๋“œ, ๋ถ€์‚ฐ์ฒด์œก์‚ฌ๋ž‘์นด๋“œ, ์–ด๋””๋กœ๋“  ๊ทธ๋ฆฐ์ฒดํฌ์นด๋“œ, ZipL ์‹ ์šฉ์นด๋“œ, BNK Simple American Express Blue Business ์นด๋“œ
- Translate these terms accurately and consistently across all languages, providing culturally appropriate explanations or context when necessary.
4. get input language and translate it inti target language.
- return only translation. without extra explaination and comments.
- do not return extra text.
"""},
{"role": "user", "content": f"{query}"},
]
tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
outputs = model.generate(tokenized_chat, **generation_params)
# decoded_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=False)
# assistant_response = decoded_outputs[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
response = outputs[0][tokenized_chat.shape[-1]:]
response = tokenizer.decode(response, skip_special_tokens=True)
return response
# outputs = model.generate(tokenized_chat, **generation_params, streamer=streamer)
# return outputs
examples = ["Translate ko to en: \n\n ์€ํ–‰์›: ์•ˆ๋…•ํ•˜์„ธ์š”! BNK์€ํ–‰์ž…๋‹ˆ๋‹ค. ๋ฌด์—‡์„ ๋„์™€๋“œ๋ฆด๊นŒ์š”? ๊ณ ๊ฐ: ์•ˆ๋…•ํ•˜์„ธ์š”. ์ œ๊ฐ€ ์™ธ๊ตญ์ธ ์ž…์ถœ๊ธˆ ํ†ต์žฅ์„ ๊ฐœ์„คํ•˜๊ณ  ์‹ถ์€๋ฐ, ํ•„์š”ํ•œ ์„œ๋ฅ˜๊ฐ€ ๋ฌด์—‡์ธ์ง€ ๊ถ๊ธˆํ•ฉ๋‹ˆ๋‹ค. ์€ํ–‰์›: ์™ธ๊ตญ์ธ ์ž…์ถœ๊ธˆ ํ†ต์žฅ์„ ๊ฐœ์„คํ•˜์‹œ๋ ค๋ฉด ์—ฌ๊ถŒ, ์™ธ๊ตญ์ธ ๋“ฑ๋ก์ฆ, ๊ทธ๋ฆฌ๊ณ  ์ฃผ์†Œ ์ฆ๋ช…์„œ๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. ๊ณ ๊ฐ: ์•Œ๊ฒ ์Šต๋‹ˆ๋‹ค. ํ†ต์žฅ ๊ฐœ์„ค ํ›„ ์ž…๊ธˆํ•  ๋•Œ ์ˆ˜์ˆ˜๋ฃŒ๊ฐ€ ๋ฐœ์ƒํ•˜๋‚˜์š”? ์€ํ–‰์›: ๋„ค, ์ผ๋ฐ˜์ ์œผ๋กœ ์™ธ๊ตญ์ธ ํ†ต์žฅ์— ๋Œ€ํ•œ ์ž…๊ธˆ ์ˆ˜์ˆ˜๋ฃŒ๋Š” ์—†์Šต๋‹ˆ๋‹ค. ํ•˜์ง€๋งŒ ๋‹ค๋ฅธ ํ†ต์žฅ์œผ๋กœ ์ด์ฒดํ•  ๊ฒฝ์šฐ ์ˆ˜์ˆ˜๋ฃŒ๊ฐ€ ๋ฐœ์ƒํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ๋”์šฑ ๊ถ๊ธˆํ•œ ์ ์ด ์žˆ์œผ์‹ ๊ฐ€์š”?"]
def response(message, history):
    """Chat callback for the Gradio interface.

    Forwards ``message`` to ``inference`` and returns the resulting text in one
    piece. ``history`` is accepted because the chat UI passes it, but it is not
    used, so each message is handled independently of earlier turns.
    """
    return inference(message)
# Build the chat UI around `response`, offer the sample prompt, and start serving.
gr.ChatInterface(
    fn=response,
    examples=examples,
).launch()