import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
base_model = 'bigdefence/llama-3-blossom-kakao-8B'
device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=torch.float16, device_map="auto")
model.eval()  # put the model in evaluation (inference) mode
def generate_response(prompt, model, tokenizer, max_new_tokens=256):
    # Tokenize the prompt and move the tensors to the model's device
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
    inputs = inputs.to(model.device)
    # Generate without tracking gradients
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode the full sequence, then drop the echoed prompt
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.replace(prompt, '').strip()
key = "카카오vx에 대해 설명해줘"  # "Explain Kakao VX to me"
prompt = f"""๋น์ ์ ํ๊ตญ์ด๋ก ๋๋ตํ๋ ์ด์์คํดํธ์
๋๋ค.
### ์ง๋ฌธ:
{key}
### ๋ต๋ณ:"""
response = generate_response(prompt, model, tokenizer)
print(response)
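The helper above uses the library's default sampling settings and strips the prompt with a string replace. As a minimal sketch (reusing the `model`, `tokenizer`, and `prompt` defined above, with illustrative sampling values that are not tuned for this model), you can pass explicit sampling arguments to `generate` and slice off the prompt tokens instead of replacing text:

# Illustrative sampling settings, not recommendations from the model authors
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,   # lower values give more deterministic output
        top_p=0.9,         # nucleus-sampling cutoff
        pad_token_id=tokenizer.eos_token_id,
    )
# Decode only the newly generated tokens, skipping the echoed prompt
new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
print(tokenizer.decode(new_tokens, skip_special_tokens=True).strip())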