---
license: llama3
---

- Foundation model: [Bllossom 8B](https://huggingface.co/MLP-KTLim/llama-3-Korean-Bllossom-8B)
- Datasets:
  - [jojo0217/korean_safe_conversation](https://huggingface.co/datasets/jojo0217/korean_safe_conversation) (see the loading sketch below)

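For reference, the training data listed above can be downloaded and inspected with the Hugging Face `datasets` library. This is a minimal sketch; the `train` split name is an assumption and may differ.

```python
from datasets import load_dataset

# Load the instruction-tuning data referenced above
# (assumes the dataset exposes a default "train" split).
ds = load_dataset("jojo0217/korean_safe_conversation", split="train")

print(ds)      # row count and column names
print(ds[0])   # first example
```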
# Usage with Transformers

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

BASE_MODEL = "sh2orc/Llama-3-Korean-8B"

# Load the model in bfloat16 with FlashAttention 2
# (requires a compatible GPU and the flash-attn package).
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="cuda:0",
)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

instruction = 'Explain installment payments.'

pipe = pipeline("text-generation",
                model=model,
                tokenizer=tokenizer,
                max_new_tokens=1024)

messages = [
    {"role": "user", "content": instruction},
]

# Render the chat messages into a plain prompt string.
prompt = pipe.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

outputs = pipe(
    prompt,
    do_sample=True,
    temperature=0.8,
    top_k=10,
    top_p=0.9,
    add_special_tokens=True,
    # Stop on both the regular EOS token and Llama-3's <|eot_id|> turn delimiter.
    eos_token_id=[
        pipe.tokenizer.eos_token_id,
        pipe.tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]
)

# Print only the newly generated text, without the prompt prefix.
print(outputs[0]['generated_text'][len(prompt):])
```

# Result

<pre>
An installment payment is a way of paying for an item by splitting its price over a fixed period. For example, if you buy a 500,000 KRW product on a 10-month installment plan, you pay 50,000 KRW a month for 10 months. Installments have the advantage of letting you use the item during that period without interest, but late fees can be charged and you take on a debt. Therefore, when paying in installments you should carefully consider your financial situation and the item you are buying.
</pre>

# Usage with vLLM

```python
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

BASE_MODEL = "sh2orc/Llama-3-Korean-8B"

llm = LLM(model=BASE_MODEL)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

instruction = 'Tell me about credit card installment payments.'

messages = [
    {
        "role": "system",
        "content": "You are a great AI assistant."
    },
    {
        "role": "user",
        "content": instruction
    },
]

# Render the chat messages into a plain prompt string.
prompt_message = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Stop on both the regular EOS token and Llama-3's <|eot_id|> turn delimiter.
eos_token_id = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]

outputs = llm.generate(
    prompt_message,
    SamplingParams(stop_token_ids=eos_token_id,
                   temperature=0.6,
                   top_p=0.8,
                   max_tokens=4096),
)

for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(generated_text)
```

# Result

<pre>
Credit card installment payment is a way of repaying the amount you charged in portions over a fixed period; in effect, you are paying back money lent to you by the card company. You choose a repayment term you can manage, and you pay interest over that term. Paying in installments can be more advantageous than a single lump-sum payment, but the interest you pay increases the overall cost.
</pre>