---
license: llama3
---
- Foundation model: [Bllossom 8B](https://huggingface.co/MLP-KTLim/llama-3-Korean-Bllossom-8B)
- Datasets:
  - [jojo0217/korean_safe_conversation](https://huggingface.co/datasets/jojo0217/korean_safe_conversation)
# Usage with Transformers
```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

BASE_MODEL = "sh2orc/Llama-3-Korean-8B"

# Load the model in bfloat16 with FlashAttention-2 (requires the flash-attn package).
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
    device_map="cuda:0",
)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

instruction = '할부 결제 대해서 설명해줘'  # "Explain installment payments"

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
)

messages = [
    {"role": "user", "content": instruction},
]

# Render the conversation with the Llama-3 chat template.
prompt = pipe.tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

outputs = pipe(
    prompt,
    do_sample=True,
    temperature=0.8,
    top_k=10,
    top_p=0.9,
    add_special_tokens=True,
    # Stop on either the EOS token or Llama-3's end-of-turn token.
    eos_token_id=[
        pipe.tokenizer.eos_token_id,
        pipe.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ],
)

# Print only the generated continuation, skipping the prompt.
print(outputs[0]['generated_text'][len(prompt):])
```
# Result
<pre>
An installment plan is a way of paying for an item by splitting its price over a set period. For example, if you buy a 500,000-won product on a 10-month installment plan, you pay 50,000 won a month for 10 months. Installments have the advantage of letting you use the item interest-free for a certain period, but at the same time late fees can be charged and you take on debt. Therefore, when paying in installments you should carefully consider your financial situation and the item you are buying.
</pre>
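
If you prefer not to use the `pipeline` wrapper, the same generation can be done by calling `model.generate` directly on the templated prompt. A minimal sketch, reusing `model`, `tokenizer`, and `messages` from the snippet above; the sampling values are illustrative, not tuned recommendations:

```python
# Minimal sketch: direct generation without the pipeline wrapper.
# Tokenize the chat-templated prompt and move it to the model's device.
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)

# Stop on either the EOS token or Llama-3's end-of-turn token.
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

output_ids = model.generate(
    input_ids,
    max_new_tokens=1024,
    do_sample=True,
    temperature=0.8,  # illustrative sampling values
    top_p=0.9,
    eos_token_id=terminators,
    pad_token_id=tokenizer.eos_token_id,
)

# Decode only the newly generated tokens, skipping the prompt.
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))
```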
# Usage with vLLM
```python
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer

BASE_MODEL = "sh2orc/Llama-3-Korean-8B"

llm = LLM(model=BASE_MODEL)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

instruction = '카드 할부 결제에 대해서 알려줘'  # "Tell me about card installment payments"

messages = [
    {
        "role": "system",
        "content": "당신은 훌륭한 AI 비서입니다. You are a great AI assistant.",
    },
    {
        "role": "user",
        "content": instruction,
    },
]

# Render the conversation with the Llama-3 chat template.
prompt_message = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Stop on either the EOS token or Llama-3's end-of-turn token.
eos_token_id = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]

outputs = llm.generate(
    prompt_message,
    SamplingParams(stop_token_ids=eos_token_id, temperature=0.6, top_p=0.8, max_tokens=4096),
)

for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(generated_text)
```
# Result
<pre>
Card installment payment is a way of repaying the amount you charged in portions over a set period; in effect, you are repaying money lent to you by the card company. With card installments you can choose an amount you can repay over a fixed period, and you pay interest in the process. Card installments can be more favorable than paying in a lump sum, but because interest is charged, the total cost increases.
</pre>
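
vLLM can also batch several prompts in a single call, which is where it outperforms a plain Transformers loop. A minimal sketch reusing `llm`, `tokenizer`, and `eos_token_id` from the snippet above; the prompts are the two instructions used earlier in this card:

```python
from vllm import SamplingParams

# Minimal sketch: batched generation with vLLM, reusing `llm`, `tokenizer`,
# and `eos_token_id` from the snippet above.
instructions = [
    '할부 결제 대해서 설명해줘',       # "Explain installment payments"
    '카드 할부 결제에 대해서 알려줘',  # "Tell me about card installment payments"
]

# Apply the chat template to each instruction separately.
prompts = [
    tokenizer.apply_chat_template(
        [{"role": "user", "content": q}],
        tokenize=False,
        add_generation_prompt=True,
    )
    for q in instructions
]

sampling_params = SamplingParams(
    stop_token_ids=eos_token_id,
    temperature=0.6,
    top_p=0.8,
    max_tokens=1024,
)

# llm.generate accepts a list of prompts and batches them internally.
for output in llm.generate(prompts, sampling_params):
    print(output.outputs[0].text)
```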