---
language:
- ko
tags:
- gpt2
license: cc-by-nc-sa-4.0
---

## Model based on

[Ko-GPT-Trinity 1.2B (v0.5)](https://huggingface.co/skt/ko-gpt-trinity-1.2B-v0.5)

## Example

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and model; pick one of the two published revisions.
tokenizer = AutoTokenizer.from_pretrained(
    "CheonggyeMountain-Sherpa/kogpt-trinity-punct-wrapper",
    revision="punct_wrapper-related_words-overfit",  # or punct_wrapper-related_words-minevalloss
    bos_token="<s>",
    eos_token="</s>",
    unk_token="<unk>",
    pad_token="<pad>",
    mask_token="<mask>",
)
model = AutoModelForCausalLM.from_pretrained(
    "CheonggyeMountain-Sherpa/kogpt-trinity-punct-wrapper",
    revision="punct_wrapper-related_words-overfit",  # or punct_wrapper-related_words-minevalloss
    pad_token_id=tokenizer.eos_token_id,
).to(device="cuda")
model.eval()

prompt = "์์์ด ๋ณด์ด๋ ๊ฒฝ์น" |
|
wrapped_prompt = f"@{prompt}@<usr>\n" |
|
with torch.no_grad(): |
|
tokens = tokenizer.encode(wrapped_prompt, return_tensors="pt").to(device="cuda") |
|
gen_tokens = model.generate( |
|
tokens, |
|
max_length=64, |
|
repetition_penalty=2.0, |
|
pad_token_id=tokenizer.pad_token_id, |
|
eos_token_id=tokenizer.eos_token_id, |
|
bos_token_id=tokenizer.bos_token_id, |
|
top_k=16, |
|
top_p=0.8, |
|
) |
|
    generated = tokenizer.decode(gen_tokens[0][len(tokens[0]):])

print(generated)
# 해가 지고 있을 무렵 (Around the time the sun was setting)
# 나는 석양을 보러 갔다 (I went to watch the sunset)
# 붉은 하늘과 하얀 구름이 나를 반겨줄 것 같아서리 (It seemed the red sky and white clouds would welcome me)
# 하지만 내가 본 해는 저물어만 가고 (But the sun I saw only kept on sinking)
# 구름만은 자취를 감춘 어둠만이 남아있을 뿐이다 (Only darkness remained, the clouds having hidden their trace)
# 내가 탄 배는 보이지도 않고 (And the boat I boarded was nowhere to be seen)
```
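
For repeated use, the steps above can be folded into a small helper. This is a minimal sketch under the same settings: it reuses the `tokenizer` and `model` loaded above, and `generate_text` is a hypothetical name for illustration, not an API of this repository.

```python
def generate_text(prompt: str, max_length: int = 64) -> str:
    """Wrap `prompt` in the @...@<usr> format shown above, sample a
    continuation, and decode only the newly generated tokens."""
    wrapped_prompt = f"@{prompt}@<usr>\n"
    with torch.no_grad():
        tokens = tokenizer.encode(wrapped_prompt, return_tensors="pt").to(device="cuda")
        gen_tokens = model.generate(
            tokens,
            max_length=max_length,
            repetition_penalty=2.0,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
            bos_token_id=tokenizer.bos_token_id,
            do_sample=True,  # assumption: sampling enabled, as in the example above
            top_k=16,
            top_p=0.8,
        )
    # Drop the prompt tokens and return only the continuation.
    return tokenizer.decode(gen_tokens[0][len(tokens[0]):])

print(generate_text("석양이 보이는 경치"))
```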