pythainlp
/

thainer-corpus-v2-base-model

Token Classification

Inference Endpoints

Model card Files Files and versions Community

wannaphong committed on Mar 23, 2023

Commit

007695a

•

1 Parent(s): b3c710b

Update README.md

Files changed (1) hide show

README.md +2 -0

README.md CHANGED Viewed

@@ -39,6 +39,7 @@ Huggingface doesn't support inference token classification for Thai and It will
 ```python
 from transformers import AutoTokenizer
 from transformers import AutoModelForTokenClassification
 import torch
 name="pythainlp/thainer-corpus-v2-base-model"
@@ -46,6 +47,7 @@ tokenizer = AutoTokenizer.from_pretrained(name)
 model = AutoModelForTokenClassification.from_pretrained(name)
 sentence="ฉันชื่อ นางสาวมะลิวา บุญสระดี อาศัยอยู่ที่อำเภอนางรอง จังหวัดบุรีรัมย์ อายุ 23 ปี เพิ่งเรียนจบจาก มหาวิทยาลัยขอนแก่น และนี่คือข้อมูลปลอมชื่อคนไม่มีอยู่จริง อายุ 23 ปี"
 inputs=tokenizer(cut,is_split_into_words=True,return_tensors="pt")
 ids = inputs["input_ids"]

 ```python
 from transformers import AutoTokenizer
 from transformers import AutoModelForTokenClassification
+from pythainlp.tokenize import word_tokenize # pip install pythainlp
 import torch
 name="pythainlp/thainer-corpus-v2-base-model"
 model = AutoModelForTokenClassification.from_pretrained(name)
 sentence="ฉันชื่อ นางสาวมะลิวา บุญสระดี อาศัยอยู่ที่อำเภอนางรอง จังหวัดบุรีรัมย์ อายุ 23 ปี เพิ่งเรียนจบจาก มหาวิทยาลัยขอนแก่น และนี่คือข้อมูลปลอมชื่อคนไม่มีอยู่จริง อายุ 23 ปี"
+cut=word_tokenize(sentence.replace(" ", "<_>"))
 inputs=tokenizer(cut,is_split_into_words=True,return_tensors="pt")
 ids = inputs["input_ids"]