# Page residue from the hosted listing this file was copied from:
# Spaces: Sleeping | File size: 6,135 Bytes
# Hashes shown in the listing: 87b6d93 fdb2104 87b6d93 e71fac5 87b6d93 b5e2136 87b6d93 ff5a321
from transformers import pipeline
import gradio as gr
from nltk.tokenize import sent_tokenize
from newspaper import Article
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')
# Models selectable in the UI: display name -> Hugging Face Hub model id.
# The first entry is used as the default choice of both dropdowns.
models = {
    "PFSA-ID-MED-IndoBERT-LEM": "damand2061/pfsa-id-med-indobert-lem",
    "PFSA-ID-IndoBERT-LEM": "damand2061/pfsa-id-indobert-lem",
}
# Pipelines that have already been built, keyed by the display name used in
# ``models``. Building a pipeline is expensive, so each model is built once.
_PIPELINE_CACHE = {}


def _bilou_to_bio(label):
    """Return *label* with a leading ``L-``/``U-`` tag rewritten to ``I-``/``B-``.

    Only the prefix is touched; any other occurrence of ``L-`` or ``U-``
    inside the label name is left alone.
    """
    if label.startswith("L-"):
        return "I-" + label[2:]
    if label.startswith("U-"):
        return "B-" + label[2:]
    return label


def load_model(model_name):
    """Return the NER pipeline for *model_name*, building it on first use.

    Args:
        model_name: A key of the module-level ``models`` mapping.

    Returns:
        The ``"ner"`` pipeline for the selected model, with its ``id2label``
        table rewritten so that ``L-`` tags become ``I-`` and ``U-`` tags
        become ``B-``.

    Raises:
        KeyError: If *model_name* is not a key of ``models``.
    """
    cached = _PIPELINE_CACHE.get(model_name)
    if cached is not None:
        return cached

    ner_pipeline = pipeline("ner", model=models[model_name])
    config = ner_pipeline.model.config
    # NOTE(review): presumably done so the pipeline's entity aggregation, which
    # works on B-/I- prefixes, can merge tokens tagged L-/U- -- confirm
    # against the model's label scheme.
    config.id2label = {
        idx: _bilou_to_bio(tag) for idx, tag in config.id2label.items()
    }
    _PIPELINE_CACHE[model_name] = ner_pipeline
    return ner_pipeline
def ner_text(Text, model_name):
    """Tag the entities found in *Text*, sentence by sentence.

    Args:
        Text: Raw input text. ``None``, empty, or whitespace-only input
            yields an empty result without loading any model.
        model_name: A key of the module-level ``models`` mapping.

    Returns:
        A list of ``(word, entity_group)`` tuples in the order the pipeline
        reported them, the form passed to the ``gr.HighlightedText`` output.
    """
    # Nothing to tag: return before the expensive model load.
    if not Text or not Text.strip():
        return []

    ner_pipeline = load_model(model_name)
    all_entities = []
    for sent in sent_tokenize(Text):
        output = ner_pipeline(sent, aggregation_strategy="max")
        all_entities.extend(
            (ent["word"], ent["entity_group"]) for ent in output
        )
    return all_entities
def ner_link(URL, model_name):
    """Download the article at *URL* and tag the entities in its body text.

    Args:
        URL: Address of the article. Surrounding whitespace is ignored;
            ``None``, empty, or whitespace-only input yields an empty result
            without any network access or model load.
        model_name: A key of the module-level ``models`` mapping.

    Returns:
        A list of ``(word, entity_group)`` tuples in the order the pipeline
        reported them, the form passed to the ``gr.HighlightedText`` output.
        The list is empty when the article yields no text.

    Errors raised by the download/parse step or by the model propagate to the
    caller unchanged.
    """
    if not URL or not URL.strip():
        return []

    article = Article(URL.strip(), language='id')
    article.download()
    article.parse()
    paragraph = article.text
    # No body text extracted: skip the model load.
    if not paragraph or not paragraph.strip():
        return []

    ner_pipeline = load_model(model_name)
    all_entities = []
    for sent in sent_tokenize(paragraph):
        output = ner_pipeline(sent, aggregation_strategy="max")
        all_entities.extend(
            (ent["word"], ent["entity_group"]) for ent in output
        )
    return all_entities
def update_label_descriptions(model_name):
    """Return a ``gr.DataFrame`` holding the label legend for *model_name*.

    ``label_descriptions`` is defined in this file as one flat list of
    ``[label, description]`` rows, so indexing it with a model name (a string)
    raised ``TypeError``. The flat table is now used as-is; a per-model
    mapping is still honoured should ``label_descriptions`` be a dict.

    Args:
        model_name: Display name of the selected model. Only consulted when
            ``label_descriptions`` is a dict keyed by model name.

    Returns:
        A ``gr.DataFrame`` with the headers ``"Label"`` and ``"Keterangan"``.
    """
    if isinstance(label_descriptions, dict):
        rows = label_descriptions[model_name]
    else:
        rows = label_descriptions
    return gr.DataFrame(rows, headers=["Label", "Keterangan"])
# Sample article URLs offered as one-click examples in the "Input Tautan" tab.
example_link = [
    "https://www.cnnindonesia.com/nasional/20240911102707-32-1143270/budi-arie-soal-jet-pribadi-erina-hamil-tak-boleh-naik-angkutan-umum",
    "https://nasional.tempo.co/read/1914500/respons-gibran-soal-akun-kaskus-fufufafa-yang-disebut-menyerang-prabowo",
    "https://news.detik.com/berita/d-7535151/komentar-jokowi-dan-gibran-yang-buka-suara-soal-kaesang-naik-jet-pribadi",
    "https://www.liputan6.com/news/read/5699055/jokowi-buka-suara-soal-dugaan-gratifikasi-jet-pribadi-kaesang",
]

# Sample article body offered as a one-click example in the "Input Teks" tab.
# The text is a runtime string: its lines must stay flush-left and unedited.
example_text = [
    """Jakarta, CNN Indonesia -- Menteri Komunikasi dan Informatika (Menkominfo) Budi Arie Setiadi mengaku sudah mempelajari Akun Kaskus fufufafa yang dituding warganet sebagai akun milik Wapres terpilih Gibran Rakabuming Raka.
"Udah, udah, udah (didalami). Iya maksudnya udah kita pelajari," kata Budi di Kompleks Parlemen, Jakarta, Selasa (10/9).
Budi Arie menegaskan akun tersebut bukan milik Gibran berdasarkan pendalaman yang dilakukan.
"Bukan lah bukan (Gibran)," tegasnya.
Kendati demikian, Budi tak menjawab dengan tegas ketika ditanya siapa pemilik sebenarnya akun yang disebut sering menghujat Ketua Umum Gerindra Prabowo Subianto selama masa Pemilu 2014 lalu itu.
Sebelumnya, Gibran juga telah buka suara soal akun fufufafa tersebut. Ia mengaku tak tahu menahu soal akun itu. Ia juga meminta wartawan untuk bertanya ke pemilik akun tersebut.
"Lha mbuh, takono sing duwe akun, kok aku (Tidak tahu, tanyakan ke yang punya akun. Kok ke saya)," jawab Gibran singkat usai blusukan di Kelurahan Sondakan, Kecamatan Laweyan, Solo, Selasa (10/9).
Sejumlah netizen mengunggah tangkapan layar beberapa postingan akun fufufafa di Kaskus yang diduga punya Gibran.
Akun itu menyindir Prabowo lewat tulisan, "Kasihan capres yg anaknya fashion designer ****" Postingan itu diunggah 17 September 2017.
Adapula tulisan, "Istri cerai anak **** Trus mau lebaran sama siapa?" Tulisan itu diunggah 19 Juni 2018.
Warganet menduga akun itu milik Gibran. Hal itu disimpulkan dari salah satu postingan fufufafa yang menyebut akun Twitter miliknya adalah @rkgbrn.
Akun Twitter itu pernah di-mention oleh akun @kaesangp milik Kaesang Pangarep, adik Gibran, pada 24 Juni 2012 di Twitter. Namun, cuitan itu telah dihapus.
(mab/isn)
""",
]
# Build the Gradio UI: a title, one tab per input mode (raw text / article
# URL), and a legend table explaining the entity labels.
# NOTE(review): the listing this was recovered from carried no indentation;
# the nesting below (click handlers and examples inside each tab, legend after
# the tabs) is the most plausible reading -- confirm against the deployed app.
with gr.Blocks() as demo:
    gr.Markdown("""
<div style='text-align: center;'>
<h1>Indonesian Quotation Extraction and Attribution</h1>
</div>
""")

    # Dropdown entries in declaration order; the first one is preselected.
    model_names = list(models.keys())

    with gr.Tabs():
        # --- Tab 1: tag text typed or pasted by the user ---
        with gr.TabItem("Input Teks"):
            with gr.Row():
                with gr.Column(scale=2):
                    text_input = gr.Textbox(
                        placeholder="Masukkan kalimat...",
                        label="Text",
                    )
                    model_select_text = gr.Dropdown(
                        choices=model_names,
                        value=model_names[0],
                        label="Model",
                    )
                    text_button = gr.Button("Predict", variant='primary')
                    # NOTE(review): Reset clears only the input box; the
                    # previous result stays visible.
                    gr.ClearButton(text_input, value="Reset")
                with gr.Column(scale=3):
                    text_output = gr.HighlightedText(label="Output")
            text_button.click(
                fn=ner_text,
                inputs=[text_input, model_select_text],
                outputs=text_output,
            )
            gr.Examples(example_text, inputs=text_input)

        # --- Tab 2: fetch an article by URL and tag its body ---
        with gr.TabItem("Input Tautan"):
            with gr.Row():
                with gr.Column(scale=2):
                    link_input = gr.Textbox(
                        placeholder="Masukkan tautan...",
                        label="URL",
                    )
                    model_select_link = gr.Dropdown(
                        choices=model_names,
                        value=model_names[0],
                        label="Model",
                    )
                    link_button = gr.Button("Predict", variant='primary')
                    gr.ClearButton(link_input, value="Reset")
                with gr.Column(scale=3):
                    link_output = gr.HighlightedText(label="Output")
            link_button.click(
                fn=ner_link,
                inputs=[link_input, model_select_link],
                outputs=link_output,
            )
            gr.Examples(example_link, inputs=link_input)

    # Legend: entity label -> Indonesian description shown to the user.
    gr.Markdown("## Penjelasan Label")
    label_descriptions = [
        ["EVENT", "Acara"],
        ["LOCATION", "Lokasi"],
        ["DATETIME", "Waktu"],
        ["ISSUE", "Isu"],
        ["STATEMENT", "Pernyataan"],
        ["CUECOREF", "Isyarat Pronomina"],
        ["CUE", "Isyarat"],
        ["AFFILIATION", "Afiliasi"],
        ["ROLE", "Jabatan"],
        ["PERSONCOREF", "Pronomina Orang"],
        ["PERSON", "Orang"],
    ]
    gr.DataFrame(label_descriptions, headers=["Label", "Keterangan"])
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)