Spaces:
Build error
Build error
Add the original Indonesian GPT-2 models. Update info about the app.
Browse files
- app/app.py +50 -10
- app/prompts.py +21 -0
app/app.py
CHANGED
@@ -13,28 +13,40 @@ import pathlib
|
|
13 |
# st.set_page_config(page_title="Indonesian GPT-2")
|
14 |
|
15 |
MODELS = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
"Indonesian Literature - GPT-2 Small": {
|
17 |
"group": "Indonesian Literature",
|
18 |
"name": "cahya/gpt2-small-indonesian-story",
|
19 |
-
"description": "Indonesian Literature Generator using fine-tuned small GPT-2 model",
|
20 |
"text_generator": None
|
21 |
},
|
22 |
"Indonesian Literature - GPT-2 Medium": {
|
23 |
"group": "Indonesian Literature",
|
24 |
"name": "cahya/gpt2-medium-indonesian-story",
|
25 |
-
"description": "Indonesian Literature Generator using fine-tuned medium GPT-2 model",
|
26 |
"text_generator": None
|
27 |
},
|
28 |
"Indonesian Academic Journal - GPT-2 Small": {
|
29 |
"group": "Indonesian Journal",
|
30 |
"name": "Galuh/id-journal-gpt2",
|
31 |
-
"description": "Indonesian Journal Generator using fine-tuned small GPT-2 model",
|
32 |
"text_generator": None
|
33 |
},
|
34 |
"Indonesian Persona Chatbot - GPT-2 Small": {
|
35 |
"group": "Indonesian Persona Chatbot",
|
36 |
"name": "cahya/gpt2-small-indonesian-personachat",
|
37 |
-
"description": "Indonesian Persona Chatbot using fine-tuned small GPT-2 model",
|
38 |
"text_generator": None
|
39 |
},
|
40 |
}
|
@@ -52,6 +64,31 @@ def stc_chatbot(root_dir, width=700, height=900):
|
|
52 |
html = html.replace('<script src="js/main.js"></script>', "<script>\n" + js + "\n</script>")
|
53 |
stc.html(html, width=width, height=height, scrolling=True)
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
model = st.sidebar.selectbox('Model', (MODELS.keys()))
|
57 |
|
@@ -77,12 +114,12 @@ def process(text_generator, text: str, max_length: int = 100, do_sample: bool =
|
|
77 |
|
78 |
st.title("Indonesian GPT-2 Applications")
|
79 |
prompt_group_name = MODELS[model]["group"]
|
80 |
-
st.
|
81 |
description = f"This application is a demo for {MODELS[model]['description']}"
|
82 |
st.markdown(description)
|
83 |
model_name = f"Model name: [{MODELS[model]['name']}](https://huggingface.co/{MODELS[model]['name']})"
|
84 |
st.markdown(model_name)
|
85 |
-
if prompt_group_name in ["Indonesian Literature", "Indonesian Journal"]:
|
86 |
session_state = SessionState.get(prompt=None, prompt_box=None, text=None)
|
87 |
ALL_PROMPTS = list(PROMPT_LIST[prompt_group_name].keys())+["Custom"]
|
88 |
|
@@ -128,17 +165,20 @@ if prompt_group_name in ["Indonesian Literature", "Indonesian Journal"]:
|
|
128 |
value=True
|
129 |
)
|
130 |
|
131 |
-
top_k =
|
132 |
top_p = 0.95
|
133 |
|
134 |
if do_sample:
|
135 |
top_k = st.sidebar.number_input(
|
136 |
"Top k",
|
137 |
-
value=top_k
|
|
|
138 |
)
|
139 |
top_p = st.sidebar.number_input(
|
140 |
"Top p",
|
141 |
-
value=top_p
|
|
|
|
|
142 |
)
|
143 |
|
144 |
seed = st.sidebar.number_input(
|
@@ -148,7 +188,7 @@ if prompt_group_name in ["Indonesian Literature", "Indonesian Journal"]:
|
|
148 |
)
|
149 |
|
150 |
for group_name in MODELS:
|
151 |
-
if MODELS[group_name]["group"] in ["Indonesian Literature", "Indonesian Journal"]:
|
152 |
MODELS[group_name]["text_generator"] = get_generator(MODELS[group_name]["name"])
|
153 |
# text_generator = get_generator()
|
154 |
if st.button("Run"):
|
|
|
13 |
# st.set_page_config(page_title="Indonesian GPT-2")
|
14 |
|
15 |
MODELS = {
|
16 |
+
"Indonesian GPT-2 Small": {
|
17 |
+
"group": "Indonesian GPT-2",
|
18 |
+
"name": "flax-community/gpt2-small-indonesian",
|
19 |
+
"description": "The original Indonesian small GPT-2 model.",
|
20 |
+
"text_generator": None
|
21 |
+
},
|
22 |
+
"Indonesian GPT-2 Medium": {
|
23 |
+
"group": "Indonesian GPT-2",
|
24 |
+
"name": "flax-community/gpt2-medium-indonesian",
|
25 |
+
"description": "The original Indonesian medium GPT-2 model.",
|
26 |
+
"text_generator": None
|
27 |
+
},
|
28 |
"Indonesian Literature - GPT-2 Small": {
|
29 |
"group": "Indonesian Literature",
|
30 |
"name": "cahya/gpt2-small-indonesian-story",
|
31 |
+
"description": "The Indonesian Literature Generator using fine-tuned small GPT-2 model.",
|
32 |
"text_generator": None
|
33 |
},
|
34 |
"Indonesian Literature - GPT-2 Medium": {
|
35 |
"group": "Indonesian Literature",
|
36 |
"name": "cahya/gpt2-medium-indonesian-story",
|
37 |
+
"description": "The Indonesian Literature Generator using fine-tuned medium GPT-2 model.",
|
38 |
"text_generator": None
|
39 |
},
|
40 |
"Indonesian Academic Journal - GPT-2 Small": {
|
41 |
"group": "Indonesian Journal",
|
42 |
"name": "Galuh/id-journal-gpt2",
|
43 |
+
"description": "The Indonesian Journal Generator using fine-tuned small GPT-2 model.",
|
44 |
"text_generator": None
|
45 |
},
|
46 |
"Indonesian Persona Chatbot - GPT-2 Small": {
|
47 |
"group": "Indonesian Persona Chatbot",
|
48 |
"name": "cahya/gpt2-small-indonesian-personachat",
|
49 |
+
"description": "The Indonesian Persona Chatbot using fine-tuned small GPT-2 model.",
|
50 |
"text_generator": None
|
51 |
},
|
52 |
}
|
|
|
64 |
html = html.replace('<script src="js/main.js"></script>', "<script>\n" + js + "\n</script>")
|
65 |
stc.html(html, width=width, height=height, scrolling=True)
|
66 |
|
67 |
+
st.sidebar.markdown("""
|
68 |
+
<style>
|
69 |
+
.aligncenter {
|
70 |
+
text-align: center;
|
71 |
+
}
|
72 |
+
</style>
|
73 |
+
<p class="aligncenter">
|
74 |
+
<img src="https://huggingface.co/spaces/flax-community/gpt2-indonesian/resolve/main/huggingwayang.png"/>
|
75 |
+
</p>
|
76 |
+
""", unsafe_allow_html=True)
|
77 |
+
st.sidebar.markdown("""
|
78 |
+
___
|
79 |
+
<p style='text-align: center'>
|
80 |
+
This is a collection of Applications that generates sentences using Indonesian GPT-2 models!
|
81 |
+
</p>
|
82 |
+
<p style='text-align: center'>
|
83 |
+
Created by <a href="https://huggingface.co/indonesian-nlp">Indonesian NLP</a> team @2021
|
84 |
+
<br/>
|
85 |
+
<a href="https://github.com/indonesian-nlp/gpt2-app" target="_blank">GitHub</a> | <a href="https://github.com/indonesian-nlp/gpt2-app" target="_blank">Project Report</a>
|
86 |
+
</p>
|
87 |
+
""", unsafe_allow_html=True)
|
88 |
+
|
89 |
+
st.sidebar.markdown("""
|
90 |
+
___
|
91 |
+
""", unsafe_allow_html=True)
|
92 |
|
93 |
model = st.sidebar.selectbox('Model', (MODELS.keys()))
|
94 |
|
|
|
114 |
|
115 |
st.title("Indonesian GPT-2 Applications")
|
116 |
prompt_group_name = MODELS[model]["group"]
|
117 |
+
st.header(prompt_group_name)
|
118 |
description = f"This application is a demo for {MODELS[model]['description']}"
|
119 |
st.markdown(description)
|
120 |
model_name = f"Model name: [{MODELS[model]['name']}](https://huggingface.co/{MODELS[model]['name']})"
|
121 |
st.markdown(model_name)
|
122 |
+
if prompt_group_name in ["Indonesian GPT-2", "Indonesian Literature", "Indonesian Journal"]:
|
123 |
session_state = SessionState.get(prompt=None, prompt_box=None, text=None)
|
124 |
ALL_PROMPTS = list(PROMPT_LIST[prompt_group_name].keys())+["Custom"]
|
125 |
|
|
|
165 |
value=True
|
166 |
)
|
167 |
|
168 |
+
top_k = 30
|
169 |
top_p = 0.95
|
170 |
|
171 |
if do_sample:
|
172 |
top_k = st.sidebar.number_input(
|
173 |
"Top k",
|
174 |
+
value=top_k,
|
175 |
+
help="The number of highest probability vocabulary tokens to keep for top-k-filtering."
|
176 |
)
|
177 |
top_p = st.sidebar.number_input(
|
178 |
"Top p",
|
179 |
+
value=top_p,
|
180 |
+
help="If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher "
|
181 |
+
"are kept for generation."
|
182 |
)
|
183 |
|
184 |
seed = st.sidebar.number_input(
|
|
|
188 |
)
|
189 |
|
190 |
for group_name in MODELS:
|
191 |
+
if MODELS[group_name]["group"] in ["Indonesian GPT-2", "Indonesian Literature", "Indonesian Journal"]:
|
192 |
MODELS[group_name]["text_generator"] = get_generator(MODELS[group_name]["name"])
|
193 |
# text_generator = get_generator()
|
194 |
if st.button("Run"):
|
app/prompts.py
CHANGED
@@ -1,4 +1,25 @@
|
|
1 |
PROMPT_LIST = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
"Indonesian Literature": {
|
3 |
"Adult Romance": [
|
4 |
"Ini adalah kisah tentang seorang laki-laki yang berusaha memperjuangkan cintanya",
|
|
|
1 |
PROMPT_LIST = {
|
2 |
+
"Indonesian GPT-2": {
|
3 |
+
"Resep masakan (recipe)": [
|
4 |
+
"Berikut adalah cara memasak sate ayam:\n",
|
5 |
+
"Langkah-langkah membuat nasi goreng:\n",
|
6 |
+
"Berikut adalah bahan-bahan membuat nastar:\n"
|
7 |
+
],
|
8 |
+
"Puisi (poetry)": [
|
9 |
+
"Aku ingin jadi merpati\nTerbang di langit yang damai\nBernyanyi-nyanyi tentang masa depan\n",
|
10 |
+
"Terdiam aku satu persatu dengan tatapan binar\nSenyawa merasuk dalam sukma membuat lara\nKefanaan membentuk kelemahan"
|
11 |
+
],
|
12 |
+
"Cerpen (short story)": [
|
13 |
+
"Putri memakai sepatunya dengan malas. Kalau bisa, selama seminggu ini ia bolos sekolah saja. Namun, Mama pasti akan marah. Ulangan tengah semester telah selesai. Minggu ini, di sekolah sedang berlangsung pekan olahraga.",
|
14 |
+
"\"Wah, hari ini cerah sekali ya,\" ucap Budi ketika ia keluar rumah.",
|
15 |
+
"Sewindu sudah kita tak berjumpa, rinduku padamu sudah tak terkira."
|
16 |
+
],
|
17 |
+
"Sejarah (history)": [
|
18 |
+
"Mohammad Natsir adalah seorang ulama, politisi, dan pejuang kemerdekaan Indonesia.",
|
19 |
+
"Ir. H. Soekarno adalah Presiden pertama Republik Indonesia. Ia adalah seorang tokoh perjuangan yang memainkan peranan penting dalam memerdekakan bangsa Indonesia",
|
20 |
+
"Borobudur adalah sebuah candi Buddha yang terletak di sebelah barat laut Yogyakarta. Monumen ini merupakan model alam semesta dan dibangun sebagai tempat suci untuk memuliakan Buddha"
|
21 |
+
],
|
22 |
+
},
|
23 |
"Indonesian Literature": {
|
24 |
"Adult Romance": [
|
25 |
"Ini adalah kisah tentang seorang laki-laki yang berusaha memperjuangkan cintanya",
|