Spaces:
Running
Running
kargaranamir
committed on
Commit
•
e7677fd
1
Parent(s):
ded4b0f
update app.py
Browse files
- app.py +51 -5
- assets/{GlotLID_logo.svg → glotlid_logo.svg} +0 -0
app.py
CHANGED
@@ -18,6 +18,7 @@ import altair as alt
|
|
18 |
from altair import X, Y, Scale
|
19 |
import base64
|
20 |
import json
|
|
|
21 |
|
22 |
@st.cache_resource
|
23 |
def load_sp():
|
@@ -71,6 +72,34 @@ def render_svg(svg):
|
|
71 |
c.write(html, unsafe_allow_html=True)
|
72 |
|
73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
@st.cache_data
|
75 |
def convert_df(df):
|
76 |
# IMPORTANT: Cache the conversion to prevent computation on every rerun
|
@@ -93,7 +122,7 @@ def load_GlotLID_v2(model_name, file_name):
|
|
93 |
model_1 = load_GlotLID_v1(constants.MODEL_NAME, "model_v1.bin")
|
94 |
model_2 = load_GlotLID_v2(constants.MODEL_NAME, "model_v2.bin")
|
95 |
|
96 |
-
@st.cache_resource
|
97 |
def plot(label, prob):
|
98 |
|
99 |
ORANGE_COLOR = "#FF8000"
|
@@ -164,6 +193,11 @@ st.markdown("[![Duplicate Space](https://img.shields.io/badge/-Duplicate%20Space
|
|
164 |
|
165 |
render_svg(open("assets/glotlid_logo.svg").read())
|
166 |
|
|
|
|
|
|
|
|
|
|
|
167 |
tab1, tab2 = st.tabs(["Input a Sentence", "Upload a File"])
|
168 |
|
169 |
with tab1:
|
@@ -192,18 +226,26 @@ with tab1:
|
|
192 |
clicked = st.button("Submit")
|
193 |
|
194 |
if sent:
|
195 |
-
sent = sent.replace('\n', '')
|
196 |
|
197 |
probs, labels = compute([sent], version=version)
|
198 |
prob = probs[0]
|
199 |
label = labels[0]
|
200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
# plot
|
202 |
plot(label, prob)
|
203 |
|
204 |
-
|
205 |
-
with open("logs.txt", "a") as f:
|
206 |
-
f.write(sent + "\n")
|
207 |
with tab2:
|
208 |
|
209 |
version = st.radio(
|
@@ -255,3 +297,7 @@ with tab2:
|
|
255 |
file_name="GlotLID.csv",
|
256 |
mime="text/csv",
|
257 |
)
|
|
|
|
|
|
|
|
|
|
18 |
from altair import X, Y, Scale
|
19 |
import base64
|
20 |
import json
|
21 |
+
import os
|
22 |
|
23 |
@st.cache_resource
|
24 |
def load_sp():
|
|
|
72 |
c.write(html, unsafe_allow_html=True)
|
73 |
|
74 |
|
75 |
+
@st.cache_data
|
76 |
+
def render_metadata():
|
77 |
+
"""Renders the metadata."""
|
78 |
+
html = r"""<p align="center">
|
79 |
+
<a href="https://huggingface.co/cis-lmu/glotlid"><img alt="HuggingFace Model" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-8A2BE2"></a>
|
80 |
+
<a href="https://github.com/cisnlp/GlotLID"><img alt="GitHub" src="https://img.shields.io/badge/%F0%9F%93%A6%20GitHub-orange"></a>
|
81 |
+
<a href="https://github.com/cisnlp/GlotLID/blob/main/LICENSE"><img alt="GitHub license" src="https://img.shields.io/github/license/cisnlp/GlotLID?logoColor=blue"></a>
|
82 |
+
<a href="."><img alt="GitHub stars" src="https://img.shields.io/github/stars/cisnlp/GlotLID"></a>
|
83 |
+
<a href="https://arxiv.org/abs/2310.16248"><img alt="arXiv" src="https://img.shields.io/badge/arXiv-2310.16248-b31b1b.svg"></a>
|
84 |
+
</p>"""
|
85 |
+
c = st.container()
|
86 |
+
c.write(html, unsafe_allow_html=True)
|
87 |
+
|
88 |
+
@st.cache_data
|
89 |
+
def citation():
|
90 |
+
"""Renders the metadata."""
|
91 |
+
_CITATION = """
|
92 |
+
@inproceedings{
|
93 |
+
kargaran2023glotlid,
|
94 |
+
title={GlotLID: Language Identification for Low-Resource Languages},
|
95 |
+
author={Kargaran, Amir Hossein and Imani, Ayyoob and Yvon, Fran{\c{c}}ois and Sch{\"u}tze, Hinrich},
|
96 |
+
booktitle={The 2023 Conference on Empirical Methods in Natural Language Processing},
|
97 |
+
year={2023},
|
98 |
+
url={https://openreview.net/forum?id=dl4e3EBz5j}
|
99 |
+
}"""
|
100 |
+
st.code(_CITATION, language="python", line_numbers=False)
|
101 |
+
|
102 |
+
|
103 |
@st.cache_data
|
104 |
def convert_df(df):
|
105 |
# IMPORTANT: Cache the conversion to prevent computation on every rerun
|
|
|
122 |
model_1 = load_GlotLID_v1(constants.MODEL_NAME, "model_v1.bin")
|
123 |
model_2 = load_GlotLID_v2(constants.MODEL_NAME, "model_v2.bin")
|
124 |
|
125 |
+
# @st.cache_resource
|
126 |
def plot(label, prob):
|
127 |
|
128 |
ORANGE_COLOR = "#FF8000"
|
|
|
193 |
|
194 |
render_svg(open("assets/glotlid_logo.svg").read())
|
195 |
|
196 |
+
render_metadata()
|
197 |
+
|
198 |
+
st.markdown("**GlotLID** is an open-source language identification model with support for more than **1600 languages**.")
|
199 |
+
|
200 |
+
|
201 |
tab1, tab2 = st.tabs(["Input a Sentence", "Upload a File"])
|
202 |
|
203 |
with tab1:
|
|
|
226 |
clicked = st.button("Submit")
|
227 |
|
228 |
if sent:
|
229 |
+
sent = sent.replace('\n', ' ')
|
230 |
|
231 |
probs, labels = compute([sent], version=version)
|
232 |
prob = probs[0]
|
233 |
label = labels[0]
|
234 |
|
235 |
+
|
236 |
+
# Check if the file exists
|
237 |
+
if not os.path.exists('logs.txt'):
|
238 |
+
with open('logs.txt', 'w') as file:
|
239 |
+
pass
|
240 |
+
|
241 |
+
print(f"{sent}, {label}: {prob}")
|
242 |
+
with open("logs.txt", "a") as f:
|
243 |
+
f.write(f"{sent}, {label}: {prob}\n")
|
244 |
+
|
245 |
# plot
|
246 |
plot(label, prob)
|
247 |
|
248 |
+
|
|
|
|
|
249 |
with tab2:
|
250 |
|
251 |
version = st.radio(
|
|
|
297 |
file_name="GlotLID.csv",
|
298 |
mime="text/csv",
|
299 |
)
|
300 |
+
|
301 |
+
|
302 |
+
|
303 |
+
# citation()
|
assets/{GlotLID_logo.svg → glotlid_logo.svg}
RENAMED
File without changes
|