# Spaces: Sleeping
from ast import literal_eval
def make_lang_list(row):
    """Return the parsed ``languages`` value of one row.

    Args:
        row: Mapping-like row (anything supporting ``row["languages"]``).
            The value is either the sentinel string ``"none"``, the string
            form of a Python literal such as ``"['en', 'fr']"``, or a list
            that has already been parsed.

    Returns:
        ``[]`` for the ``"none"`` sentinel, the list itself when the value
        is already a list, otherwise whatever ``ast.literal_eval`` produces
        for the string.

    Raises:
        ValueError, SyntaxError: propagated from ``literal_eval`` when the
            string is not a valid Python literal.
    """
    languages = row["languages"]

    # literal_eval only accepts strings/AST nodes; handing it a list raises
    # ValueError, so an already-parsed value is passed through untouched.
    if isinstance(languages, list):
        return languages

    if languages == "none":
        return []

    return literal_eval(languages)
def language_count(row):
    """Return the number of entries in the row's ``languages`` value.

    Args:
        row: Mapping-like row whose ``"languages"`` entry supports ``len()``.

    Returns:
        ``len(row["languages"])``.
    """
    return len(row["languages"])
def process_for_lang(data, modality):
    """Filter a model table by modality and derive per-row language columns.

    Args:
        data: pandas DataFrame with a ``modality`` column and a ``languages``
            column. Each ``languages`` cell is parsed with
            ``ast.literal_eval`` unless it is missing, the string
            ``"False"``, an empty dict, or already a list.
        modality: ``"NLP"``, ``"Audio"`` or ``"Multimodal"`` keeps only the
            rows whose ``modality`` equals the lower-cased label; any other
            value keeps every row.

    Returns:
        A 4-tuple ``(data, no_lang_count, total_langs, unique_langs)``:

        * ``data`` - a new DataFrame (the input is never modified) whose
          ``languages`` column holds parsed values, plus ``language_count``
          (number of languages per row) and ``multilingual`` (1 when
          ``"multilingual"`` is among the row's languages, else 0).
        * ``no_lang_count`` - number of rows that had no languages.
        * ``total_langs`` - number of distinct languages over all rows.
        * ``unique_langs`` - array of those distinct languages.

    Raises:
        ValueError, SyntaxError: propagated from ``literal_eval`` when a
            cell is not a valid Python literal.
    """
    # Filter by modality (membership test uses ==, so any value is accepted).
    if modality in ("NLP", "Audio", "Multimodal"):
        data = data[data["modality"] == modality.lower()]

    # Work on a private copy: without it the caller's frame is rewritten in
    # place when no filter applies, and the writes below target a slice of
    # the original when one does.
    data = data.copy()

    # Normalise the "no languages" markers to missing values.
    data.loc[data["languages"] == "False", "languages"] = None
    data.loc[data["languages"] == {}, "languages"] = None

    # Count of rows that have no languages.
    no_lang_count = data["languages"].isna().sum()

    def _parse_languages(raw):
        """Turn one ``languages`` cell into its parsed value."""
        if isinstance(raw, list):
            # Already parsed (e.g. output of an earlier call).
            return raw
        if raw == "none":
            return []
        return literal_eval(raw)

    # Column-wise map instead of DataFrame.apply(axis=1): the latter returns
    # a DataFrame for an empty frame, which breaks the column assignment.
    data["languages"] = data["languages"].fillna("none").map(_parse_languages)
    data["language_count"] = data["languages"].map(len).astype("int64")

    # Only rows with at least one language contribute to the language set.
    models_with_langs = data[data["language_count"] > 0]
    langs = models_with_langs["languages"].explode()
    langs = langs[langs != {}]
    unique_langs = langs.unique()
    total_langs = len(unique_langs)

    data["multilingual"] = (
        data["languages"]
        .map(lambda row_langs: int("multilingual" in row_langs))
        .astype("int64")
    )

    return data, no_lang_count, total_langs, unique_langs
def filter_multilinguality(data, linguality):
    """Select rows according to how many languages they cover.

    Args:
        data: pandas DataFrame with a ``multilingual`` column (1 marks a row
            tagged as multilingual) and a ``language_count`` column.
        linguality: ``"Just Multilingual"`` keeps rows that are tagged
            multilingual or list more than one language;
            ``"Three or more languages"`` keeps rows with a
            ``language_count`` of at least 3; any other value applies no
            filter.

    Returns:
        The filtered DataFrame, or ``data`` itself when no filter applies.
    """
    if linguality == "Just Multilingual":
        tagged_multilingual = data["multilingual"] == 1
        several_languages = data["language_count"] > 1
        return data[tagged_multilingual | several_languages]

    if linguality == "Three or more languages":
        return data[data["language_count"] >= 3]

    return data