"""Streamlit demo: masked-token prediction followed by emotion/sentiment analysis.

The user types a sentence containing a mask token.  The masked language model
(queried through the Hugging Face Inference API) proposes completions, and a
locally loaded text classifier then labels every completed sentence.
"""

import pandas as pd
import streamlit as st
from huggingface_hub import InferenceApi
from transformers import pipeline

# NOTE(review): STYLE is blank here; presumably it once held CSS that was
# stripped from the file -- confirm against the deployed app.
STYLE = """ """

# Placeholder the fill-mask model replaces.  It must be non-empty: with an
# empty string the "is the token present?" check in main() can never fail and
# the default sentence contains nothing for the model to fill in.
# "<mask>" is the mask token used by RoBERTa tokenizers.
MASK_TOKEN = "<mask>"

# Emoji appended to each classifier label when rendering the results table.
EMOJI_MAP = {
    "anger": "😡",
    "fear": "😱",
    "happy": "😄",
    "love": "😍",
    "sadness": "😭",
    "positive": "🤗",
    "negative": "😤",
    "neutral": "😐",
}

# Model queried through the Inference API for masked-token prediction.
_MLM_MODEL = "flax-community/indonesian-roberta-base"

# Sidebar choice -> classifier checkpoint loaded with transformers.pipeline.
_CLASSIFIER_MODELS = {
    "Emotion": "StevenLimcorn/indonesian-roberta-base-emotion-classifier",
    "Sentiment": "w11wo/indonesian-roberta-base-sentiment-classifier",
}


def display_table(df: pd.DataFrame, subheader: str):
    """Render ``df`` as a static table underneath a sub-header.

    Args:
        df: Data to show.  ``main`` passes pandas ``Styler`` objects here,
            which are forwarded unchanged to ``st.table``.
        subheader: Title displayed above the table.
    """
    st.subheader(subheader)
    st.table(df)


def setup():
    """Emit the page style, the title/introduction and the sidebar header."""
    st.markdown(STYLE, unsafe_allow_html=True)
    st.markdown(
        """
        # 🇮🇩 Indonesian RoBERTa Base Demo
        Powered by [Indonesian RoBERTa Base](https://huggingface.co/flax-community/indonesian-roberta-base).
        """
    )
    st.sidebar.subheader("Settings")


def _left_aligned(frame: pd.DataFrame, columns):
    """Return a Styler for ``frame`` with ``columns`` left-aligned."""
    return frame.style.set_properties(subset=columns, **{"text-align": "left"})


def _classify_sequences(sequences, classifier) -> pd.DataFrame:
    """Classify each sequence and return a sequence/label/score frame.

    Args:
        sequences: Iterable of sentences to classify.
        classifier: Callable returning a list of ``{"label", "score"}`` dicts
            for a sentence; only the first entry is used.

    Returns:
        DataFrame with columns ``sequence``, ``label`` and ``score`` where
        each label is suffixed with its emoji from ``EMOJI_MAP``.
    """
    # Collect rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = [{"sequence": seq, **classifier(seq)[0]} for seq in sequences]
    frame = pd.DataFrame(rows, columns=["sequence", "label", "score"])
    frame["label"] = frame["label"].map(
        lambda label: f"{label} {EMOJI_MAP[label]}"
    )
    return frame


def main():
    """Run the app: collect input, show mask predictions, then classify them."""
    setup()

    analyze = st.sidebar.selectbox(
        "What should we analyze?",
        ("Emotion", "Sentiment"),
        help="Classifier model to choose for text analysis",
    )

    user_input = st.text_input(
        f"Insert a sentence to predict with a {MASK_TOKEN} token // Masukkan kalimat untuk diisi dengan token {MASK_TOKEN}",
        value=f"Gila! Hari ini aku {MASK_TOKEN} banget..",
    )

    # Nothing typed yet: leave the page with just the input widgets.
    if not user_input:
        return

    if MASK_TOKEN not in user_input:
        st.error(
            f"Please enter a sentence with the correct {MASK_TOKEN} token // Harap masukkan kalimat dengan token {MASK_TOKEN} yang benar"
        )
        return

    # --- masked language modeling table ---
    mask_api = InferenceApi(_MLM_MODEL)
    mlm_result = mask_api(inputs=user_input)
    # A usable answer is a non-empty list of candidate dicts.  Anything else
    # (None, or presumably an error payload while the hosted model is still
    # loading) cannot be tabulated, so ask the user to retry.
    if not isinstance(mlm_result, list) or not mlm_result:
        st.write("Model is loading. Please try again later...")
        return

    mlm_df = pd.DataFrame(mlm_result).drop(columns=["token", "token_str"])
    display_table(
        _left_aligned(mlm_df, ["sequence", "score"]),
        "🎈 Top 5 Predictions",
    )

    # --- sentiment / emotion table ---
    # The classifier is only built once there is something to classify, so
    # empty or invalid input never pays the model-loading cost.
    sa_model = _CLASSIFIER_MODELS[analyze]
    sa_pipeline = pipeline("sentiment-analysis", model=sa_model, tokenizer=sa_model)

    sa_df = _classify_sequences(mlm_df["sequence"].values, sa_pipeline)
    display_table(
        _left_aligned(sa_df, ["sequence", "label", "score"]),
        "🤔 By saying that, I guess you are feeling..",
    )


if __name__ == "__main__":
    main()