Spaces:

flax-community
/

roberta-indonesian

Runtime error

File size: 2,614 Bytes

aa805a6
 
 
 
 
 
 
 
 
 
 
 
 
 
5c5eade
 
 
 
aa805a6
 
5c5eade
aa805a6
 
5c5eade
 
 
 
 
 
 
aa805a6
5c5eade
 
 
aa805a6
 
 
5c5eade
aa805a6
5c5eade
 
aa805a6
5c5eade
 
 
 
 
 
 
 
 
 
 
 
 
aa805a6
 
 
 
 
5c5eade
 
 
aa805a6
5c5eade
 
 
 
 
 
 
 
aa805a6
5c5eade
 
 
 
aa805a6
5c5eade
 
aa805a6
5c5eade
 
aa805a6
 
5c5eade
aa805a6

import streamlit as st
from huggingface_hub import InferenceApi
import pandas as pd
from transformers import pipeline

# Page-wide CSS, injected as raw HTML by setup() through
# st.markdown(..., unsafe_allow_html=True). It caps image width, left-aligns
# table headers and targets table cells.
# NOTE(review): the `td { font-size: }` declaration has no value, so it is not
# valid CSS and presumably has no effect in the browser -- confirm the intended
# size or drop the rule.
STYLE = """
<style>
img {
    max-width: 100%;
}

th {
    text-align: left!important
}

td {
    font-size:
}
</style>
"""

# Placeholder the user must include in the input sentence; main() rejects any
# input that does not contain it.
MASK_TOKEN = "<mask>"

# Emoji appended to each emotion label before the results table is shown.
# Keys are presumably the labels emitted by the emotion classifier -- verify
# against the model card if the classifier is swapped.
EMOTION_MAP = dict(
    anger="😡",
    fear="😱",
    happy="😄",
    love="😍",
    sadness="😭",
)


def display_table(df: pd.DataFrame, subheader: str):
    """Render a heading followed by a static table.

    Args:
        df: Table content, passed unchanged to ``st.table``. Callers in this
            file pass styled frames (the result of ``DataFrame.style...``).
        subheader: Heading text shown above the table.
    """
    # Heading first, then the table, so they appear in that order on the page.
    for render, content in ((st.subheader, subheader), (st.table, df)):
        render(content)


def setup():
    """Inject the custom CSS into the page and render the app title."""
    page_title = "🇮🇩 Indonesian RoBERTa Base"
    # STYLE is a raw <style> block, so HTML rendering has to be enabled.
    st.markdown(STYLE, unsafe_allow_html=True)
    st.title(page_title)


def main():
    """Run the app: fill the masked token, then classify each completion.

    Flow:
      1. Render the page chrome and the text input.
      2. Validate that the input contains ``MASK_TOKEN``.
      3. Query the hosted masked-language model and show its predictions.
      4. Run the emotion classifier on every predicted sentence and show the
         labels, decorated with the matching emoji from ``EMOTION_MAP``.

    Models are only instantiated once the input has passed validation, so an
    empty or invalid input no longer pays for loading the classifier.
    """
    setup()

    user_input = st.text_input(
        f"Insert a sentence to predict with a {MASK_TOKEN} token // Masukkan kalimat untuk diisi dengan token {MASK_TOKEN}",
        value=f"Aduh... gimana nih.. hari ini {MASK_TOKEN} banget...",
    )

    # Nothing to predict until the user has typed something.
    if not user_input:
        return

    if MASK_TOKEN not in user_input:
        st.error(
            f"Please enter a sentence with the correct {MASK_TOKEN} token // Harap masukkan kalimat dengan token {MASK_TOKEN} yang benar"
        )
        return

    # --- masked language modeling table ---
    mlm_model = "flax-community/indonesian-roberta-base"
    mask_api = InferenceApi(mlm_model)
    mlm_result = mask_api(inputs=user_input)

    # The hosted API can answer with something other than a flat list of
    # predictions (e.g. an error payload while the model is loading, or nested
    # lists when the input holds several mask tokens). Building the table from
    # such a response would raise, so report it instead.
    mlm_df = pd.DataFrame(mlm_result) if isinstance(mlm_result, list) else None
    if mlm_df is None or not {"sequence", "score"}.issubset(mlm_df.columns):
        st.error(
            f"Could not get predictions from {mlm_model}: {mlm_result} // Gagal mendapatkan prediksi dari model"
        )
        return

    # Only the completed sentence and its score are shown to the user.
    mlm_df = mlm_df.drop(columns=["token", "token_str"], errors="ignore")
    mlm_df_styled = mlm_df.style.set_properties(
        subset=["sequence", "score"], **{"text-align": "left"}
    )
    display_table(mlm_df_styled, "🎈 Top 5 Predictions")

    # --- sentiment analysis table ---
    sa_model = "StevenLimcorn/indonesian-roberta-base-emotion-classifier"
    sa_pipeline = pipeline("sentiment-analysis", model=sa_model, tokenizer=sa_model)

    # Collect one row per predicted sentence and build the frame in one go:
    # DataFrame.append was removed in pandas 2.0, and growing a frame row by
    # row copies it on every iteration.
    sa_rows = [
        {"sequence": sequence, **sa_pipeline(sequence)[0]}
        for sequence in mlm_df["sequence"]
    ]
    sa_df = pd.DataFrame(sa_rows, columns=["sequence", "label", "score"])

    # Labels without a known emoji are shown as-is instead of raising KeyError.
    sa_df["label"] = sa_df["label"].map(
        lambda label: f"{label} {EMOTION_MAP.get(label, '')}".rstrip()
    )
    sa_df_styled = sa_df.style.set_properties(
        subset=["sequence", "label", "score"], **{"text-align": "left"}
    )
    display_table(sa_df_styled, "🤔 By saying that, I guess you are feeling..")


# Streamlit executes the script as ``__main__``, so the app still starts under
# ``streamlit run``; the guard only prevents a plain ``import`` of this module
# from launching the UI and loading the models.
if __name__ == "__main__":
    main()