# NER demo (Streamlit app) for Setswana models.
# NOTE(review): original header said "Turkish NER Demo", but the title and the
# example sentence are Setswana; header updated to match the visible code.
from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer, DebertaV2Tokenizer, DebertaV2Model
import sentencepiece  # imported for side effects: some HF tokenizers require it at load time
import streamlit as st
import pandas as pd
import spacy

st.set_page_config(layout="wide")

# Canned Setswana example offered in the "Select from Examples" input mode.
example_list = [
    "Moso ono mo dikgang tsa ura le ura, o tsoga le Oarabile Moamogwe go simolola ka 05:00 - 10:00"
]

# Fixed typo in the user-facing title: "Sestwana" -> "Setswana".
st.title("Demo for Setswana NER Models")

# Checkpoints selectable in the sidebar.
model_list = ['dsfsi/PuoBERTa-NER', 'akdeniz27/convbert-base-turkish-cased-ner']

st.sidebar.header("Select NER Model")
model_checkpoint = st.sidebar.radio("", model_list)
st.sidebar.write("For details of models: 'https://huggingface.co/akdeniz27/")
st.sidebar.write("")

# Choose the token-aggregation strategy for the HF pipeline.
# The original chain also tested checkpoints that are not in model_list and can
# never be selected (dead branches, including dead sidebar writes); those are
# removed. For the two selectable checkpoints the result is unchanged:
# PuoBERTa -> "simple", anything else -> "first".
if model_checkpoint == "dsfsi/PuoBERTa-NER":
    aggregation = "simple"
else:
    aggregation = "first"

st.subheader("Select Text Input Method")
input_method = st.radio("", ('Select from Examples', 'Write or Paste New Text', 'Upload CSV File'))

if input_method == 'Select from Examples':
    selected_text = st.selectbox('Select Text from List', example_list, index=0, key=1)
    st.subheader("Text to Run")
    input_text = st.text_area("Selected Text", selected_text, height=128, max_chars=None, key=2)
elif input_method == "Write or Paste New Text":
    st.subheader("Text to Run")
    input_text = st.text_area('Write or Paste Text Below', value="", height=128, max_chars=None, key=2)
elif input_method == "Upload CSV File":
    st.subheader("Upload CSV File")
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    if uploaded_file is not None:
        df_csv = pd.read_csv(uploaded_file)
        st.write(df_csv)
lines = [] text_column = st.selectbox("Select the column containing text", df_csv.columns) for index, row in df_csv.iterrows(): st.write(f"Processing row {index + 1}:") input_text = row[text_column] @st.cache_resource def setModel(model_checkpoint, aggregation): tokenizer = AutoTokenizer.from_pretrained("dsfsi/PuoBERTa-NER") model = AutoModelForTokenClassification.from_pretrained("dsfsi/PuoBERTa-NER") return pipeline("token-classification", model=model, tokenizer=tokenizer, aggregation_strategy=aggregation) @st.cache_resource def get_html(html: str): WRAPPER = """