Spaces:

x-g85
/

fake-news

Running

File size: 7,347 Bytes

39c1ac4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe86b7f
 
39c1ac4
 
c1810f7
 
39c1ac4
 
 
 
 
 
 
 
fe86b7f
39c1ac4
c1810f7
 
 
39c1ac4
fe86b7f
39c1ac4
 
 
fe86b7f
39c1ac4
 
 
fe86b7f
39c1ac4
 
 
 
fe86b7f
39c1ac4
 
 
 
 
fe86b7f
39c1ac4
 
 
 
 
 
 
 
 
 
 
 
 
fe86b7f
 
 
39c1ac4
 
 
 
c1810f7
39c1ac4
 
 
 
 
 
 
 
 
 
 
fe86b7f
 
39c1ac4
 
fe86b7f
 
 
39c1ac4
 
 
 
 
fe86b7f
 
 
39c1ac4
fe86b7f
39c1ac4
c1810f7
fe86b7f
39c1ac4
fe86b7f
39c1ac4
 
 
 
 
fe86b7f
39c1ac4
fe86b7f
39c1ac4
 
 
 
 
 
 
 
 
 
 
c1810f7
 
39c1ac4
fe86b7f
 
 
 
 
 
 
39c1ac4
c1810f7
39c1ac4
fe86b7f
 
 
 
 
c1810f7
39c1ac4
c1810f7
 
 
 
39c1ac4
c1810f7
 
 
39c1ac4
c1810f7
 
fe86b7f
39c1ac4
c1810f7

import streamlit as st
from transformers import pipeline

############ SETTING UP THE PAGE LAYOUT AND TITLE ############

# Configure the browser tab (title and icon) and select the centered page layout.
st.set_page_config(page_title="X_G85 Fake News", page_icon="📑", layout="centered")

############ CREATE THE LOGO AND HEADING ############

# Two side-by-side columns: a narrow one for the logo and a wide one for the heading.
logo_column, heading_column = st.columns([0.32, 2])

# Left column: an empty caption followed by the document emoji used as the logo.
logo_column.caption("")
logo_column.title("📑")

# Right column: an empty caption followed by the app heading.
heading_column.caption("")
heading_column.title("X_G85 Fake News")

# Seed the session flag on the first run so later reads never hit a missing key.
if "valid_inputs_received" not in st.session_state:
    st.session_state.valid_inputs_received = False

############ SIDEBAR CONTENT ############

# Markdown description shown in the sidebar for each selectable model.
# The `None` entry covers the case where no model is selected.
MODEL_INFO = {
    "Bert": """
    #### [BERT base model (uncased)](https://huggingface.co/google-bert/bert-base-uncased)
    Pretrained model on English language using a masked language modeling (MLM) objective. It was introduced in this paper and first released in this repository. This model is uncased: it does not make a difference between english and English.
    """,
    "Roberta": """
    #### [jy46604790/Fake-News-Bert-Detect](https://huggingface.co/jy46604790/Fake-News-Bert-Detect)
    This model is trained by over 40,000 news from different medias based on the 'roberta-base'. It can give result by simply entering the text of the news less than 500 words(the excess will be truncated automatically).
    """,
    "Lstm": """
    #### [X_G85 Fake News LSTM MODEL](https://huggingface.co/x-g85)
    It is trained on the provided datasets\n
    Notebook: [Fake News using Lstm](https://www.kaggle.com/code/adamalrahman/fake-news-using-lstm)
    """,
    None: "NO MODEL SELECTED",
}

# Everything created inside this context is rendered in the sidebar.
with st.sidebar:
    st.subheader("Model Options")
    st.write("")

    # Model selection
    SELECTED_MODEL = st.selectbox("Choose a model", ("Bert", "Roberta", "Lstm"))

    # Clear the "valid input" flag whenever a model is selected.
    # NOTE(review): this executes on every script run with a truthy selection,
    # not only when the selection changes — confirm that is intended.
    if SELECTED_MODEL:
        st.session_state["valid_inputs_received"] = False

    # Bordered box describing the currently selected model.
    info_box = st.container(border=True)
    info_box.markdown("### Model Information\n")
    info_box.markdown(MODEL_INFO[SELECTED_MODEL])

    # Bordered copyright notice at the bottom of the sidebar.
    st.container(border=True).markdown(
        "Copyright ©️ 2024 [X_G85](https://huggingface.co/x-g85)"
    )


############ TABBED NAVIGATION ############

# Two top-level tabs: the classifier UI ("Main") and a static about page ("Info").
TAB_LABELS = ["Main", "Info"]
MainTab, InfoTab = st.tabs(TAB_LABELS)

with InfoTab:
    # Short description of the app.
    st.subheader("X_G85 Fake News")
    about_text = (
        "It is fake news detection based on the following models trained on datasets"
    )
    st.markdown(about_text)

    # Source datasets the project lists as its training data.
    st.subheader("Datasets")
    st.markdown(
        """
        We have used the following datasets to create our own datasets and train models.
        - [Kaggle: Fake news detection dataset english](https://www.kaggle.com/datasets/sadikaljarif/fake-news-detection-dataset-english)
        - [Kaggle: Liar Preprocessed](https://www.kaggle.com/datasets/khandalaryan/liar-preprocessed-dataset)
        - [Kaggle: Stocknews](https://www.kaggle.com/datasets/aaron7sun/stocknews)
        """
    )

    # Upstream checkpoints credited by the app.
    st.subheader("Credits")
    st.markdown(
        """
        - Bert: [google-bert/bert-base-uncased](https://huggingface.co/google-bert/bert-base-uncased) 
        - Roberta: [jy46604790/Fake-News-Bert-Detect](https://huggingface.co/jy46604790/Fake-News-Bert-Detect)
        """
    )

    # Empty write, then a bordered copyright notice.
    st.write("")
    st.container(border=True).markdown(
        "Copyright ©️ 2024 [X_G85](https://huggingface.co/x-g85)"
    )


def MODEL_RESULT(model: str, news: str) -> str | None:
    if model == "Roberta":
        MODEL_jy46604790 = "jy46604790/Fake-News-Bert-Detect"
        classifier = pipeline(
            "text-classification", model=MODEL_jy46604790, tokenizer=MODEL_jy46604790
        )
        result = classifier(news)

        if result[0]["label"] == "LABEL_1":
            return "REAL NEWS"
        else:
            return "FAKE NEWS"

    # TODO(Adam-Al-Rahman): Complete the statement
    if model == "Bert":
        pass

    if model == "Lstm":
        from utils import modelx

        return modelx(arch=model, model_path="models/lstm/x_g85_lstm.keras", text=news)


with MainTab:
    # Intro text for the app, followed by a box showing the selected model.

    st.write("")
    st.markdown("Classify News based on the selected ml model.")
    st.write("")
    container = st.container(border=True)
    container.write(f"Selected model: {SELECTED_MODEL}")

    with st.form(key="form"):
        # Sample article pre-filled in the text area.
        pre_defined_news = "Indonesian police have recaptured a U.S. citizen who escaped a week ago from an overcrowded prison on the holiday island of Bali, the jail's second breakout of foreign inmates this year.  Cristian Beasley from California was rearrested on Sunday, Badung Police Chief Yudith Satria Hananta said, without providing further details.  Beasley was a suspect in crimes related to narcotics but had not been sentenced when he escaped from Kerobokan prison in Bali last week. The 32-year-old is believed to have cut through bars in the ceiling of his cell before scaling a perimeter wall of the prison in an area being refurbished. The Kerobokan prison, about 10 km (six miles) from the main tourist beaches in the Kuta area, often holds foreigners facing drug-related charges. Representatives of Beasley could not immediately be reached for comment. In June, an Australian, a Bulgarian, an Indian, and a Malaysian tunneled to freedom about 12 meters (13 yards) under Kerobokan prison s walls. The Indian and the Bulgarian were caught soon after in neighboring East Timor, but Australian Shaun Edward Davidson and Malaysian Tee Kok King remain at large. Davidson has taunted authorities by saying he was enjoying life in various parts of the world, in purported posts on Facebook.  Kerobokan has housed several well-known foreign drug convicts, including Australian Schappelle Corby, whose 12-1/2-year sentence for marijuana smuggling got huge media attention."

        news = st.text_area(
            "Enter news to classify",
            pre_defined_news,
            height=200,
            help="Please provide the news that you need to verify for its truthfulness.",
            key="news",
        )

        submit_button = st.form_submit_button(label="Submit")

        # Treat whitespace-only text as empty so a blank submission shows the
        # warning instead of being sent to the classifier.
        has_news = bool(news and news.strip())

        if (
            not has_news
            and not submit_button
            and not st.session_state.valid_inputs_received
        ):
            # Nothing entered and nothing submitted: end this script run.
            st.stop()

        elif submit_button and not has_news:
            st.warning("📑 There is no news to classify")
            st.session_state.valid_inputs_received = False
            st.stop()

        elif submit_button or st.session_state.valid_inputs_received:
            if submit_button:
                st.session_state.valid_inputs_received = True

            # Default Model: Bert
            MODEL = SELECTED_MODEL if SELECTED_MODEL else "Bert"
            result = MODEL_RESULT(model=MODEL, news=news)

            # A falsy result (e.g. None) is reported as a model error.
            if result:
                st.success(f"Result: {result}")
            else:
                st.error(f"{MODEL} model error")