# HarryLee's picture
# Add app.py and datasets
# 88cace9
# raw
# history blame
# 1.4 kB
import streamlit as st
from bertopic import BERTopic
import re
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
# Streamlit app: the user enters a product topic and the app shows the
# BERTopic topic (fitted on the TikTok corpus) that is most similar to it.
st.set_page_config(page_title='eRupt Topic Trendy (e-Commerce x Social Media)', page_icon=None, layout='centered', initial_sidebar_state='auto')
st.markdown("<h1 style='text-align: center;'>Topic Trendy</h1>", unsafe_allow_html=True)

# Streamlit re-executes this whole script on every widget interaction, so the
# expensive model fit is wrapped in a cached function instead of running at
# module level. ``st.cache_resource`` is used when the installed Streamlit
# provides it; otherwise the legacy ``st.cache`` decorator is used.
if hasattr(st, "cache_resource"):
    _cache_model = st.cache_resource
else:
    _cache_model = st.cache(allow_output_mutation=True)


@_cache_model
def _fit_topic_model():
    """Fit a BERTopic model on the TikTok texts and return the fitted model.

    Reads ``./Data/tiktok_utf8.csv`` and uses its ``text`` column as the
    training documents. Rows with a missing ``text`` value are dropped and the
    remaining values are converted to ``str`` so that only strings reach the
    model.

    NOTE(review): the script previously also called
    ``BERTopic.load("my_topics_model")`` but never used the result; every
    displayed value came from the model fitted here. If the saved model is
    the intended one, return it from this function instead of fitting.
    """
    corpus = pd.read_csv('./Data/tiktok_utf8.csv')
    documents = corpus["text"].dropna().astype(str).to_list()
    vectorizer_model = CountVectorizer(stop_words="english")
    model = BERTopic(verbose=True, vectorizer_model=vectorizer_model)
    model.fit_transform(documents)
    return model


input_text = st.text_area("Enter product topic here")
topic_model = _fit_topic_model()

# Text shown in the result box; stays empty until there is something to show.
answer_as_string = ""
if input_text.strip():
    # Only query the model once the user has actually typed something.
    similar_topics, similarity = topic_model.find_topics(input_text, top_n=20)
    if similar_topics:
        # The first entry is treated as the best match, as before.
        most_similar = similar_topics[0]
        topic_words = topic_model.get_topic(most_similar)
        # Console diagnostics, kept from the original script.
        print(most_similar)
        print("Most Similar Topic Info: \n{}".format(topic_words))
        print("Similarity Score: {}".format(similarity[0]))
        # The widget displays text, so convert the topic representation
        # explicitly rather than relying on an implicit cast.
        answer_as_string = str(topic_words)

st.text_area("Most Similar Topic List is Here", answer_as_string, key="topic_list")
st.image('https://freepngimg.com/download/keyboard/6-2-keyboard-png-file.png', use_column_width=True)
st.markdown("<h6 style='text-align: center; color: #808080;'>Created By LiHE</a></h6>", unsafe_allow_html=True)