Spaces:
Runtime error
Runtime error
File size: 2,815 Bytes
88cace9 d68475c 88cace9 d68475c 88cace9 f47ef77 d68475c c45d768 d68475c c45d768 88cace9 3bc02d0 9d4506f ba12c1c c45d768 88cace9 1d605d4 736fc04 1d605d4 9afc67e b45f0c1 88cace9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
from bertopic import BERTopic
import streamlit as st
import streamlit.components.v1 as components
from datasets import load_dataset
import pandas as pd
from sentence_transformers import SentenceTransformer
from umap import UMAP
from hdbscan import HDBSCAN
from sklearn.feature_extraction.text import CountVectorizer
# Browser-tab title and page layout for the app.
_PAGE_SETTINGS = dict(
    page_title='eRupt Topic Trendy (e-Commerce x Social Media)',
    page_icon=None,
    layout='centered',
    initial_sidebar_state='auto',
)
st.set_page_config(**_PAGE_SETTINGS)

# Centered page heading, emitted as raw HTML (hence unsafe_allow_html).
_HEADER_HTML = "<h1 style='text-align: center;'>Topic Trendy</h1>"
st.markdown(_HEADER_HTML, unsafe_allow_html=True)
#BerTopic_model = BERTopic.load("my_topics_model")
#sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
#umap_model = UMAP(n_neighbors=15, n_components=2, min_dist=0.1, metric="cosine")
#hdbscan_model = HDBSCAN(min_cluster_size=5, min_samples = 3, metric="euclidean", prediction_data=True)
#vectorizer_model = CountVectorizer(lowercase = True, ngram_range=(1, 3), analyzer="word", max_df=1.0, min_df=0.5, stop_words="english")
#kw_model = BERTopic(embedding_model=sentence_model, umap_model = umap_model, hdbscan_model = hdbscan_model, vectorizer_model = vectorizer_model, nr_topics = "auto", calculate_probabilities = True)
#BerTopic_model = kw_model
# Load the corpus; the CSV is read for its `date` and `text` columns.
topic = pd.read_csv('./Data/tiktok_utf8.csv')
timestamps = topic.date.to_list()
tiktok = topic.text.to_list()


def fit_transform(model, docs):
    """Fit *model* on *docs* and return ``(topics, probs)``.

    Thin wrapper around ``model.fit_transform``. It is deliberately NOT
    cached: caching only the returned values would skip the fit on a cache
    hit and leave the caller's model object unfitted.
    """
    topics, probs = model.fit_transform(docs)
    return topics, probs


@st.cache(allow_output_mutation=True)
def _fit_topic_model(docs):
    """Build a BERTopic model, fit it on *docs*, and return the fitted model.

    The cache key is derived from *docs* only (a list of strings), so no
    model object has to be hashed. The fitted model is part of the cached
    return value, which means a cache hit hands back a model that is ready
    for ``find_topics`` / ``get_topic`` instead of a fresh, unfitted one.
    ``allow_output_mutation=True`` stops Streamlit from hashing the model
    on every access to check whether it was mutated.

    Returns:
        Tuple ``(model, topics, probs)`` where ``topics`` and ``probs`` are
        the values returned by ``model.fit_transform(docs)``.
    """
    vectorizer = CountVectorizer(stop_words="english")
    model = BERTopic(verbose=True, vectorizer_model=vectorizer)
    doc_topics, doc_probs = fit_transform(model, docs)
    return model, doc_topics, doc_probs


topic_model, topics, probs = _fit_topic_model(tiktok)
# Keep the module-level name the original script defined.
vectorizer_model = topic_model.vectorizer_model
#topics_over_times = topic_model.topics_over_time(tiktok, topics, timestamps, nr_bins=20)
#topic_model.visualize_topics_over_time(topics_over_times, top_n_topics=30)
#topics, probs = topic_model.fit_transform(tiktok)
# Query box, rendered inside an st.empty() placeholder.
placeholder = st.empty()
text_input = placeholder.text_area("Enter product topic here", height=300)

# Sidebar slider value is passed to find_topics as `top_n`.
top_n = st.sidebar.slider("Select a number of keywords", 1, 30, 5, 1)

# Fall back to a default query when nothing has been entered;
# whitespace-only input is treated as empty.
if not text_input.strip():
    text_input = "motor"

similar_topics, similarity = topic_model.find_topics(text_input, top_n=top_n)

if similar_topics:
    # The first entry is taken as the best match.
    most_similar = similar_topics[0]
    topic_info = topic_model.get_topic(most_similar)
    print("Most Similar Topic Info: \n{}".format(topic_info))
    print("Similarity Score: {}".format(similarity[0]))

    # Convert explicitly so the variable really holds the displayed text.
    answer_as_string = str(topic_info)
    st.info("Extracted Topic")
    st.text_area("Most Similar Topic List is Here", answer_as_string, key="topic_list")
else:
    # Nothing matched: say so instead of referencing undefined results.
    st.warning("No similar topic found for this query.")

st.image('https://freepngimg.com/download/keyboard/6-2-keyboard-png-file.png', use_column_width=True)
# Footer credit (the original markup had an unmatched closing </a> tag).
st.markdown("<h6 style='text-align: center; color: #808080;'>Created By LiHE</h6>", unsafe_allow_html=True)
|