Spaces:
Runtime error
Runtime error
File size: 4,372 Bytes
88cace9 d68475c 88cace9 d68475c 88cace9 f47ef77 d68475c c45d768 d68475c c45d768 88cace9 3bc02d0 72c5fda 9d4506f ba12c1c c45d768 88cace9 32e768f 0b86869 87cd979 0b86869 72c5fda 9afc67e 2256ce9 0b86869 70f605e 72c5fda 0b86869 70f605e 72c5fda 0b86869 70f605e 72c5fda 0b86869 70f605e 72c5fda 0b86869 70f605e 72c5fda 0b86869 70f605e 72c5fda 0b86869 b45f0c1 32e768f b45f0c1 32e768f 88cace9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
from bertopic import BERTopic
import streamlit as st
import streamlit.components.v1 as components
from datasets import load_dataset
import pandas as pd
from sentence_transformers import SentenceTransformer
from umap import UMAP
from hdbscan import HDBSCAN
from sklearn.feature_extraction.text import CountVectorizer
# ---- Page chrome ---------------------------------------------------------
st.set_page_config(
    page_title='eRupt Topic Trendy (e-Commerce x Social Media)',
    page_icon=None,
    layout='centered',
    initial_sidebar_state='auto',
)
st.markdown(
    "<h1 style='text-align: center;'>Topic Trendy</h1>",
    unsafe_allow_html=True,
)

# ---- Disabled experiment: hand-tuned BERTopic pipeline --------------------
# Kept for reference only; none of these lines run.
#BerTopic_model = BERTopic.load("my_topics_model")
#sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
#umap_model = UMAP(n_neighbors=15, n_components=2, min_dist=0.1, metric="cosine")
#hdbscan_model = HDBSCAN(min_cluster_size=5, min_samples = 3, metric="euclidean", prediction_data=True)
#vectorizer_model = CountVectorizer(lowercase = True, ngram_range=(1, 3), analyzer="word", max_df=1.0, min_df=0.5, stop_words="english")
#kw_model = BERTopic(embedding_model=sentence_model, umap_model = umap_model, hdbscan_model = hdbscan_model, vectorizer_model = vectorizer_model, nr_topics = "auto", calculate_probabilities = True)
#BerTopic_model = kw_model

# ---- Corpus ---------------------------------------------------------------
# The CSV is read relative to the working directory; its `date` and `text`
# columns are pulled out as plain Python lists.
topic = pd.read_csv('./Data/tiktok_utf8.csv')
timestamps = topic.date.to_list()
tiktok = topic.text.to_list()

# ---- Default model --------------------------------------------------------
# Bag-of-words vectorizer with English stop words removed, handed to a
# BERTopic instance that otherwise uses its defaults.
vectorizer_model = CountVectorizer(stop_words="english")
topic_model = BERTopic(
    verbose=True,
    vectorizer_model=vectorizer_model,
)
def fit_transform(model, docs):
    """Fit ``model`` on ``docs`` and return the ``(topics, probs)`` pair.

    This helper is deliberately NOT cached. Its useful side effect is that
    ``model`` ends up fitted; a cache hit would return the stored result
    without running the body, leaving the caller's ``model`` unfitted and
    making any later query on it fail.

    Args:
        model: Object exposing ``fit_transform(docs)`` (a BERTopic instance
            in this script).
        docs: The documents to fit on.

    Returns:
        The two values produced by ``model.fit_transform(docs)``.
    """
    topics, probs = model.fit_transform(docs)
    return topics, probs


# ``allow_output_mutation=True`` stores the returned objects as-is instead of
# hashing them after every call, which is what a fitted model needs.
# NOTE(review): ``st.cache`` is the legacy caching API this script already
# uses; newer Streamlit releases offer ``st.cache_resource`` for this purpose
# -- confirm against the pinned Streamlit version before switching.
@st.cache(allow_output_mutation=True)
def _fit_topic_model(docs):
    """Build a BERTopic model, fit it on ``docs`` and return it with its output.

    The fitted model itself is part of the cached value, so every rerun of
    the script gets a model that can be queried straight away.

    Args:
        docs: The documents to fit on; they form the cache key.

    Returns:
        A ``(model, topics, probs)`` tuple.
    """
    # Same settings as the module-level model: English stop words, verbose.
    # The vectorizer is created here so the cached function does not depend
    # on a module-level object that fitting would modify.
    model = BERTopic(
        verbose=True,
        vectorizer_model=CountVectorizer(stop_words="english"),
    )
    topics, probs = fit_transform(model, docs)
    return model, topics, probs


# Disabled experiments, kept for reference only.
#topics, probs = fit_transform(topic_model, tiktok)
#topics_over_times = topic_model.topics_over_time(tiktok, topics, timestamps, nr_bins=20)
#topic_model.visualize_topics_over_time(topics_over_times, top_n_topics=30)
#topics, probs = topic_model.fit_transform(tiktok)
#placeholder = st.empty()
#text_input = placeholder.text_area("Enter product topic here", height=300)
#text_input = st.text_area("Enter product topic here", value = "motor")

# Product categories offered in the sidebar; the selected label is also the
# search term handed to the topic model.
EBAY_TOPICS = ["Motor", "Bicycle", "Beauty", "Basketball", "Fitness"]
# Search term used if the selection is ever not one of EBAY_TOPICS.
DEFAULT_EBAY_TOPIC = "Motor"

form = st.sidebar.form("Main Settings")
form.header("Main Settings")
ebay_topic = form.selectbox("eBay Products Topic Selection", EBAY_TOPICS)
# NOTE(review): the label mentions text length, but the value is passed to
# ``find_topics`` as ``top_n``. A count below 1 makes no sense there, so the
# widget is bounded at 1 and kept integral.
top_n = form.number_input(
    "What's the max length of the text?",
    value=10,
    min_value=1,
    step=1,
)
form.form_submit_button("Run")

# Every category previously ran the same three statements and differed only in
# the search term, so the term is resolved once and the model fitted once.
search_term = ebay_topic if ebay_topic in EBAY_TOPICS else DEFAULT_EBAY_TOPIC
topic_model, topics, probs = _fit_topic_model(tiktok)
similar_topics, similarity = topic_model.find_topics(search_term, top_n=top_n)
# Render the best match only when the search returned at least one topic id;
# otherwise nothing is shown for the result section.
if similar_topics:
    most_similar = similar_topics[0]
    # Debug aids, disabled:
    #print(similar_topics[0])
    #print("Most Similar Topic Info: \n{}".format(topic_model.get_topic(most_similar)))
    #print("Similarity Score: {}".format(similarity[0]))
    answer_as_string = topic_model.get_topic(most_similar)
    st.info("Extracted Topic")
    # The topic representation is handed to the widget as-is.
    st.text_area(
        "Most Similar Topic List is Here",
        answer_as_string,
        key="topic_list",
    )

# Decorative image, scaled to the column width.
st.image(
    'https://freepngimg.com/download/keyboard/6-2-keyboard-png-file.png',
    use_column_width=True,
)
#st.markdown("<h6 style='text-align: center; color: #808080;'>Created By LiHE</a></h6>", unsafe_allow_html=True)
|