Spaces:
Running
Running
Update pages/2 Topic Modeling.py
Browse files- pages/2 Topic Modeling.py +19 -20
pages/2 Topic Modeling.py
CHANGED
@@ -74,6 +74,9 @@ with st.popover("🔗 Menu"):
|
|
74 |
st.page_link("pages/5 Burst Detection.py", label="Burst Detection", icon="5️⃣")
|
75 |
st.page_link("pages/6 Keywords Stem.py", label="Keywords Stem", icon="6️⃣")
|
76 |
st.page_link("pages/7 Sentiment Analysis.py", label="Sentiment Analysis", icon="7️⃣")
|
|
|
|
|
|
|
77 |
|
78 |
st.header("Topic Modeling", anchor=False)
|
79 |
st.subheader('Put your file here...', anchor=False)
|
@@ -196,7 +199,7 @@ if uploaded_file is not None:
|
|
196 |
method = c1.selectbox(
|
197 |
'Choose method',
|
198 |
('Choose...', 'pyLDA', 'Biterm', 'BERTopic'))
|
199 |
-
ColCho = c2.selectbox('Choose column', (["Title","Abstract"]))
|
200 |
num_cho = c3.number_input('Choose number of topics', min_value=2, max_value=30, value=5)
|
201 |
|
202 |
d1, d2 = st.columns([3,7])
|
@@ -235,8 +238,8 @@ if uploaded_file is not None:
|
|
235 |
if fine_tuning:
|
236 |
topic_labelling = st.toggle("Automatic topic labelling")
|
237 |
if topic_labelling:
|
238 |
-
|
239 |
-
if
|
240 |
api_key = st.text_input("API Key")
|
241 |
|
242 |
else:
|
@@ -245,6 +248,10 @@ if uploaded_file is not None:
|
|
245 |
#===clean csv===
|
246 |
@st.cache_data(ttl=3600, show_spinner=False)
|
247 |
def clean_csv(extype):
|
|
|
|
|
|
|
|
|
248 |
paper = papers.dropna(subset=[ColCho])
|
249 |
|
250 |
#===mapping===
|
@@ -527,37 +534,30 @@ if uploaded_file is not None:
|
|
527 |
"MMR": mmr,
|
528 |
}
|
529 |
if topic_labelling:
|
530 |
-
if
|
531 |
client = openai.OpenAI(api_key=api_key)
|
532 |
representation_model = {
|
533 |
"KeyBERT": keybert,
|
534 |
"MMR": mmr,
|
535 |
"test": OpenAI(client, model = "gpt-4o-mini", delay_in_seconds=10)
|
536 |
}
|
537 |
-
elif
|
538 |
-
|
539 |
-
clientmod = TextGeneration(
|
540 |
representation_model = {
|
541 |
"KeyBERT": keybert,
|
542 |
"MMR": mmr,
|
543 |
"test": clientmod
|
544 |
}
|
545 |
-
elif
|
546 |
-
|
547 |
-
|
548 |
-
torch_dtype = "auto",
|
549 |
-
device_map = "auto",
|
550 |
-
)
|
551 |
-
clientmod = TextGeneration(gen)
|
552 |
-
|
553 |
representation_model = {
|
554 |
"KeyBERT": keybert,
|
555 |
"MMR": mmr,
|
556 |
-
"test":
|
557 |
}
|
558 |
|
559 |
-
|
560 |
-
|
561 |
vectorizer_model = CountVectorizer(ngram_range=(1, xgram), stop_words='english')
|
562 |
topic_model = BERTopic(representation_model = representation_model, embedding_model=model, hdbscan_model=cluster_model, language=lang, umap_model=umap_model, vectorizer_model=vectorizer_model, top_n_words=bert_top_n_words)
|
563 |
topics, probs = topic_model.fit_transform(topic_abs, embeddings=embeddings)
|
@@ -668,7 +668,6 @@ if uploaded_file is not None:
|
|
668 |
st.button("Download Results")
|
669 |
st.text("Click Download results button at bottom of page")
|
670 |
|
671 |
-
except
|
672 |
st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
|
673 |
-
st.write(e)
|
674 |
st.stop()
|
|
|
74 |
st.page_link("pages/5 Burst Detection.py", label="Burst Detection", icon="5️⃣")
|
75 |
st.page_link("pages/6 Keywords Stem.py", label="Keywords Stem", icon="6️⃣")
|
76 |
st.page_link("pages/7 Sentiment Analysis.py", label="Sentiment Analysis", icon="7️⃣")
|
77 |
+
st.page_link("pages/8 Shifterator.py", label="Shifterator", icon="8️⃣")
|
78 |
+
st.page_link("pages/9 Summarization.py", label = "Summarization",icon ="9️⃣")
|
79 |
+
st.page_link("pages/10 WordCloud.py", label = "WordCloud", icon = "🔟")
|
80 |
|
81 |
st.header("Topic Modeling", anchor=False)
|
82 |
st.subheader('Put your file here...', anchor=False)
|
|
|
199 |
method = c1.selectbox(
|
200 |
'Choose method',
|
201 |
('Choose...', 'pyLDA', 'Biterm', 'BERTopic'))
|
202 |
+
ColCho = c2.selectbox('Choose column', (["Abstract","Title", "Abstract + Title"]))
|
203 |
num_cho = c3.number_input('Choose number of topics', min_value=2, max_value=30, value=5)
|
204 |
|
205 |
d1, d2 = st.columns([3,7])
|
|
|
238 |
if fine_tuning:
|
239 |
topic_labelling = st.toggle("Automatic topic labelling")
|
240 |
if topic_labelling:
|
241 |
+
llm_provider = st.selectbox("Model",["OpenAI/gpt-4o","Google/flan-t5","LiquidAI/LFM2-350M"])
|
242 |
+
if llm_provider == "OpenAI/gpt-4o":
|
243 |
api_key = st.text_input("API Key")
|
244 |
|
245 |
else:
|
|
|
248 |
#===clean csv===
|
249 |
@st.cache_data(ttl=3600, show_spinner=False)
|
250 |
def clean_csv(extype):
|
251 |
+
if (ColCho=="Abstract + Title"):
|
252 |
+
papers["Abstract + Title"] = papers["Title"] + " " + papers["Abstract"]
|
253 |
+
st.write(papers["Abstract + Title"])
|
254 |
+
|
255 |
paper = papers.dropna(subset=[ColCho])
|
256 |
|
257 |
#===mapping===
|
|
|
534 |
"MMR": mmr,
|
535 |
}
|
536 |
if topic_labelling:
|
537 |
+
if llm_provider == "OpenAI/gpt-4o":
|
538 |
client = openai.OpenAI(api_key=api_key)
|
539 |
representation_model = {
|
540 |
"KeyBERT": keybert,
|
541 |
"MMR": mmr,
|
542 |
"test": OpenAI(client, model = "gpt-4o-mini", delay_in_seconds=10)
|
543 |
}
|
544 |
+
elif llm_provider == "Google/flan-t5":
|
545 |
+
pipe = pipeline("text2text-generation", model = "google/flan-t5-base")
|
546 |
+
clientmod = TextGeneration(pipe)
|
547 |
representation_model = {
|
548 |
"KeyBERT": keybert,
|
549 |
"MMR": mmr,
|
550 |
"test": clientmod
|
551 |
}
|
552 |
+
elif llm_provider == "LiquidAI/LFM2-350M":
|
553 |
+
pipe = pipeline("text-generation", model = "LiquidAI/LFM2-350M")
|
554 |
+
clientmod = TextGeneration(pipe)
|
|
|
|
|
|
|
|
|
|
|
555 |
representation_model = {
|
556 |
"KeyBERT": keybert,
|
557 |
"MMR": mmr,
|
558 |
+
"test": clientmod
|
559 |
}
|
560 |
|
|
|
|
|
561 |
vectorizer_model = CountVectorizer(ngram_range=(1, xgram), stop_words='english')
|
562 |
topic_model = BERTopic(representation_model = representation_model, embedding_model=model, hdbscan_model=cluster_model, language=lang, umap_model=umap_model, vectorizer_model=vectorizer_model, top_n_words=bert_top_n_words)
|
563 |
topics, probs = topic_model.fit_transform(topic_abs, embeddings=embeddings)
|
|
|
668 |
st.button("Download Results")
|
669 |
st.text("Click Download results button at bottom of page")
|
670 |
|
671 |
+
except:
|
672 |
st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
|
|
|
673 |
st.stop()
|