Merge branch 'main' of https://huggingface.co/spaces/haotle/sdkTest
pages/2 Topic Modeling.py (CHANGED, +19 -20)
@@ -749,6 +749,9 @@ with st.popover("🔗 Menu"):
     st.page_link("pages/5 Burst Detection.py", label="Burst Detection", icon="5️⃣")
     st.page_link("pages/6 Keywords Stem.py", label="Keywords Stem", icon="6️⃣")
     st.page_link("pages/7 Sentiment Analysis.py", label="Sentiment Analysis", icon="7️⃣")
+    st.page_link("pages/8 Shifterator.py", label="Shifterator", icon="8️⃣")
+    st.page_link("pages/9 Summarization.py", label="Summarization", icon="9️⃣")
+    st.page_link("pages/10 WordCloud.py", label="WordCloud", icon="🔟")
 
 st.header("Topic Modeling", anchor=False)
 st.subheader('Put your file here...', anchor=False)
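Note: with ten numbered pages now in the popover, one way to keep the menu maintainable is to drive the st.page_link calls from a single list. This is a hypothetical refactor sketch, not part of the commit; it only reuses the page paths, labels, and icons shown above.

import streamlit as st

# Hypothetical: declare the menu entries once and render the links in a loop.
MENU_PAGES = [
    ("pages/8 Shifterator.py", "Shifterator", "8️⃣"),
    ("pages/9 Summarization.py", "Summarization", "9️⃣"),
    ("pages/10 WordCloud.py", "WordCloud", "🔟"),
]

with st.popover("🔗 Menu"):
    for path, label, icon in MENU_PAGES:
        st.page_link(path, label=label, icon=icon)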
@@ -871,7 +874,7 @@ if uploaded_file is not None:
     method = c1.selectbox(
         'Choose method',
         ('Choose...', 'pyLDA', 'Biterm', 'BERTopic'))
-    ColCho = c2.selectbox('Choose column', (["Title", "Abstract"]))
+    ColCho = c2.selectbox('Choose column', (["Abstract", "Title", "Abstract + Title"]))
     num_cho = c3.number_input('Choose number of topics', min_value=2, max_value=30, value=5)
 
     d1, d2 = st.columns([3,7])
@@ -910,8 +913,8 @@ if uploaded_file is not None:
     if fine_tuning:
         topic_labelling = st.toggle("Automatic topic labelling")
         if topic_labelling:
-
-            if
+            llm_provider = st.selectbox("Model", ["OpenAI/gpt-4o", "Google/flan-t5", "LiquidAI/LFM2-350M"])
+            if llm_provider == "OpenAI/gpt-4o":
                 api_key = st.text_input("API Key")
 
     else:
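Only the OpenAI/gpt-4o option requires a user-supplied key, which is why the added if llm_provider == ... wraps the existing API Key field. A minimal sketch of that pattern follows, with a hypothetical empty-key guard; the guard, the type="password" argument, and the message text are assumptions, not part of the commit.

import streamlit as st

# Sketch: gate the API-key input on the chosen provider and stop early if it is missing.
llm_provider = st.selectbox("Model", ["OpenAI/gpt-4o", "Google/flan-t5", "LiquidAI/LFM2-350M"])
if llm_provider == "OpenAI/gpt-4o":
    api_key = st.text_input("API Key", type="password")
    if not api_key:
        st.info("Enter an OpenAI API key to enable automatic topic labelling.")
        st.stop()  # hypothetical guard so later code never runs with an empty key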
@@ -920,6 +923,10 @@ if uploaded_file is not None:
     #===clean csv===
     @st.cache_data(ttl=3600, show_spinner=False)
     def clean_csv(extype):
+        if ColCho == "Abstract + Title":
+            papers["Abstract + Title"] = papers["Title"] + " " + papers["Abstract"]
+            st.write(papers["Abstract + Title"])
+
         paper = papers.dropna(subset=[ColCho])
 
         #===mapping===
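Note that papers["Title"] + " " + papers["Abstract"] yields NaN whenever either column is missing, so those rows are then removed by the dropna on the combined column. A minimal pandas sketch of the combination step; the fillna variant at the end is an assumption about an alternative, not what the diff does.

import pandas as pd

papers = pd.DataFrame({
    "Title": ["A study of X", None, "Paper C"],
    "Abstract": ["Long abstract ...", "Only an abstract", None],
})

# As in the diff: concatenation propagates NaN, so rows missing either field
# are dropped by the later dropna(subset=[ColCho]).
papers["Abstract + Title"] = papers["Title"] + " " + papers["Abstract"]

# Hypothetical alternative: keep partially filled rows by treating missing
# values as empty strings before concatenating.
combined = (papers["Title"].fillna("") + " " + papers["Abstract"].fillna("")).str.strip()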
@@ -1202,37 +1209,30 @@ if uploaded_file is not None:
             "MMR": mmr,
         }
         if topic_labelling:
-            if
+            if llm_provider == "OpenAI/gpt-4o":
                 client = openai.OpenAI(api_key=api_key)
                 representation_model = {
                     "KeyBERT": keybert,
                     "MMR": mmr,
                     "test": OpenAI(client, model = "gpt-4o-mini", delay_in_seconds=10)
                 }
-            elif
-
-                clientmod = TextGeneration(
+            elif llm_provider == "Google/flan-t5":
+                pipe = pipeline("text2text-generation", model = "google/flan-t5-base")
+                clientmod = TextGeneration(pipe)
                 representation_model = {
                     "KeyBERT": keybert,
                     "MMR": mmr,
                     "test": clientmod
                 }
-            elif
-
-
-                    torch_dtype = "auto",
-                    device_map = "auto",
-                )
-                clientmod = TextGeneration(gen)
-
+            elif llm_provider == "LiquidAI/LFM2-350M":
+                pipe = pipeline("text-generation", model = "LiquidAI/LFM2-350M")
+                clientmod = TextGeneration(pipe)
                 representation_model = {
                     "KeyBERT": keybert,
                     "MMR": mmr,
-                    "test":
+                    "test": clientmod
                 }
 
-
-
         vectorizer_model = CountVectorizer(ngram_range=(1, xgram), stop_words='english')
         topic_model = BERTopic(representation_model = representation_model, embedding_model=model, hdbscan_model=cluster_model, language=lang, umap_model=umap_model, vectorizer_model=vectorizer_model, top_n_words=bert_top_n_words)
         topics, probs = topic_model.fit_transform(topic_abs, embeddings=embeddings)
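All three branches end in the same representation_model dict; only the labelling backend in the "test" slot changes. A minimal, self-contained sketch of that mapping follows. It assumes keybert and mmr are BERTopic's KeyBERTInspired and MaximalMarginalRelevance representations created earlier in the file, and the build_representation helper is hypothetical; the model names and the OpenAI/TextGeneration calls are taken from the diff.

import openai
from bertopic.representation import KeyBERTInspired, MaximalMarginalRelevance, OpenAI, TextGeneration
from transformers import pipeline

keybert = KeyBERTInspired()
mmr = MaximalMarginalRelevance(diversity=0.3)

def build_representation(llm_provider, api_key=None):
    # Hypothetical helper mirroring the diff's if/elif chain.
    rep = {"KeyBERT": keybert, "MMR": mmr}
    if llm_provider == "OpenAI/gpt-4o":
        client = openai.OpenAI(api_key=api_key)
        # Note: for a chat-style model such as gpt-4o-mini, BERTopic's OpenAI
        # representation may also need chat=True; the diff does not set it.
        rep["test"] = OpenAI(client, model="gpt-4o-mini", delay_in_seconds=10)
    elif llm_provider == "Google/flan-t5":
        rep["test"] = TextGeneration(pipeline("text2text-generation", model="google/flan-t5-base"))
    elif llm_provider == "LiquidAI/LFM2-350M":
        rep["test"] = TextGeneration(pipeline("text-generation", model="LiquidAI/LFM2-350M"))
    return rep

The returned dict can then be passed straight to BERTopic(representation_model=...), exactly as the unchanged lines at the end of the hunk above do.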
@@ -1343,8 +1343,7 @@ if uploaded_file is not None:
         st.button("Download Results")
         st.text("Click Download results button at bottom of page")
 
-    except
+    except:
         st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
-        st.write(e)
         st.stop()
 >>>>>>> e52d4a30c18f770eb968980667fa8e5a7b287580
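Two points on this last hunk. First, the new bare except: drops the exception object that the removed st.write(e) used to surface, so failures become harder to diagnose. Second, the stray >>>>>>> e52d4a30... line appears to be a leftover Git conflict marker that remains in the merged file (line 1349 of the new version) and will raise a SyntaxError when the page is imported. A minimal sketch of an error handler that keeps the user-facing banner while still exposing the underlying exception; the st.exception call and the run_topic_modeling stand-in are assumptions, not part of the diff.

import streamlit as st

try:
    run_topic_modeling()  # hypothetical stand-in for the analysis block above
except Exception as e:
    st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
    st.exception(e)  # or log it instead of showing the traceback to end users
    st.stop()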