T Le committed on
Commit
06e7e95
·
2 Parent(s): f742eb8 9cc6596

Merge branch 'main' of https://huggingface.co/spaces/haotle/sdkTest

Browse files
Files changed (1) hide show
  1. pages/2 Topic Modeling.py +19 -20
pages/2 Topic Modeling.py CHANGED
@@ -749,6 +749,9 @@ with st.popover("🔗 Menu"):
749
  st.page_link("pages/5 Burst Detection.py", label="Burst Detection", icon="5️⃣")
750
  st.page_link("pages/6 Keywords Stem.py", label="Keywords Stem", icon="6️⃣")
751
  st.page_link("pages/7 Sentiment Analysis.py", label="Sentiment Analysis", icon="7️⃣")
 
 
 
752
 
753
  st.header("Topic Modeling", anchor=False)
754
  st.subheader('Put your file here...', anchor=False)
@@ -871,7 +874,7 @@ if uploaded_file is not None:
871
  method = c1.selectbox(
872
  'Choose method',
873
  ('Choose...', 'pyLDA', 'Biterm', 'BERTopic'))
874
- ColCho = c2.selectbox('Choose column', (["Title","Abstract"]))
875
  num_cho = c3.number_input('Choose number of topics', min_value=2, max_value=30, value=5)
876
 
877
  d1, d2 = st.columns([3,7])
@@ -910,8 +913,8 @@ if uploaded_file is not None:
910
  if fine_tuning:
911
  topic_labelling = st.toggle("Automatic topic labelling")
912
  if topic_labelling:
913
- llm_model = st.selectbox("Model",["OpenAI/gpt-4o","Google/Flan-t5","OpenAI/gpt-oss"])
914
- if llm_model == "OpenAI/gpt-4o":
915
  api_key = st.text_input("API Key")
916
 
917
  else:
@@ -920,6 +923,10 @@ if uploaded_file is not None:
920
  #===clean csv===
921
  @st.cache_data(ttl=3600, show_spinner=False)
922
  def clean_csv(extype):
 
 
 
 
923
  paper = papers.dropna(subset=[ColCho])
924
 
925
  #===mapping===
@@ -1202,37 +1209,30 @@ if uploaded_file is not None:
1202
  "MMR": mmr,
1203
  }
1204
  if topic_labelling:
1205
- if llm_model == "OpenAI/gpt-4o":
1206
  client = openai.OpenAI(api_key=api_key)
1207
  representation_model = {
1208
  "KeyBERT": keybert,
1209
  "MMR": mmr,
1210
  "test": OpenAI(client, model = "gpt-4o-mini", delay_in_seconds=10)
1211
  }
1212
- elif llm_model == "Google/Flan-t5":
1213
- gen = pipeline("text2text-generation", model = "google/flan-t5-base")
1214
- clientmod = TextGeneration(gen)
1215
  representation_model = {
1216
  "KeyBERT": keybert,
1217
  "MMR": mmr,
1218
  "test": clientmod
1219
  }
1220
- elif llm_model == "OpenAI/gpt-oss":
1221
- gen = pipeline("text-generation",
1222
- model = "unsloth/gpt-oss-20b-BF16",
1223
- torch_dtype = "auto",
1224
- device_map = "auto",
1225
- )
1226
- clientmod = TextGeneration(gen)
1227
-
1228
  representation_model = {
1229
  "KeyBERT": keybert,
1230
  "MMR": mmr,
1231
- "test": gen
1232
  }
1233
 
1234
-
1235
-
1236
  vectorizer_model = CountVectorizer(ngram_range=(1, xgram), stop_words='english')
1237
  topic_model = BERTopic(representation_model = representation_model, embedding_model=model, hdbscan_model=cluster_model, language=lang, umap_model=umap_model, vectorizer_model=vectorizer_model, top_n_words=bert_top_n_words)
1238
  topics, probs = topic_model.fit_transform(topic_abs, embeddings=embeddings)
@@ -1343,8 +1343,7 @@ if uploaded_file is not None:
1343
  st.button("Download Results")
1344
  st.text("Click Download results button at bottom of page")
1345
 
1346
- except Exception as e:
1347
  st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
1348
- st.write(e)
1349
  st.stop()
1350
  >>>>>>> e52d4a30c18f770eb968980667fa8e5a7b287580
 
749
  st.page_link("pages/5 Burst Detection.py", label="Burst Detection", icon="5️⃣")
750
  st.page_link("pages/6 Keywords Stem.py", label="Keywords Stem", icon="6️⃣")
751
  st.page_link("pages/7 Sentiment Analysis.py", label="Sentiment Analysis", icon="7️⃣")
752
+ st.page_link("pages/8 Shifterator.py", label="Shifterator", icon="8️⃣")
753
+ st.page_link("pages/9 Summarization.py", label = "Summarization",icon ="9️⃣")
754
+ st.page_link("pages/10 WordCloud.py", label = "WordCloud", icon = "🔟")
755
 
756
  st.header("Topic Modeling", anchor=False)
757
  st.subheader('Put your file here...', anchor=False)
 
874
  method = c1.selectbox(
875
  'Choose method',
876
  ('Choose...', 'pyLDA', 'Biterm', 'BERTopic'))
877
+ ColCho = c2.selectbox('Choose column', (["Abstract","Title", "Abstract + Title"]))
878
  num_cho = c3.number_input('Choose number of topics', min_value=2, max_value=30, value=5)
879
 
880
  d1, d2 = st.columns([3,7])
 
913
  if fine_tuning:
914
  topic_labelling = st.toggle("Automatic topic labelling")
915
  if topic_labelling:
916
+ llm_provider = st.selectbox("Model",["OpenAI/gpt-4o","Google/flan-t5","LiquidAI/LFM2-350M"])
917
+ if llm_provider == "OpenAI/gpt-4o":
918
  api_key = st.text_input("API Key")
919
 
920
  else:
 
923
  #===clean csv===
924
  @st.cache_data(ttl=3600, show_spinner=False)
925
  def clean_csv(extype):
926
+ if (ColCho=="Abstract + Title"):
927
+ papers["Abstract + Title"] = papers["Title"] + " " + papers["Abstract"]
928
+ st.write(papers["Abstract + Title"])
929
+
930
  paper = papers.dropna(subset=[ColCho])
931
 
932
  #===mapping===
 
1209
  "MMR": mmr,
1210
  }
1211
  if topic_labelling:
1212
+ if llm_provider == "OpenAI/gpt-4o":
1213
  client = openai.OpenAI(api_key=api_key)
1214
  representation_model = {
1215
  "KeyBERT": keybert,
1216
  "MMR": mmr,
1217
  "test": OpenAI(client, model = "gpt-4o-mini", delay_in_seconds=10)
1218
  }
1219
+ elif llm_provider == "Google/flan-t5":
1220
+ pipe = pipeline("text2text-generation", model = "google/flan-t5-base")
1221
+ clientmod = TextGeneration(pipe)
1222
  representation_model = {
1223
  "KeyBERT": keybert,
1224
  "MMR": mmr,
1225
  "test": clientmod
1226
  }
1227
+ elif llm_provider == "LiquidAI/LFM2-350M":
1228
+ pipe = pipeline("text-generation", model = "LiquidAI/LFM2-350M")
1229
+ clientmod = TextGeneration(pipe)
 
 
 
 
 
1230
  representation_model = {
1231
  "KeyBERT": keybert,
1232
  "MMR": mmr,
1233
+ "test": clientmod
1234
  }
1235
 
 
 
1236
  vectorizer_model = CountVectorizer(ngram_range=(1, xgram), stop_words='english')
1237
  topic_model = BERTopic(representation_model = representation_model, embedding_model=model, hdbscan_model=cluster_model, language=lang, umap_model=umap_model, vectorizer_model=vectorizer_model, top_n_words=bert_top_n_words)
1238
  topics, probs = topic_model.fit_transform(topic_abs, embeddings=embeddings)
 
1343
  st.button("Download Results")
1344
  st.text("Click Download results button at bottom of page")
1345
 
1346
+ except:
1347
  st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
 
1348
  st.stop()
1349
  >>>>>>> e52d4a30c18f770eb968980667fa8e5a7b287580