haotle committed on
Commit
9cc6596
·
verified ·
1 Parent(s): e52d4a3

Update pages/2 Topic Modeling.py

Browse files
Files changed (1) hide show
  1. pages/2 Topic Modeling.py +19 -20
pages/2 Topic Modeling.py CHANGED
@@ -74,6 +74,9 @@ with st.popover("🔗 Menu"):
74
  st.page_link("pages/5 Burst Detection.py", label="Burst Detection", icon="5️⃣")
75
  st.page_link("pages/6 Keywords Stem.py", label="Keywords Stem", icon="6️⃣")
76
  st.page_link("pages/7 Sentiment Analysis.py", label="Sentiment Analysis", icon="7️⃣")
 
 
 
77
 
78
  st.header("Topic Modeling", anchor=False)
79
  st.subheader('Put your file here...', anchor=False)
@@ -196,7 +199,7 @@ if uploaded_file is not None:
196
  method = c1.selectbox(
197
  'Choose method',
198
  ('Choose...', 'pyLDA', 'Biterm', 'BERTopic'))
199
- ColCho = c2.selectbox('Choose column', (["Title","Abstract"]))
200
  num_cho = c3.number_input('Choose number of topics', min_value=2, max_value=30, value=5)
201
 
202
  d1, d2 = st.columns([3,7])
@@ -235,8 +238,8 @@ if uploaded_file is not None:
235
  if fine_tuning:
236
  topic_labelling = st.toggle("Automatic topic labelling")
237
  if topic_labelling:
238
- llm_model = st.selectbox("Model",["OpenAI/gpt-4o","Google/Flan-t5","OpenAI/gpt-oss"])
239
- if llm_model == "OpenAI/gpt-4o":
240
  api_key = st.text_input("API Key")
241
 
242
  else:
@@ -245,6 +248,10 @@ if uploaded_file is not None:
245
  #===clean csv===
246
  @st.cache_data(ttl=3600, show_spinner=False)
247
  def clean_csv(extype):
 
 
 
 
248
  paper = papers.dropna(subset=[ColCho])
249
 
250
  #===mapping===
@@ -527,37 +534,30 @@ if uploaded_file is not None:
527
  "MMR": mmr,
528
  }
529
  if topic_labelling:
530
- if llm_model == "OpenAI/gpt-4o":
531
  client = openai.OpenAI(api_key=api_key)
532
  representation_model = {
533
  "KeyBERT": keybert,
534
  "MMR": mmr,
535
  "test": OpenAI(client, model = "gpt-4o-mini", delay_in_seconds=10)
536
  }
537
- elif llm_model == "Google/Flan-t5":
538
- gen = pipeline("text2text-generation", model = "google/flan-t5-base")
539
- clientmod = TextGeneration(gen)
540
  representation_model = {
541
  "KeyBERT": keybert,
542
  "MMR": mmr,
543
  "test": clientmod
544
  }
545
- elif llm_model == "OpenAI/gpt-oss":
546
- gen = pipeline("text-generation",
547
- model = "unsloth/gpt-oss-20b-BF16",
548
- torch_dtype = "auto",
549
- device_map = "auto",
550
- )
551
- clientmod = TextGeneration(gen)
552
-
553
  representation_model = {
554
  "KeyBERT": keybert,
555
  "MMR": mmr,
556
- "test": gen
557
  }
558
 
559
-
560
-
561
  vectorizer_model = CountVectorizer(ngram_range=(1, xgram), stop_words='english')
562
  topic_model = BERTopic(representation_model = representation_model, embedding_model=model, hdbscan_model=cluster_model, language=lang, umap_model=umap_model, vectorizer_model=vectorizer_model, top_n_words=bert_top_n_words)
563
  topics, probs = topic_model.fit_transform(topic_abs, embeddings=embeddings)
@@ -668,7 +668,6 @@ if uploaded_file is not None:
668
  st.button("Download Results")
669
  st.text("Click Download results button at bottom of page")
670
 
671
- except Exception as e:
672
  st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
673
- st.write(e)
674
  st.stop()
 
74
  st.page_link("pages/5 Burst Detection.py", label="Burst Detection", icon="5️⃣")
75
  st.page_link("pages/6 Keywords Stem.py", label="Keywords Stem", icon="6️⃣")
76
  st.page_link("pages/7 Sentiment Analysis.py", label="Sentiment Analysis", icon="7️⃣")
77
+ st.page_link("pages/8 Shifterator.py", label="Shifterator", icon="8️⃣")
78
+ st.page_link("pages/9 Summarization.py", label = "Summarization",icon ="9️⃣")
79
+ st.page_link("pages/10 WordCloud.py", label = "WordCloud", icon = "🔟")
80
 
81
  st.header("Topic Modeling", anchor=False)
82
  st.subheader('Put your file here...', anchor=False)
 
199
  method = c1.selectbox(
200
  'Choose method',
201
  ('Choose...', 'pyLDA', 'Biterm', 'BERTopic'))
202
+ ColCho = c2.selectbox('Choose column', (["Abstract","Title", "Abstract + Title"]))
203
  num_cho = c3.number_input('Choose number of topics', min_value=2, max_value=30, value=5)
204
 
205
  d1, d2 = st.columns([3,7])
 
238
  if fine_tuning:
239
  topic_labelling = st.toggle("Automatic topic labelling")
240
  if topic_labelling:
241
+ llm_provider = st.selectbox("Model",["OpenAI/gpt-4o","Google/flan-t5","LiquidAI/LFM2-350M"])
242
+ if llm_provider == "OpenAI/gpt-4o":
243
  api_key = st.text_input("API Key")
244
 
245
  else:
 
248
  #===clean csv===
249
  @st.cache_data(ttl=3600, show_spinner=False)
250
  def clean_csv(extype):
251
+ if (ColCho=="Abstract + Title"):
252
+ papers["Abstract + Title"] = papers["Title"] + " " + papers["Abstract"]
253
+ st.write(papers["Abstract + Title"])
254
+
255
  paper = papers.dropna(subset=[ColCho])
256
 
257
  #===mapping===
 
534
  "MMR": mmr,
535
  }
536
  if topic_labelling:
537
+ if llm_provider == "OpenAI/gpt-4o":
538
  client = openai.OpenAI(api_key=api_key)
539
  representation_model = {
540
  "KeyBERT": keybert,
541
  "MMR": mmr,
542
  "test": OpenAI(client, model = "gpt-4o-mini", delay_in_seconds=10)
543
  }
544
+ elif llm_provider == "Google/flan-t5":
545
+ pipe = pipeline("text2text-generation", model = "google/flan-t5-base")
546
+ clientmod = TextGeneration(pipe)
547
  representation_model = {
548
  "KeyBERT": keybert,
549
  "MMR": mmr,
550
  "test": clientmod
551
  }
552
+ elif llm_provider == "LiquidAI/LFM2-350M":
553
+ pipe = pipeline("text-generation", model = "LiquidAI/LFM2-350M")
554
+ clientmod = TextGeneration(pipe)
 
 
 
 
 
555
  representation_model = {
556
  "KeyBERT": keybert,
557
  "MMR": mmr,
558
+ "test": clientmod
559
  }
560
 
 
 
561
  vectorizer_model = CountVectorizer(ngram_range=(1, xgram), stop_words='english')
562
  topic_model = BERTopic(representation_model = representation_model, embedding_model=model, hdbscan_model=cluster_model, language=lang, umap_model=umap_model, vectorizer_model=vectorizer_model, top_n_words=bert_top_n_words)
563
  topics, probs = topic_model.fit_transform(topic_abs, embeddings=embeddings)
 
668
  st.button("Download Results")
669
  st.text("Click Download results button at bottom of page")
670
 
671
+ except:
672
  st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
 
673
  st.stop()