Spaces:

haotle
/

LibTesting

Running

App Files Files

haotle committed 9 days ago

Commit

9cc6596

verified ·

1 Parent(s): e52d4a3

Update pages/2 Topic Modeling.py

Browse files

Files changed (1) hide show

pages/2 Topic Modeling.py +19 -20

pages/2 Topic Modeling.py CHANGED Viewed

@@ -74,6 +74,9 @@ with st.popover("🔗 Menu"):
     st.page_link("pages/5 Burst Detection.py", label="Burst Detection", icon="5️⃣")
     st.page_link("pages/6 Keywords Stem.py", label="Keywords Stem", icon="6️⃣")
     st.page_link("pages/7 Sentiment Analysis.py", label="Sentiment Analysis", icon="7️⃣")
 st.header("Topic Modeling", anchor=False)
 st.subheader('Put your file here...', anchor=False)
@@ -196,7 +199,7 @@ if uploaded_file is not None:
         method = c1.selectbox(
                 'Choose method',
                 ('Choose...', 'pyLDA', 'Biterm', 'BERTopic'))
-        ColCho = c2.selectbox('Choose column', (["Title","Abstract"]))
         num_cho = c3.number_input('Choose number of topics', min_value=2, max_value=30, value=5)
         d1, d2 = st.columns([3,7])
@@ -235,8 +238,8 @@ if uploaded_file is not None:
                 if fine_tuning:
                     topic_labelling = st.toggle("Automatic topic labelling")
                     if topic_labelling:
-                        llm_model = st.selectbox("Model",["OpenAI/gpt-4o","Google/Flan-t5","OpenAI/gpt-oss"])
-                        if llm_model == "OpenAI/gpt-4o":
                             api_key = st.text_input("API Key")
             else:
@@ -245,6 +248,10 @@ if uploaded_file is not None:
         #===clean csv===
         @st.cache_data(ttl=3600, show_spinner=False)
         def clean_csv(extype):
             paper = papers.dropna(subset=[ColCho])
             #===mapping===
@@ -527,37 +534,30 @@ if uploaded_file is not None:
                         "MMR": mmr,
                     }
                     if topic_labelling:
-                        if llm_model == "OpenAI/gpt-4o":
                             client = openai.OpenAI(api_key=api_key)
                             representation_model = {
                                 "KeyBERT": keybert,
                                 "MMR": mmr,
                                 "test": OpenAI(client, model = "gpt-4o-mini", delay_in_seconds=10)
                             }
-                        elif llm_model == "Google/Flan-t5":
-                            gen = pipeline("text2text-generation", model = "google/flan-t5-base")
-                            clientmod = TextGeneration(gen)
                             representation_model = {
                                 "KeyBERT": keybert,
                                 "MMR": mmr,
                                 "test": clientmod
                             }
-                        elif llm_model == "OpenAI/gpt-oss":
-                            gen = pipeline("text-generation",
-                            model = "unsloth/gpt-oss-20b-BF16",
-                            torch_dtype = "auto",
-                            device_map = "auto",
-                            )
-                            clientmod = TextGeneration(gen)
                             representation_model = {
                                 "KeyBERT": keybert,
                                 "MMR": mmr,
-                                "test": gen
                             }
                 vectorizer_model = CountVectorizer(ngram_range=(1, xgram), stop_words='english')
                 topic_model = BERTopic(representation_model = representation_model, embedding_model=model, hdbscan_model=cluster_model, language=lang, umap_model=umap_model, vectorizer_model=vectorizer_model, top_n_words=bert_top_n_words)
                 topics, probs = topic_model.fit_transform(topic_abs, embeddings=embeddings)
@@ -668,7 +668,6 @@ if uploaded_file is not None:
                 st.button("Download Results")
                 st.text("Click Download results button at bottom of page")
-    except Exception as e:
         st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
-        st.write(e)
         st.stop()

     st.page_link("pages/5 Burst Detection.py", label="Burst Detection", icon="5️⃣")
     st.page_link("pages/6 Keywords Stem.py", label="Keywords Stem", icon="6️⃣")
     st.page_link("pages/7 Sentiment Analysis.py", label="Sentiment Analysis", icon="7️⃣")
+    st.page_link("pages/8 Shifterator.py", label="Shifterator", icon="8️⃣")
+    st.page_link("pages/9 Summarization.py", label = "Summarization",icon ="9️⃣")
+    st.page_link("pages/10 WordCloud.py", label = "WordCloud", icon = "🔟")
 st.header("Topic Modeling", anchor=False)
 st.subheader('Put your file here...', anchor=False)
         method = c1.selectbox(
                 'Choose method',
                 ('Choose...', 'pyLDA', 'Biterm', 'BERTopic'))
+        ColCho = c2.selectbox('Choose column', (["Abstract","Title", "Abstract + Title"]))
         num_cho = c3.number_input('Choose number of topics', min_value=2, max_value=30, value=5)
         d1, d2 = st.columns([3,7])
                 if fine_tuning:
                     topic_labelling = st.toggle("Automatic topic labelling")
                     if topic_labelling:
+                        llm_provider = st.selectbox("Model",["OpenAI/gpt-4o","Google/flan-t5","LiquidAI/LFM2-350M"])
+                        if llm_provider == "OpenAI/gpt-4o":
                             api_key = st.text_input("API Key")
             else:
         #===clean csv===
         @st.cache_data(ttl=3600, show_spinner=False)
         def clean_csv(extype):
+            if (ColCho=="Abstract + Title"):
+                papers["Abstract + Title"] = papers["Title"] + " " + papers["Abstract"]
+                st.write(papers["Abstract + Title"])
             paper = papers.dropna(subset=[ColCho])
             #===mapping===
                         "MMR": mmr,
                     }
                     if topic_labelling:
+                        if llm_provider == "OpenAI/gpt-4o":
                             client = openai.OpenAI(api_key=api_key)
                             representation_model = {
                                 "KeyBERT": keybert,
                                 "MMR": mmr,
                                 "test": OpenAI(client, model = "gpt-4o-mini", delay_in_seconds=10)
                             }
+                        elif llm_provider == "Google/flan-t5":
+                            pipe = pipeline("text2text-generation", model = "google/flan-t5-base")
+                            clientmod = TextGeneration(pipe)
                             representation_model = {
                                 "KeyBERT": keybert,
                                 "MMR": mmr,
                                 "test": clientmod
                             }
+                        elif llm_provider == "LiquidAI/LFM2-350M":
+                            pipe = pipeline("text-generation", model = "LiquidAI/LFM2-350M")
+                            clientmod = TextGeneration(pipe)
                             representation_model = {
                                 "KeyBERT": keybert,
                                 "MMR": mmr,
+                                "test": clientmod
                             }
                 vectorizer_model = CountVectorizer(ngram_range=(1, xgram), stop_words='english')
                 topic_model = BERTopic(representation_model = representation_model, embedding_model=model, hdbscan_model=cluster_model, language=lang, umap_model=umap_model, vectorizer_model=vectorizer_model, top_n_words=bert_top_n_words)
                 topics, probs = topic_model.fit_transform(topic_abs, embeddings=embeddings)
                 st.button("Download Results")
                 st.text("Click Download results button at bottom of page")
+    except:
         st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
         st.stop()