Spaces:
Running
Running
T Le
committed on
Commit
·
cd8b282
1
Parent(s):
d054553
Topic modeling model update
Browse files
pages/2 Topic Modeling.py
CHANGED
@@ -196,7 +196,7 @@ if uploaded_file is not None:
|
|
196 |
method = c1.selectbox(
|
197 |
'Choose method',
|
198 |
('Choose...', 'pyLDA', 'Biterm', 'BERTopic'))
|
199 |
-
ColCho = c2.selectbox('Choose column', (
|
200 |
num_cho = c3.number_input('Choose number of topics', min_value=2, max_value=30, value=5)
|
201 |
|
202 |
d1, d2 = st.columns([3,7])
|
@@ -235,8 +235,8 @@ if uploaded_file is not None:
|
|
235 |
if fine_tuning:
|
236 |
topic_labelling = st.toggle("Automatic topic labelling")
|
237 |
if topic_labelling:
|
238 |
-
|
239 |
-
if
|
240 |
api_key = st.text_input("API Key")
|
241 |
|
242 |
else:
|
@@ -527,21 +527,36 @@ if uploaded_file is not None:
|
|
527 |
"MMR": mmr,
|
528 |
}
|
529 |
if topic_labelling:
|
530 |
-
if
|
531 |
client = openai.OpenAI(api_key=api_key)
|
532 |
representation_model = {
|
533 |
"KeyBERT": keybert,
|
534 |
"MMR": mmr,
|
535 |
"test": OpenAI(client, model = "gpt-4o-mini", delay_in_seconds=10)
|
536 |
}
|
537 |
-
elif
|
538 |
-
|
539 |
-
clientmod = TextGeneration(
|
540 |
representation_model = {
|
541 |
"KeyBERT": keybert,
|
542 |
"MMR": mmr,
|
543 |
"test": clientmod
|
544 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
545 |
|
546 |
vectorizer_model = CountVectorizer(ngram_range=(1, xgram), stop_words='english')
|
547 |
topic_model = BERTopic(representation_model = representation_model, embedding_model=model, hdbscan_model=cluster_model, language=lang, umap_model=umap_model, vectorizer_model=vectorizer_model, top_n_words=bert_top_n_words)
|
|
|
196 |
method = c1.selectbox(
|
197 |
'Choose method',
|
198 |
('Choose...', 'pyLDA', 'Biterm', 'BERTopic'))
|
199 |
+
ColCho = c2.selectbox('Choose column', (["Title","Abstract"]))
|
200 |
num_cho = c3.number_input('Choose number of topics', min_value=2, max_value=30, value=5)
|
201 |
|
202 |
d1, d2 = st.columns([3,7])
|
|
|
235 |
if fine_tuning:
|
236 |
topic_labelling = st.toggle("Automatic topic labelling")
|
237 |
if topic_labelling:
|
238 |
+
llm_model = st.selectbox("Model",["OpenAI/gpt-4o","Google/Flan-t5","OpenAI/gpt-oss"])
|
239 |
+
if llm_model == "OpenAI/gpt-4o":
|
240 |
api_key = st.text_input("API Key")
|
241 |
|
242 |
else:
|
|
|
527 |
"MMR": mmr,
|
528 |
}
|
529 |
if topic_labelling:
|
530 |
+
if llm_model == "OpenAI/gpt-4o":
|
531 |
client = openai.OpenAI(api_key=api_key)
|
532 |
representation_model = {
|
533 |
"KeyBERT": keybert,
|
534 |
"MMR": mmr,
|
535 |
"test": OpenAI(client, model = "gpt-4o-mini", delay_in_seconds=10)
|
536 |
}
|
537 |
+
elif llm_model == "Google/Flan-t5":
|
538 |
+
gen = pipeline("text2text-generation", model = "google/flan-t5-base")
|
539 |
+
clientmod = TextGeneration(gen)
|
540 |
representation_model = {
|
541 |
"KeyBERT": keybert,
|
542 |
"MMR": mmr,
|
543 |
"test": clientmod
|
544 |
}
|
545 |
+
elif llm_model == "OpenAI/gpt-oss":
|
546 |
+
gen = pipeline("text-generation",
|
547 |
+
model = "openai/gpt-oss-20b",
|
548 |
+
torch_dtype = "auto",
|
549 |
+
device_map = "auto",
|
550 |
+
)
|
551 |
+
clientmod = TextGeneration(gen)
|
552 |
+
|
553 |
+
representation_model = {
|
554 |
+
"KeyBERT": keybert,
|
555 |
+
"MMR": mmr,
|
556 |
+
"test": gen
|
557 |
+
}
|
558 |
+
|
559 |
+
|
560 |
|
561 |
vectorizer_model = CountVectorizer(ngram_range=(1, xgram), stop_words='english')
|
562 |
topic_model = BERTopic(representation_model = representation_model, embedding_model=model, hdbscan_model=cluster_model, language=lang, umap_model=umap_model, vectorizer_model=vectorizer_model, top_n_words=bert_top_n_words)
|
tools/__pycache__/sourceformat.cpython-310.pyc
DELETED
Binary file (5.74 kB)
|
|