Spaces:

KevSun
/

LinguisticFeatures

Sleeping

App Files Community

KevSun committed on Jul 25, 2024

Commit

ff38d81

verified ·

1 Parent(s): cf15af0

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -12

app.py CHANGED Viewed

@@ -58,9 +58,9 @@ def detect_language(text):
 @st.cache_data
 def tsne_visualization(embeddings, words):
-    n_samples = len(embeddings)
-    perplexity = min(30, n_samples - 1)  # Ensure perplexity is less than the number of samples
-    tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42)
     embeddings_2d = tsne.fit_transform(embeddings)
     df = pd.DataFrame(embeddings_2d, columns=['x', 'y'])
     df['word'] = words
@@ -69,7 +69,7 @@ def tsne_visualization(embeddings, words):
 st.title("Multilingual Text Analysis System")
 user_input = st.text_area("Enter your text here:")
-if st.button("Analyze") or user_input:
     if user_input:
         lang = detect_language(user_input)
         st.write(f"Detected language: {lang}")
@@ -84,13 +84,17 @@ if st.button("Analyze") or user_input:
             embeddings = embedding_agent.get_embeddings(words)
         st.success("Word Embeddings Generated.")
-        with st.spinner("Creating t-SNE visualization..."):
             tsne_df = tsne_visualization(embeddings, words)
-            fig, ax = plt.subplots()
-            ax.scatter(tsne_df['x'], tsne_df['y'])
-            for i, word in enumerate(tsne_df['word']):
-                ax.annotate(word, (tsne_df['x'][i], tsne_df['y'][i]))
-            st.pyplot(fig)
         with st.spinner("Extracting topics..."):
             texts = [user_input, "Another text to improve topic modeling."]
@@ -109,5 +113,4 @@ if st.button("Analyze") or user_input:
         st.warning("Please enter some text to analyze.")
 st.sidebar.title("About")
-st.sidebar.info("This app performs multilingual text analysis using various NLP techniques.")

 @st.cache_data
 def tsne_visualization(embeddings, words):
+    if len(words) < 30:
+        return pd.DataFrame({'word': words})
+    tsne = TSNE(n_components=2, random_state=42, perplexity=min(30, len(words) - 1))
     embeddings_2d = tsne.fit_transform(embeddings)
     df = pd.DataFrame(embeddings_2d, columns=['x', 'y'])
     df['word'] = words
 st.title("Multilingual Text Analysis System")
 user_input = st.text_area("Enter your text here:")
+if st.button("Analyze"):
     if user_input:
         lang = detect_language(user_input)
         st.write(f"Detected language: {lang}")
             embeddings = embedding_agent.get_embeddings(words)
         st.success("Word Embeddings Generated.")
+        with st.spinner("Visualizing words..."):
             tsne_df = tsne_visualization(embeddings, words)
+            if 'x' in tsne_df.columns:
+                fig, ax = plt.subplots()
+                ax.scatter(tsne_df['x'], tsne_df['y'])
+                for i, word in enumerate(tsne_df['word']):
+                    ax.annotate(word, (tsne_df['x'][i], tsne_df['y'][i]))
+                st.pyplot(fig)
+            else:
+                st.write("Word list (not enough words for t-SNE visualization):")
+                st.write(", ".join(words))
         with st.spinner("Extracting topics..."):
             texts = [user_input, "Another text to improve topic modeling."]
         st.warning("Please enter some text to analyze.")
 st.sidebar.title("About")
+st.sidebar.info("This app performs multilingual text analysis using various NLP techniques.")