KevSun committed on
Commit
ff38d81
·
verified ·
1 Parent(s): cf15af0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -12
app.py CHANGED
@@ -58,9 +58,9 @@ def detect_language(text):
58
 
59
  @st.cache_data
60
  def tsne_visualization(embeddings, words):
61
- n_samples = len(embeddings)
62
- perplexity = min(30, n_samples - 1) # Ensure perplexity is less than the number of samples
63
- tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42)
64
  embeddings_2d = tsne.fit_transform(embeddings)
65
  df = pd.DataFrame(embeddings_2d, columns=['x', 'y'])
66
  df['word'] = words
@@ -69,7 +69,7 @@ def tsne_visualization(embeddings, words):
69
  st.title("Multilingual Text Analysis System")
70
  user_input = st.text_area("Enter your text here:")
71
 
72
- if st.button("Analyze") or user_input:
73
  if user_input:
74
  lang = detect_language(user_input)
75
  st.write(f"Detected language: {lang}")
@@ -84,13 +84,17 @@ if st.button("Analyze") or user_input:
84
  embeddings = embedding_agent.get_embeddings(words)
85
  st.success("Word Embeddings Generated.")
86
 
87
- with st.spinner("Creating t-SNE visualization..."):
88
  tsne_df = tsne_visualization(embeddings, words)
89
- fig, ax = plt.subplots()
90
- ax.scatter(tsne_df['x'], tsne_df['y'])
91
- for i, word in enumerate(tsne_df['word']):
92
- ax.annotate(word, (tsne_df['x'][i], tsne_df['y'][i]))
93
- st.pyplot(fig)
 
 
 
 
94
 
95
  with st.spinner("Extracting topics..."):
96
  texts = [user_input, "Another text to improve topic modeling."]
@@ -109,5 +113,4 @@ if st.button("Analyze") or user_input:
109
  st.warning("Please enter some text to analyze.")
110
 
111
  st.sidebar.title("About")
112
- st.sidebar.info("This app performs multilingual text analysis using various NLP techniques.")
113
-
 
58
 
59
  @st.cache_data
60
  def tsne_visualization(embeddings, words):
61
+ if len(words) < 30:
62
+ return pd.DataFrame({'word': words})
63
+ tsne = TSNE(n_components=2, random_state=42, perplexity=min(30, len(words) - 1))
64
  embeddings_2d = tsne.fit_transform(embeddings)
65
  df = pd.DataFrame(embeddings_2d, columns=['x', 'y'])
66
  df['word'] = words
 
69
  st.title("Multilingual Text Analysis System")
70
  user_input = st.text_area("Enter your text here:")
71
 
72
+ if st.button("Analyze"):
73
  if user_input:
74
  lang = detect_language(user_input)
75
  st.write(f"Detected language: {lang}")
 
84
  embeddings = embedding_agent.get_embeddings(words)
85
  st.success("Word Embeddings Generated.")
86
 
87
+ with st.spinner("Visualizing words..."):
88
  tsne_df = tsne_visualization(embeddings, words)
89
+ if 'x' in tsne_df.columns:
90
+ fig, ax = plt.subplots()
91
+ ax.scatter(tsne_df['x'], tsne_df['y'])
92
+ for i, word in enumerate(tsne_df['word']):
93
+ ax.annotate(word, (tsne_df['x'][i], tsne_df['y'][i]))
94
+ st.pyplot(fig)
95
+ else:
96
+ st.write("Word list (not enough words for t-SNE visualization):")
97
+ st.write(", ".join(words))
98
 
99
  with st.spinner("Extracting topics..."):
100
  texts = [user_input, "Another text to improve topic modeling."]
 
113
  st.warning("Please enter some text to analyze.")
114
 
115
  st.sidebar.title("About")
116
+ st.sidebar.info("This app performs multilingual text analysis using various NLP techniques.")