Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
from wordcloud import WordCloud, STOPWORDS | |
import matplotlib.pyplot as plt | |
import io | |
# Libraries for file processing | |
import PyPDF2 | |
from docx import Document | |
def extract_text_from_file(uploaded_file): | |
"""Extracts text from various file formats.""" | |
if uploaded_file.name.endswith('.pdf'): | |
pdf_reader = PyPDF2.PdfReader(uploaded_file) | |
text = "" | |
for page in pdf_reader.pages: | |
text += page.extract_text() | |
return text | |
elif uploaded_file.name.endswith('.docx'): | |
doc = Document(uploaded_file) | |
text = "\n".join([para.text for para in doc.paragraphs]) | |
return text | |
elif uploaded_file.name.endswith('.txt'): | |
# To read bytes, decode it to string | |
return uploaded_file.read().decode('utf-8') | |
elif uploaded_file.name.endswith(('.csv', '.xlsx')): | |
return pd.read_excel(uploaded_file) if uploaded_file.name.endswith('.xlsx') else pd.read_csv(uploaded_file) | |
return None | |
def render_word_cloud_page(): | |
""" | |
Renders the UI and logic for the Word Cloud Generator page. | |
""" | |
st.markdown("<h2 style='text-align: center; color: #4A90E2;'>πΆβπ«οΈ Word Cloud Generator</h2>", unsafe_allow_html=True) | |
st.markdown("<p style='text-align: center;'>Create beautiful word clouds from your text data. Supports PDF, DOCX, TXT, CSV, and Excel files.</p>", unsafe_allow_html=True) | |
uploaded_file = st.file_uploader( | |
"Choose a file", | |
type=['pdf', 'docx', 'txt', 'csv', 'xlsx'] | |
) | |
text_data = None | |
if uploaded_file is not None: | |
with st.spinner("Processing file..."): | |
extracted_content = extract_text_from_file(uploaded_file) | |
if isinstance(extracted_content, pd.DataFrame): | |
st.info("CSV/Excel file detected. Please select the column to generate the word cloud from.") | |
df = extracted_content | |
text_columns = df.select_dtypes(include=['object', 'string']).columns.tolist() | |
if not text_columns: | |
st.error("No text-based columns found in the uploaded file.") | |
return | |
column_to_use = st.selectbox("Select a column:", text_columns) | |
if column_to_use: | |
text_data = " ".join(df[column_to_use].dropna().astype(str)) | |
else: | |
text_data = extracted_content | |
if text_data: | |
st.markdown("---") | |
st.subheader("Customize Your Word Cloud") | |
col1, col2 = st.columns(2) | |
with col1: | |
colormap = st.selectbox("Color Scheme", ["viridis", "plasma", "inferno", "magma", "cividis", "Greys", "Purples", "Blues", "Greens", "Oranges", "Reds"]) | |
max_words = st.slider("Maximum Words", 50, 500, 200) | |
bg_color = st.color_picker("Background Color", "#FFFFFF") | |
with col2: | |
contour_width = st.slider("Contour Width", 0.0, 5.0, 0.0, 0.1) | |
contour_color = st.color_picker("Contour Color", "#0000FF") | |
add_stopwords = st.text_area("Add Custom Stopwords (comma-separated)") | |
if st.button("Generate Word Cloud β¨"): | |
with st.spinner("Creating your masterpiece..."): | |
custom_stopwords = set(STOPWORDS) | |
if add_stopwords: | |
custom_stopwords.update(add_stopwords.lower().split(',')) | |
try: | |
wordcloud = WordCloud( | |
width=1200, | |
height=600, | |
background_color=bg_color, | |
stopwords=custom_stopwords, | |
max_words=max_words, | |
colormap=colormap, | |
contour_width=contour_width, | |
contour_color=contour_color | |
).generate(text_data) | |
st.markdown("---") | |
st.subheader("Generated Word Cloud") | |
fig, ax = plt.subplots(figsize=(12, 6)) | |
ax.imshow(wordcloud, interpolation='bilinear') | |
ax.axis('off') | |
st.pyplot(fig) | |
# --- ADD THIS BLOCK --- | |
st.session_state['word_cloud_result'] = { | |
"figure": fig, # The matplotlib figure object | |
"source": uploaded_file.name, | |
"settings": f"Colors: {colormap}, Max Words: {max_words}" | |
} | |
st.success("β Word cloud saved to the session report.") | |
# ---------------------- | |
# Create a download button for the image | |
buf = io.BytesIO() | |
fig.savefig(buf, format="png", bbox_inches='tight') | |
st.download_button( | |
label="π₯ Download Image", | |
data=buf.getvalue(), | |
file_name="word_cloud.png", | |
mime="image/png" | |
) | |
except Exception as e: | |
st.error(f"An error occurred while generating the word cloud: {e}") |