import io

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from wordcloud import WordCloud, STOPWORDS

# Libraries for file processing
import PyPDF2
from docx import Document


def extract_text_from_file(uploaded_file):
    """Extract the content of an uploaded file based on its extension.

    The extension is matched case-insensitively, so ``REPORT.PDF`` is
    handled the same way as ``report.pdf``.

    Args:
        uploaded_file: A file-like object exposing ``name`` and ``read()``,
            such as the value returned by ``st.file_uploader``.

    Returns:
        A ``str`` for ``.pdf``, ``.docx`` and ``.txt`` files, a
        ``pandas.DataFrame`` for ``.csv`` and ``.xlsx`` files, or ``None``
        when the extension is not supported.

    Raises:
        Whatever the underlying parser (PyPDF2, python-docx, pandas) raises
        for a corrupt or unreadable file.
    """
    filename = uploaded_file.name.lower()

    if filename.endswith(".pdf"):
        reader = PyPDF2.PdfReader(uploaded_file)
        # ``or ""`` guards against a page that yields no text at all.
        return "".join(page.extract_text() or "" for page in reader.pages)

    if filename.endswith(".docx"):
        document = Document(uploaded_file)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)

    if filename.endswith(".txt"):
        # "utf-8-sig" drops a leading BOM if present; errors="replace" keeps
        # files with stray non-UTF-8 bytes usable instead of raising.
        return uploaded_file.read().decode("utf-8-sig", errors="replace")

    if filename.endswith(".xlsx"):
        return pd.read_excel(uploaded_file)

    if filename.endswith(".csv"):
        return pd.read_csv(uploaded_file)

    return None


def _parse_custom_stopwords(raw_stopwords):
    """Return the trimmed, lower-cased, non-empty entries of a comma-separated string."""
    candidates = (entry.strip() for entry in raw_stopwords.lower().split(","))
    return {entry for entry in candidates if entry}


def render_word_cloud_page():
    """Render the UI and logic for the Word Cloud Generator page.

    Lets the user upload a file, pick a text column when the upload is
    tabular, tune the rendering options, and generate a word cloud. The
    resulting figure is shown, stored in
    ``st.session_state['word_cloud_result']`` and offered as a PNG download.
    """
    # NOTE(review): both strings below are passed with unsafe_allow_html=True
    # but contain no markup here; any HTML wrapper appears to have been lost
    # before this review. Confirm against the original file.
    st.markdown(
        "\n\n😶‍🌫️ Word Cloud Generator\n\n",
        unsafe_allow_html=True,
    )
    st.markdown(
        "\n\nCreate beautiful word clouds from your text data. "
        "Supports PDF, DOCX, TXT, CSV, and Excel files.\n\n",
        unsafe_allow_html=True,
    )

    uploaded_file = st.file_uploader(
        "Choose a file",
        type=['pdf', 'docx', 'txt', 'csv', 'xlsx'],
    )
    if uploaded_file is None:
        return

    with st.spinner("Processing file..."):
        try:
            extracted_content = extract_text_from_file(uploaded_file)
        except Exception as e:
            # UI boundary: report parser failures instead of a raw traceback.
            st.error(f"An error occurred while reading the file: {e}")
            return

    text_data = None
    if isinstance(extracted_content, pd.DataFrame):
        st.info("CSV/Excel file detected. Please select the column to generate the word cloud from.")
        df = extracted_content
        text_columns = df.select_dtypes(include=['object', 'string']).columns.tolist()
        if not text_columns:
            st.error("No text-based columns found in the uploaded file.")
            return
        column_to_use = st.selectbox("Select a column:", text_columns)
        if column_to_use:
            text_data = " ".join(df[column_to_use].dropna().astype(str))
    else:
        text_data = extracted_content

    if not text_data:
        # Covers unsupported files, image-only PDFs and empty columns.
        st.warning("No text could be extracted from the uploaded file.")
        return

    st.markdown("---")
    st.subheader("Customize Your Word Cloud")

    col1, col2 = st.columns(2)
    with col1:
        colormap = st.selectbox(
            "Color Scheme",
            ["viridis", "plasma", "inferno", "magma", "cividis", "Greys",
             "Purples", "Blues", "Greens", "Oranges", "Reds"],
        )
        max_words = st.slider("Maximum Words", 50, 500, 200)
        bg_color = st.color_picker("Background Color", "#FFFFFF")
    with col2:
        # NOTE(review): no mask is passed to WordCloud below; the contour
        # options are presumably only honoured together with a mask - confirm.
        contour_width = st.slider("Contour Width", 0.0, 5.0, 0.0, 0.1)
        contour_color = st.color_picker("Contour Color", "#0000FF")
        add_stopwords = st.text_area("Add Custom Stopwords (comma-separated)")

    if not st.button("Generate Word Cloud ✨"):
        return

    with st.spinner("Creating your masterpiece..."):
        custom_stopwords = set(STOPWORDS)
        if add_stopwords:
            # Entries are trimmed so "foo, bar" excludes "bar", not " bar".
            custom_stopwords.update(_parse_custom_stopwords(add_stopwords))

        try:
            wordcloud = WordCloud(
                width=1200,
                height=600,
                background_color=bg_color,
                stopwords=custom_stopwords,
                max_words=max_words,
                colormap=colormap,
                contour_width=contour_width,
                contour_color=contour_color,
            ).generate(text_data)

            st.markdown("---")
            st.subheader("Generated Word Cloud")

            fig, ax = plt.subplots(figsize=(12, 6))
            try:
                ax.imshow(wordcloud, interpolation='bilinear')
                ax.axis('off')
                st.pyplot(fig)

                # Keep the result in session state for the session report.
                st.session_state['word_cloud_result'] = {
                    "figure": fig,  # The matplotlib figure object
                    "source": uploaded_file.name,
                    "settings": f"Colors: {colormap}, Max Words: {max_words}",
                }
                st.success("✅ Word cloud saved to the session report.")

                # Create a download button for the image
                buf = io.BytesIO()
                fig.savefig(buf, format="png", bbox_inches='tight')
                st.download_button(
                    label="📥 Download Image",
                    data=buf.getvalue(),
                    file_name="word_cloud.png",
                    mime="image/png",
                )
            finally:
                # Drop pyplot's global reference so figures do not pile up
                # across reruns; the Figure remains reachable through the
                # reference stored in session state.
                plt.close(fig)
        except Exception as e:
            st.error(f"An error occurred while generating the word cloud: {e}")