import re import os import streamlit as st import pandas as pd import json from typing import List, Dict from groq import Groq import time from dotenv import load_dotenv import math from collections import Counter # Load environment variables from .env file load_dotenv() reviews_data = {} # Configure the Streamlit page st.set_page_config( page_title="AI FAQ Generator", page_icon="🤖", layout="wide", initial_sidebar_state="expanded" ) class OptimizedFAQGenerator: def __init__(self, api_key: str): """Initialize the FAQ Generator with Groq API key.""" self.client = Groq(api_key=api_key) self.model = "llama3-8b-8192" # Fast and efficient model self.batch_size = 100 # Process reviews in batches of 100 self.max_text_length = 3000 # Maximum text length per API call def chunk_reviews_by_size(self, reviews_data: List[Dict], max_chars: int = 3000) -> List[List[Dict]]: """Chunk reviews by character count to stay within API limits.""" chunks = [] current_chunk = [] current_length = 0 for review in reviews_data: review_text = review.get('review_text', '') review_length = len(review_text) + 50 # Add buffer for formatting # If adding this review would exceed the limit, start a new chunk if current_length + review_length > max_chars and current_chunk: chunks.append(current_chunk) current_chunk = [review] current_length = review_length else: current_chunk.append(review) current_length += review_length # Add the last chunk if it has content if current_chunk: chunks.append(current_chunk) return chunks def extract_keywords_from_batch(self, review_batch: List[Dict]) -> List[str]: """Extract keywords from a batch of reviews.""" # Combine review texts from the batch batch_text = " ".join([review.get('review_text', '') for review in review_batch if review.get('review_text')]) # Truncate if too long if len(batch_text) > self.max_text_length: batch_text = batch_text[:self.max_text_length] prompt = f""" Analyze these customer reviews and extract SEO keywords and phrases for a business website. IMPORTANT RULES: 1. DO NOT include any specific brand names or business names 2. Focus on generic industry terms and services 3. Extract keywords that any similar business could use 4. Focus on customer pain points and solutions Extract keywords for: - Products and services mentioned (generic terms only) - Common customer concerns and questions - Industry terminology - Customer experience themes - Service quality aspects Reviews: {batch_text} Return exactly 15 relevant SEO keywords/phrases, one per line, without numbering or bullets. Use generic terms that any business in this industry could use. """ try: response = self.client.chat.completions.create( messages=[{"role": "user", "content": prompt}], model=self.model, temperature=0.3, max_tokens=400 ) keywords = [kw.strip() for kw in response.choices[0].message.content.strip().split('\n') if kw.strip()] return keywords[:15] # Limit to 15 keywords per batch except Exception as e: st.error(f"Error extracting keywords from batch: {str(e)}") return [] def extract_seo_keywords(self, reviews_data: List[Dict]) -> List[str]: """Extract SEO keywords from all reviews using batch processing.""" st.info(f"Processing {len(reviews_data)} reviews in batches...") # Create progress bar progress_bar = st.progress(0) status_text = st.empty() # Chunk reviews by character count review_chunks = self.chunk_reviews_by_size(reviews_data, self.max_text_length) all_keywords = [] for i, chunk in enumerate(review_chunks): status_text.text(f"Processing batch {i+1}/{len(review_chunks)} ({len(chunk)} reviews)...") # Extract keywords from this batch batch_keywords = self.extract_keywords_from_batch(chunk) all_keywords.extend(batch_keywords) # Update progress progress_bar.progress((i + 1) / len(review_chunks)) # Small delay to avoid rate limiting time.sleep(0.5) # Count keyword frequency and get top keywords keyword_counts = Counter(all_keywords) top_keywords = [kw for kw, count in keyword_counts.most_common(25)] progress_bar.empty() status_text.empty() st.success(f"Extracted {len(top_keywords)} unique keywords from {len(review_chunks)} batches") return top_keywords def get_review_insights(self, reviews_data: List[Dict]) -> Dict: """Extract insights from reviews for better FAQ generation.""" # Sample reviews for analysis sample_size = min(50, len(reviews_data)) sample_reviews = reviews_data[:sample_size] insights = { 'total_reviews': len(reviews_data), 'avg_rating': sum(int(r.get('rating', 0)) for r in reviews_data) / len(reviews_data), 'positive_reviews': sum(1 for r in reviews_data if int(r.get('rating', 0)) >= 4), 'common_themes': [], 'pain_points': [], 'positive_aspects': [] } # Analyze positive vs negative reviews positive_reviews = [r for r in sample_reviews if int(r.get('rating', 0)) >= 4] negative_reviews = [r for r in sample_reviews if int(r.get('rating', 0)) <= 2] insights['sample_positive'] = positive_reviews[:5] insights['sample_negative'] = negative_reviews[:3] return insights def clean_json_response(self, response_text: str) -> str: """Clean and extract JSON from AI response.""" # Remove markdown code blocks response_text = re.sub(r'```json\s*', '', response_text) response_text = re.sub(r'```\s*', '', response_text) # Find the JSON array json_start = response_text.find('[') json_end = response_text.rfind(']') + 1 if json_start != -1 and json_end > json_start: json_content = response_text[json_start:json_end] # Clean common JSON issues json_content = re.sub(r'\n\s*', ' ', json_content) # Remove newlines and extra spaces json_content = re.sub(r'"\s*,\s*"', '", "', json_content) # Fix spacing around commas json_content = re.sub(r'}\s*,\s*{', '}, {', json_content) # Fix object separators return json_content return None def generate_faqs(self, keywords: List[str], reviews_data: List[Dict], num_faqs: int = 20) -> List[Dict]: """Generate FAQs based on SEO keywords and review insights.""" # Get review insights insights = self.get_review_insights(reviews_data) # Create sample review context (limit to prevent token overflow) sample_reviews = [] for review in insights['sample_positive']: sample_reviews.append(f"Rating: {review.get('rating', 'N/A')}/5 - {review.get('review_text', '')[:150]}...") for review in insights['sample_negative']: sample_reviews.append(f"Rating: {review.get('rating', 'N/A')}/5 - {review.get('review_text', '')[:150]}...") sample_context = "\n".join(sample_reviews[:8]) # Limit to 8 samples # Limit FAQs to maximum of 30 num_faqs = min(num_faqs, 15) prompt = f""" Based on the following SEO keywords and customer review insights, generate exactly {num_faqs} comprehensive FAQ pairs for a business website. CRITICAL REQUIREMENTS: 1. DO NOT use any specific brand names or business names in questions or answers 2. Use generic terms like "our store", "our business", "our team", "our services" 3. Focus on universal customer concerns and solutions SEO Keywords: {', '.join(keywords[:20])} Business Insights: - Total Reviews Analyzed: {insights['total_reviews']} - Average Rating: {insights['avg_rating']:.1f}/5 - Positive Reviews: {insights['positive_reviews']}/{insights['total_reviews']} Sample Customer Feedback: {sample_context} IMPORTANT: Respond with ONLY a valid JSON array. No additional text or markdown. Format: [ {{ "question": "Why should I choose your business for my needs?", "answer": "Our experienced team provides personalized service with attention to detail. We focus on understanding your specific requirements and delivering solutions that exceed expectations, backed by our commitment to quality and customer satisfaction." }} ] """ try: st.info("Generating FAQs with AI...") response = self.client.chat.completions.create( messages=[ {"role": "system", "content": "You are a helpful assistant that generates brand-neutral JSON responses for FAQ content. Always respond with valid JSON only, without any brand names."}, {"role": "user", "content": prompt} ], model=self.model, temperature=0.2, # Lower temperature for more consistent output max_tokens=3000 # Increased for more FAQs ) # Get the response content content = response.choices[0].message.content.strip() # Clean and extract JSON json_content = self.clean_json_response(content) if json_content: try: faqs = json.loads(json_content) # Validate that it's a list of dictionaries with required keys if isinstance(faqs, list) and all(isinstance(faq, dict) and 'question' in faq and 'answer' in faq for faq in faqs): # Limit to requested number return faqs[:num_faqs] else: st.warning("Invalid FAQ format received, using fallback") return self._get_fallback_faqs(num_faqs) except json.JSONDecodeError as e: st.error(f"JSON parsing error: {str(e)}") return self._get_fallback_faqs(num_faqs) else: st.warning("Could not extract JSON from response, using fallback") return self._get_fallback_faqs(num_faqs) except Exception as e: st.error(f"Error generating FAQs: {str(e)}") return self._get_fallback_faqs(num_faqs) def _get_fallback_faqs(self, num_faqs: int = 20) -> List[Dict]: """Fallback FAQs if API fails - brand neutral and organized by categories.""" base_faqs = [ # Why choose us questions { "question": "Why should I choose your business over competitors?", "answer": "Our experienced team provides personalized service with attention to detail and a commitment to customer satisfaction. We take time to understand your specific needs and work with you throughout the entire process to ensure you're completely happy with the results." }, { "question": "What makes your customer service different?", "answer": "We pride ourselves on patient, welcoming service where customers never feel rushed. Our team focuses on creating a comfortable experience while providing expert guidance to help you make the best decisions for your needs." }, { "question": "How experienced is your team?", "answer": "Our team consists of experienced professionals who are passionate about helping customers achieve their goals. We stay updated with the latest trends and techniques to provide you with the best possible service and advice." }, # Problem-solving questions { "question": "How do you help customers who feel overwhelmed by choices?", "answer": "Our knowledgeable staff guides you through the selection process based on your preferences, budget, and specific needs. We take time to understand your vision and narrow down options so you can make decisions with confidence." }, { "question": "What if I'm not satisfied with the results?", "answer": "Customer satisfaction is our top priority. We work closely with you throughout the process and make adjustments as needed to ensure you're completely happy with the final outcome. Our team is committed to making things right." }, { "question": "How do you handle sizing and fit issues?", "answer": "Our professional team provides expert fitting services and makes necessary adjustments to ensure perfect results. We take precise measurements and work with you through multiple fittings if needed to achieve the ideal fit." }, # Service questions { "question": "What services do you offer besides your main products?", "answer": "In addition to our primary offerings, we provide professional consultation, customization services, and ongoing support. We also offer accessories and complementary products to complete your experience with us." }, { "question": "Do you provide consultation services?", "answer": "Yes, we offer personalized consultations where our experts help you explore options, provide styling advice, and ensure you make choices that align with your vision and budget. These consultations are designed to make your experience as smooth as possible." }, { "question": "What additional products and accessories do you carry?", "answer": "We offer a comprehensive selection of complementary products and accessories to complete your needs. Our team can help coordinate everything to ensure a cohesive and polished final result." }, # Process questions { "question": "Do I need an appointment or can I walk in?", "answer": "While we welcome walk-ins when possible, we highly recommend scheduling an appointment to ensure you receive dedicated attention and personalized service. Appointments allow us to prepare for your visit and provide the best possible experience." }, { "question": "How long does the typical process take?", "answer": "The timeline varies depending on your specific needs, but we work with you to establish realistic expectations from the start. Our team keeps you informed throughout the process and ensures everything is completed according to your schedule." }, { "question": "What should I expect during my first visit?", "answer": "During your initial visit, we'll discuss your needs, preferences, and budget. Our team will guide you through available options, provide expert recommendations, and create a plan tailored to your specific requirements." }, # Quality questions { "question": "How do you ensure quality in your products and services?", "answer": "We maintain high standards through careful selection of products, skilled craftsmanship, and thorough quality checks. Our experienced team pays attention to every detail to ensure you receive exceptional results that meet our quality standards." }, { "question": "What is your experience with customers who have specific requirements?", "answer": "Our team has extensive experience working with diverse customer needs and preferences. We pride ourselves on our ability to accommodate special requirements and provide customized solutions that exceed expectations." }, { "question": "How do you stay current with industry trends?", "answer": "Our team continuously educates themselves on the latest trends, techniques, and products in the industry. We attend training sessions and stay connected with industry developments to provide you with current options and expert advice." }, # Additional comprehensive questions { "question": "What price ranges do you offer?", "answer": "We offer options across various price points to accommodate different budgets. Our team can help you find quality solutions within your budget and provide transparent pricing information upfront so you can make informed decisions." }, { "question": "Do you offer payment plans or financing options?", "answer": "Yes, we understand that significant purchases require financial planning. We offer flexible payment options and financing plans to make our services more accessible and help you achieve your goals within your budget." }, { "question": "How far in advance should I start planning?", "answer": "We recommend starting the process several months in advance to allow adequate time for consultation, selection, customization, and any necessary adjustments. Early planning ensures the best selection and reduces stress as your important date approaches." }, { "question": "Do you work with customers who have time constraints?", "answer": "Absolutely! We understand that sometimes timelines are tight, and we're experienced in working efficiently to meet urgent deadlines. Our team will discuss your timeline and work diligently to accommodate your schedule while maintaining quality standards." }, { "question": "What sets your customer experience apart?", "answer": "We focus on creating a welcoming, pressure-free environment where customers feel comfortable and supported. Our personalized approach, attention to detail, and commitment to customer satisfaction ensure that your experience with us is positive and memorable." }, { "question": "How do you handle special requests or customizations?", "answer": "We welcome special requests and customizations as part of our personalized service approach. Our skilled team works with you to understand your vision and explore options for creating something unique that perfectly meets your specific needs and preferences." } ] # Return the requested number of FAQs, up to the available amount return base_faqs[:min(num_faqs, len(base_faqs))] def load_sample_data(): """Load sample data if no file is uploaded.""" sample_data = [ { "reviewer_name": "Customer A", "rating": 5, "date": "3 months ago", "review_text": "This past August, I went to the bridal store to look for my dream wedding dress and I found It! I was looking for an elegant, simple, and classic dress. The consultant was extremely helpful, patient, and sweet. The alterations team did a great job making sure I was happy with the alterations done to my dress.", "owner_response": "Thank you so much for taking the time to leave this excellent review!", }, { "reviewer_name": "Customer B", "rating": 5, "date": "2 months ago", "review_text": "A very special shout out to the consultant who made my daughters dress shopping so special. Never rushed her and only everything to accommodate her until she found the right dress to say yes to.", "owner_response": "", }, { "reviewer_name": "Customer C", "rating": 5, "date": "1 month ago", "review_text": "My wedding dress shopping experience was beyond amazing. The consultant was so wonderful to work with. Everyone was so sweet & welcoming when we walked in. She made me feel so comfortable as we tried on many different dresses.", "owner_response": "Thank you for the wonderful review!", } ] return sample_data def main(): st.title("🤖 AI FAQ Generator") st.markdown("Generate SEO-optimized, FAQs from customer reviews") # Sidebar for configuration with st.sidebar: st.header("⚙️ Configuration") # API Key input api_key = os.getenv("GROQ_API_KEY") if not api_key: st.warning("Please enter your Groq API key to use AI features") st.markdown("Get your API key from [Groq Console](https://console.groq.com)") st.divider() # FAQ Configuration st.subheader("FAQ Settings") num_faqs = st.slider("Number of FAQs to generate", 5, 15, 10, 1, help="Select how many FAQs to generate based on the reviews") if num_faqs < 5: st.warning("Generating fewer than 5 FAQs may not provide enough coverage of customer concerns") elif num_faqs > 15: st.warning("Generating more than 15 FAQs may lead to less focused content") st.info(f"Will generate {num_faqs} FAQs") st.divider() # File upload uploaded_file = st.file_uploader( "Upload Reviews CSV", type=['csv'], help="Upload a CSV file with customer reviews (supports large files)" ) # Use sample data option use_sample = st.checkbox("Use sample data", value=False) # Load data if uploaded_file is not None: try: with st.spinner("Loading CSV file..."): df = pd.read_csv(uploaded_file) # Data cleaning df['rating'] = df['rating'].astype(str) df = df.drop(columns=['review_id', 'scraped_at'], axis=1, errors='ignore') # Remove empty reviews df = df.dropna(subset=['review_text']) df = df[df['review_text'].str.strip() != ''] reviews_data = df.to_dict('records') st.success(f"✅ Loaded {len(reviews_data)} reviews from uploaded file") # Show file statistics col1, col2, col3 = st.columns(3) with col1: st.metric("Total Reviews", len(reviews_data)) with col2: avg_rating = sum(int(r.get('rating', 0)) for r in reviews_data if r.get('rating', '0').isdigit()) / len([r for r in reviews_data if r.get('rating', '0').isdigit()]) st.metric("Average Rating", f"{avg_rating:.1f}") with col3: positive_reviews = sum(1 for r in reviews_data if r.get('rating', '0').isdigit() and int(r.get('rating', 0)) >= 4) st.metric("Positive Reviews", f"{positive_reviews}/{len(reviews_data)}") except Exception as e: st.error(f"Error loading file: {str(e)}") st.info("Please ensure your CSV has columns: 'review_text', 'rating'") reviews_data = load_sample_data() elif use_sample: reviews_data = load_sample_data() st.info("Using sample data for demonstration") else: st.warning("Please upload a CSV file or use sample data") return # Display data overview if reviews_data: with st.expander("📊 Data Overview", expanded=False): # Sample reviews preview st.subheader("Sample Reviews") for i, review in enumerate(reviews_data[:3]): with st.container(): st.write(f"**{review.get('reviewer_name', 'Anonymous')}** - {review.get('rating', 'N/A')} ⭐") st.write(review.get('review_text', 'No review text')[:300] + "...") if i < 2: # Don't show divider after last item st.divider() # Generate FAQs if api_key and reviews_data: st.header("🚀 Generate AI-Powered FAQs") col1, col2 = st.columns(2) with col1: if st.button("🤖 Generate Keywords & FAQs with AI", type="primary"): start_time = time.time() with st.spinner("Processing large dataset..."): # Initialize FAQ generator faq_gen = OptimizedFAQGenerator(api_key) # Extract keywords with batch processing st.info("🔍 Extracting SEO keywords from all reviews...") keywords = faq_gen.extract_seo_keywords(reviews_data) # Store in session state st.session_state.keywords = keywords # Generate FAQs st.info("📝 Generating brand-neutral FAQs...") faqs = faq_gen.generate_faqs(keywords, reviews_data, num_faqs) # Store in session state st.session_state.faqs = faqs generation_time = time.time() - start_time st.session_state.generation_time = generation_time st.success(f"✅ Generated {len(faqs)} FAQs in {generation_time:.1f} seconds!") # with col2: # if st.button("⚡ Use Quick Fallback"): # faq_gen = OptimizedFAQGenerator("") # Empty API key for fallback # st.session_state.keywords = ["customer service", "quality products", "professional consultation", "experienced team", "customer satisfaction"] # st.session_state.faqs = faq_gen._get_fallback_faqs(num_faqs) # st.info("Using pre-built content") # Display results if hasattr(st.session_state, 'keywords') and hasattr(st.session_state, 'faqs'): # Performance metrics if hasattr(st.session_state, 'generation_time'): st.info(f"⏱️ Generation completed in {st.session_state.generation_time:.1f} seconds") st.header("📈 Extracted SEO Keywords") # Display keywords in a nice format keywords = st.session_state.keywords # Show keywords in columns cols = st.columns(3) for i, keyword in enumerate(keywords): with cols[i % 3]: st.markdown(f"`{keyword}`") st.header("❓ Generated Brand-Neutral FAQs") st.info(f"Generated {len(st.session_state.faqs)} FAQs that can be used by any business in this industry") # Display FAQs with search functionality search_term = st.text_input("🔍 Search FAQs", placeholder="Enter keywords to filter FAQs...") faqs = st.session_state.faqs # Filter FAQs if search term is provided if search_term: filtered_faqs = [ faq for faq in faqs if search_term.lower() in faq.get('question', '').lower() or search_term.lower() in faq.get('answer', '').lower() ] st.info(f"Showing {len(filtered_faqs)} FAQs matching '{search_term}'") faqs_to_show = filtered_faqs else: faqs_to_show = faqs # Display FAQs for i, faq in enumerate(faqs_to_show): with st.expander(f"FAQ {i+1}: {faq.get('question', 'No question')}", expanded=False): st.subheader("Question:") st.write(faq.get('question', 'No question')) st.subheader("Answer:") st.write(faq.get('answer', 'No answer')) # Export options st.header("📥 Export Options") col1, col2, col3 = st.columns(3) with col1: # Export as JSON export_data = { "metadata": { "generated_at": time.strftime("%Y-%m-%d %H:%M:%S"), "total_reviews_analyzed": len(reviews_data), "generation_time_seconds": getattr(st.session_state, 'generation_time', 0), "brand_neutral": True }, "keywords": keywords, "faqs": faqs } st.download_button( label="📄 Download JSON", data=json.dumps(export_data, indent=2), file_name="faqs.json", mime="application/json" ) with col2: # Export as CSV faq_df = pd.DataFrame(faqs) csv_data = faq_df.to_csv(index=False) st.download_button( label="📊 Download CSV", data=csv_data, file_name="faqs.csv", mime="text/csv" ) with col3: # Export as HTML html_content = f"""
Generated: {time.strftime("%Y-%m-%d %H:%M:%S")}
Reviews Analyzed: {len(reviews_data)}
Keywords: {', '.join(keywords[:10])}...