File size: 32,154 Bytes
31e11f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
import re
import os
import streamlit as st
import pandas as pd
import json
from typing import List, Dict
from groq import Groq
import time
from dotenv import load_dotenv
import math
from collections import Counter

# Load environment variables (such as GROQ_API_KEY, read in main()) from the .env file
load_dotenv()

# Module-level placeholder for review data.
# NOTE(review): initialised as a dict, while the generator methods below take a
# list of review dicts — confirm whether this global is still needed.
reviews_data = {}

# Configure the Streamlit page
st.set_page_config(
    page_title="AI FAQ Generator",
    # Robot-face emoji. The previous value was the emoji's UTF-8 bytes decoded
    # with the wrong codec ("πŸ€–"), which is not a valid emoji icon.
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded"
)

class OptimizedFAQGenerator:
    def __init__(self, api_key: str):
        """Set up the Groq client and the tuning values used by the generator.

        Args:
            api_key: Groq API key passed to the ``Groq`` client constructor.
        """
        # Model identifier sent with every chat-completion request.
        self.model = "llama3-8b-8192"
        # Nominal batch size; no method of this class reads it.
        self.batch_size = 100
        # Character budget for the review text sent in a single API call.
        self.max_text_length = 3000
        self.client = Groq(api_key=api_key)
    
    def chunk_reviews_by_size(self, reviews_data: List[Dict], max_chars: int = 3000) -> List[List[Dict]]:
        """Group consecutive reviews into chunks bounded by a character budget.

        Each review is charged ``len(review_text) + 50`` characters (the 50 is
        a buffer for formatting). Reviews keep their original order; a new
        chunk is started whenever adding the next review would push the
        running total above ``max_chars``. A single review larger than the
        budget still ends up in a chunk of its own.

        Args:
            reviews_data: Review dicts; the text is read from ``'review_text'``.
            max_chars: Character budget per chunk.

        Returns:
            A list of chunks, each a non-empty list of review dicts. Empty
            input yields an empty list.
        """
        chunks = []
        current_chunk = []
        current_length = 0

        for review in reviews_data:
            review_text = review.get('review_text')
            # Missing, None or non-string text (e.g. a NaN float) is charged as
            # empty instead of letting len() raise TypeError.
            text_length = len(review_text) if isinstance(review_text, str) else 0
            review_length = text_length + 50  # Add buffer for formatting

            # If adding this review would exceed the limit, start a new chunk.
            # The `current_chunk` check prevents emitting an empty chunk when
            # the very first review is already over budget.
            if current_chunk and current_length + review_length > max_chars:
                chunks.append(current_chunk)
                current_chunk = [review]
                current_length = review_length
            else:
                current_chunk.append(review)
                current_length += review_length

        # Add the last chunk if it has content
        if current_chunk:
            chunks.append(current_chunk)

        return chunks
    
    def extract_keywords_from_batch(self, review_batch: List[Dict]) -> List[str]:
        """Extract keywords from a batch of reviews."""
        # Combine review texts from the batch
        batch_text = " ".join([review.get('review_text', '') for review in review_batch if review.get('review_text')])
        
        # Truncate if too long
        if len(batch_text) > self.max_text_length:
            batch_text = batch_text[:self.max_text_length]
        
        prompt = f"""
        Analyze these customer reviews and extract SEO keywords and phrases for a business website.
        
        IMPORTANT RULES:
        1. DO NOT include any specific brand names or business names
        2. Focus on generic industry terms and services
        3. Extract keywords that any similar business could use
        4. Focus on customer pain points and solutions
        
        Extract keywords for:
        - Products and services mentioned (generic terms only)
        - Common customer concerns and questions
        - Industry terminology
        - Customer experience themes
        - Service quality aspects
        
        Reviews:
        {batch_text}
        
        Return exactly 15 relevant SEO keywords/phrases, one per line, without numbering or bullets.
        Use generic terms that any business in this industry could use.
        """
        
        try:
            response = self.client.chat.completions.create(
                messages=[{"role": "user", "content": prompt}],
                model=self.model,
                temperature=0.3,
                max_tokens=400
            )
            
            keywords = [kw.strip() for kw in response.choices[0].message.content.strip().split('\n') if kw.strip()]
            return keywords[:15]  # Limit to 15 keywords per batch
            
        except Exception as e:
            st.error(f"Error extracting keywords from batch: {str(e)}")
            return []
    
    def extract_seo_keywords(self, reviews_data: List[Dict]) -> List[str]:
        """Extract the most frequent SEO keywords across all reviews.

        The reviews are split into character-bounded chunks with
        ``chunk_reviews_by_size``; each chunk is sent to
        ``extract_keywords_from_batch`` and the returned keywords are tallied.
        Progress is shown through a Streamlit progress bar and status line.

        Keywords are tallied case-insensitively and with internal whitespace
        collapsed, so "Wedding Dress" and "wedding  dress" count as the same
        keyword; the first spelling seen is the one returned.

        Args:
            reviews_data: Review dicts; the text is read from ``'review_text'``.

        Returns:
            Up to 25 keywords, most frequent first.
        """
        st.info(f"Processing {len(reviews_data)} reviews in batches...")

        # Create progress bar
        progress_bar = st.progress(0)
        status_text = st.empty()

        # Chunk reviews by character count
        review_chunks = self.chunk_reviews_by_size(reviews_data, self.max_text_length)
        total_chunks = len(review_chunks)

        keyword_counts = Counter()
        first_spelling = {}  # normalised key -> keyword as first returned

        for batch_number, chunk in enumerate(review_chunks, start=1):
            status_text.text(f"Processing batch {batch_number}/{total_chunks} ({len(chunk)} reviews)...")

            # Extract keywords from this batch and fold them into the tally
            for keyword in self.extract_keywords_from_batch(chunk):
                key = " ".join(keyword.split()).casefold()
                keyword_counts[key] += 1
                first_spelling.setdefault(key, keyword)

            # Update progress
            progress_bar.progress(batch_number / total_chunks)

            # Small delay to avoid rate limiting; nothing follows the last batch,
            # so there is no reason to wait after it.
            if batch_number < total_chunks:
                time.sleep(0.5)

        # most_common() keeps first-seen order among equal counts
        top_keywords = [first_spelling[key] for key, _ in keyword_counts.most_common(25)]

        progress_bar.empty()
        status_text.empty()

        st.success(f"Extracted {len(top_keywords)} unique keywords from {len(review_chunks)} batches")

        return top_keywords
    
    def get_review_insights(self, reviews_data: List[Dict]) -> Dict:
        """Summarise ratings and pick sample reviews for FAQ generation.

        Ratings are read from each review's ``'rating'`` value and converted
        to ``int`` (fractional parts are truncated). Values that are missing
        or cannot be parsed — ``None``, ``"nan"``, free text — count as 0,
        the same value the lookup already defaults to for a missing key.
        Numeric strings such as ``"5.0"`` are accepted.

        Args:
            reviews_data: Review dicts.

        Returns:
            A dict with:
              - ``total_reviews``: number of reviews,
              - ``avg_rating``: mean rating (0.0 for empty input),
              - ``positive_reviews``: count of reviews rated 4 or higher,
              - ``common_themes`` / ``pain_points`` / ``positive_aspects``:
                empty lists (not populated by this method),
              - ``sample_positive``: up to 5 reviews rated >= 4, taken from
                the first 50 reviews,
              - ``sample_negative``: up to 3 reviews rated <= 2, taken from
                the first 50 reviews.
        """
        def rating_of(review: Dict) -> int:
            """Return the review's rating as an int, or 0 if it cannot be parsed."""
            try:
                # float() first so that strings like "5.0" are accepted.
                return int(float(review.get('rating', 0)))
            except (TypeError, ValueError, OverflowError):
                # None, "nan"/NaN, infinities and non-numeric text.
                return 0

        ratings = [rating_of(review) for review in reviews_data]
        total_reviews = len(ratings)

        insights = {
            'total_reviews': total_reviews,
            # Guard against ZeroDivisionError when there are no reviews.
            'avg_rating': sum(ratings) / total_reviews if total_reviews else 0.0,
            'positive_reviews': sum(1 for rating in ratings if rating >= 4),
            'common_themes': [],
            'pain_points': [],
            'positive_aspects': []
        }

        # Only the first 50 reviews are considered when picking samples.
        sample_size = min(50, total_reviews)
        sample = list(zip(reviews_data[:sample_size], ratings[:sample_size]))

        positive_reviews = [review for review, rating in sample if rating >= 4]
        negative_reviews = [review for review, rating in sample if rating <= 2]

        insights['sample_positive'] = positive_reviews[:5]
        insights['sample_negative'] = negative_reviews[:3]

        return insights
    
    def clean_json_response(self, response_text: str) -> str:
        """Pull the JSON array out of a raw model reply and tidy its spacing.

        Markdown code-fence markers are removed first. The text from the first
        ``[`` to the last ``]`` (inclusive) is then taken as the array, and
        newlines plus the spacing around separators are normalised.

        Args:
            response_text: Raw text returned by the model.

        Returns:
            The normalised array text, or ``None`` when the reply contains no
            ``[`` followed by a later ``]``.
        """
        # Strip the "json"-tagged fence marker first, then any bare fence marker.
        for fence_pattern in (r'```json\s*', r'```\s*'):
            response_text = re.sub(fence_pattern, '', response_text)

        # Locate the outermost bracket pair.
        start = response_text.find('[')
        stop = response_text.rfind(']') + 1
        if start == -1 or stop <= start:
            return None

        array_text = response_text[start:stop]

        # Applied in order: collapse newlines and the indentation after them,
        # then normalise spacing around commas between strings and objects.
        tidy_rules = (
            (r'\n\s*', ' '),
            (r'"\s*,\s*"', '", "'),
            (r'}\s*,\s*{', '}, {'),
        )
        for pattern, replacement in tidy_rules:
            array_text = re.sub(pattern, replacement, array_text)

        return array_text
    
    def generate_faqs(self, keywords: List[str], reviews_data: List[Dict], num_faqs: int = 20) -> List[Dict]:
        """Generate FAQ pairs from SEO keywords and review insights.

        Builds a prompt from the first 20 keywords, the rating summary from
        ``get_review_insights`` and up to 8 sample reviews (each clipped to
        150 characters), then asks the model for a JSON array of
        ``{"question", "answer"}`` objects.

        Args:
            keywords: SEO keywords; only the first 20 are sent to the model.
            reviews_data: Review dicts used for the summary and samples.
            num_faqs: Number of FAQs wanted. Values above 15 are reduced to
                15, so the default of 20 also yields at most 15.

        Returns:
            At most ``num_faqs`` dicts with ``'question'`` and ``'answer'``
            keys. If the API call fails, or the reply is not a non-empty JSON
            array of such dicts, the result of ``_get_fallback_faqs`` is
            returned instead.
        """
        # Get review insights
        insights = self.get_review_insights(reviews_data)

        # Create sample review context: positive samples first, then negative,
        # each clipped to 150 characters to prevent token overflow.
        sample_reviews = []
        for review in insights['sample_positive'] + insights['sample_negative']:
            text = review.get('review_text')
            if not isinstance(text, str):
                text = ''  # missing/None/NaN text cannot be sliced
            sample_reviews.append(f"Rating: {review.get('rating', 'N/A')}/5 - {text[:150]}...")

        sample_context = "\n".join(sample_reviews[:8])  # Limit to 8 samples

        # Limit FAQs to a maximum of 15
        num_faqs = min(num_faqs, 15)

        prompt = f"""
        Based on the following SEO keywords and customer review insights, generate exactly {num_faqs} comprehensive FAQ pairs for a business website.

        CRITICAL REQUIREMENTS:
        1. DO NOT use any specific brand names or business names in questions or answers
        2. Use generic terms like "our store", "our business", "our team", "our services"
        3. Focus on universal customer concerns and solutions

        SEO Keywords: {', '.join(keywords[:20])}

        Business Insights:
        - Total Reviews Analyzed: {insights['total_reviews']}
        - Average Rating: {insights['avg_rating']:.1f}/5
        - Positive Reviews: {insights['positive_reviews']}/{insights['total_reviews']}

        Sample Customer Feedback:
        {sample_context}

        IMPORTANT: Respond with ONLY a valid JSON array. No additional text or markdown.

        Format:
        [
          {{
            "question": "Why should I choose your business for my needs?",
            "answer": "Our experienced team provides personalized service with attention to detail. We focus on understanding your specific requirements and delivering solutions that exceed expectations, backed by our commitment to quality and customer satisfaction."
          }}
        ]
        """

        try:
            st.info("Generating FAQs with AI...")

            response = self.client.chat.completions.create(
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that generates brand-neutral JSON responses for FAQ content. Always respond with valid JSON only, without any brand names."},
                    {"role": "user", "content": prompt}
                ],
                model=self.model,
                temperature=0.2,  # Lower temperature for more consistent output
                max_tokens=3000   # Increased for more FAQs
            )

            # Get the response content; a missing body is treated as empty
            content = (response.choices[0].message.content or "").strip()

            # Clean and extract JSON
            json_content = self.clean_json_response(content)
            if not json_content:
                st.warning("Could not extract JSON from response, using fallback")
                return self._get_fallback_faqs(num_faqs)

            try:
                faqs = json.loads(json_content)
            except json.JSONDecodeError as e:
                st.error(f"JSON parsing error: {str(e)}")
                return self._get_fallback_faqs(num_faqs)

            # Validate that it's a non-empty list of dicts with the required
            # keys. An empty array is rejected too: returning it would leave
            # the caller with no FAQs at all.
            is_valid = (
                isinstance(faqs, list)
                and len(faqs) > 0
                and all(isinstance(faq, dict) and 'question' in faq and 'answer' in faq for faq in faqs)
            )
            if not is_valid:
                st.warning("Invalid FAQ format received, using fallback")
                return self._get_fallback_faqs(num_faqs)

            # Limit to requested number
            return faqs[:num_faqs]

        except Exception as e:
            # Top-level boundary for the API path: report and fall back.
            st.error(f"Error generating FAQs: {str(e)}")
            return self._get_fallback_faqs(num_faqs)
    
    def _get_fallback_faqs(self, num_faqs: int = 20) -> List[Dict]:
        """Fallback FAQs if API fails - brand neutral and organized by categories."""
        base_faqs = [
            # Why choose us questions
            {
                "question": "Why should I choose your business over competitors?",
                "answer": "Our experienced team provides personalized service with attention to detail and a commitment to customer satisfaction. We take time to understand your specific needs and work with you throughout the entire process to ensure you're completely happy with the results."
            },
            {
                "question": "What makes your customer service different?",
                "answer": "We pride ourselves on patient, welcoming service where customers never feel rushed. Our team focuses on creating a comfortable experience while providing expert guidance to help you make the best decisions for your needs."
            },
            {
                "question": "How experienced is your team?",
                "answer": "Our team consists of experienced professionals who are passionate about helping customers achieve their goals. We stay updated with the latest trends and techniques to provide you with the best possible service and advice."
            },
            
            # Problem-solving questions
            {
                "question": "How do you help customers who feel overwhelmed by choices?",
                "answer": "Our knowledgeable staff guides you through the selection process based on your preferences, budget, and specific needs. We take time to understand your vision and narrow down options so you can make decisions with confidence."
            },
            {
                "question": "What if I'm not satisfied with the results?",
                "answer": "Customer satisfaction is our top priority. We work closely with you throughout the process and make adjustments as needed to ensure you're completely happy with the final outcome. Our team is committed to making things right."
            },
            {
                "question": "How do you handle sizing and fit issues?",
                "answer": "Our professional team provides expert fitting services and makes necessary adjustments to ensure perfect results. We take precise measurements and work with you through multiple fittings if needed to achieve the ideal fit."
            },
            
            # Service questions
            {
                "question": "What services do you offer besides your main products?",
                "answer": "In addition to our primary offerings, we provide professional consultation, customization services, and ongoing support. We also offer accessories and complementary products to complete your experience with us."
            },
            {
                "question": "Do you provide consultation services?",
                "answer": "Yes, we offer personalized consultations where our experts help you explore options, provide styling advice, and ensure you make choices that align with your vision and budget. These consultations are designed to make your experience as smooth as possible."
            },
            {
                "question": "What additional products and accessories do you carry?",
                "answer": "We offer a comprehensive selection of complementary products and accessories to complete your needs. Our team can help coordinate everything to ensure a cohesive and polished final result."
            },
            
            # Process questions
            {
                "question": "Do I need an appointment or can I walk in?",
                "answer": "While we welcome walk-ins when possible, we highly recommend scheduling an appointment to ensure you receive dedicated attention and personalized service. Appointments allow us to prepare for your visit and provide the best possible experience."
            },
            {
                "question": "How long does the typical process take?",
                "answer": "The timeline varies depending on your specific needs, but we work with you to establish realistic expectations from the start. Our team keeps you informed throughout the process and ensures everything is completed according to your schedule."
            },
            {
                "question": "What should I expect during my first visit?",
                "answer": "During your initial visit, we'll discuss your needs, preferences, and budget. Our team will guide you through available options, provide expert recommendations, and create a plan tailored to your specific requirements."
            },
            
            # Quality questions
            {
                "question": "How do you ensure quality in your products and services?",
                "answer": "We maintain high standards through careful selection of products, skilled craftsmanship, and thorough quality checks. Our experienced team pays attention to every detail to ensure you receive exceptional results that meet our quality standards."
            },
            {
                "question": "What is your experience with customers who have specific requirements?",
                "answer": "Our team has extensive experience working with diverse customer needs and preferences. We pride ourselves on our ability to accommodate special requirements and provide customized solutions that exceed expectations."
            },
            {
                "question": "How do you stay current with industry trends?",
                "answer": "Our team continuously educates themselves on the latest trends, techniques, and products in the industry. We attend training sessions and stay connected with industry developments to provide you with current options and expert advice."
            },
            
            # Additional comprehensive questions
            {
                "question": "What price ranges do you offer?",
                "answer": "We offer options across various price points to accommodate different budgets. Our team can help you find quality solutions within your budget and provide transparent pricing information upfront so you can make informed decisions."
            },
            {
                "question": "Do you offer payment plans or financing options?",
                "answer": "Yes, we understand that significant purchases require financial planning. We offer flexible payment options and financing plans to make our services more accessible and help you achieve your goals within your budget."
            },
            {
                "question": "How far in advance should I start planning?",
                "answer": "We recommend starting the process several months in advance to allow adequate time for consultation, selection, customization, and any necessary adjustments. Early planning ensures the best selection and reduces stress as your important date approaches."
            },
            {
                "question": "Do you work with customers who have time constraints?",
                "answer": "Absolutely! We understand that sometimes timelines are tight, and we're experienced in working efficiently to meet urgent deadlines. Our team will discuss your timeline and work diligently to accommodate your schedule while maintaining quality standards."
            },
            {
                "question": "What sets your customer experience apart?",
                "answer": "We focus on creating a welcoming, pressure-free environment where customers feel comfortable and supported. Our personalized approach, attention to detail, and commitment to customer satisfaction ensure that your experience with us is positive and memorable."
            },
            {
                "question": "How do you handle special requests or customizations?",
                "answer": "We welcome special requests and customizations as part of our personalized service approach. Our skilled team works with you to understand your vision and explore options for creating something unique that perfectly meets your specific needs and preferences."
            }
        ]
        
        # Return the requested number of FAQs, up to the available amount
        return base_faqs[:min(num_faqs, len(base_faqs))]

def load_sample_data():
    """Return three built-in sample reviews for use when no file is uploaded.

    Each review is a dict with the keys ``reviewer_name``, ``rating``,
    ``date``, ``review_text`` and ``owner_response``.
    """
    fields = ("reviewer_name", "rating", "date", "review_text", "owner_response")
    rows = (
        (
            "Customer A",
            5,
            "3 months ago",
            "This past August, I went to the bridal store to look for my dream wedding dress and I found It! I was looking for an elegant, simple, and classic dress. The consultant was extremely helpful, patient, and sweet. The alterations team did a great job making sure I was happy with the alterations done to my dress.",
            "Thank you so much for taking the time to leave this excellent review!",
        ),
        (
            "Customer B",
            5,
            "2 months ago",
            "A very special shout out to the consultant who made my daughters dress shopping so special. Never rushed her and only everything to accommodate her until she found the right dress to say yes to.",
            "",
        ),
        (
            "Customer C",
            5,
            "1 month ago",
            "My wedding dress shopping experience was beyond amazing. The consultant was so wonderful to work with. Everyone was so sweet & welcoming when we walked in. She made me feel so comfortable as we tried on many different dresses.",
            "Thank you for the wonderful review!",
        ),
    )
    # Pair every row with the field names to build one dict per review
    return [dict(zip(fields, row)) for row in rows]

def _parse_rating(value):
    """Convert a raw rating value to a float, or return None if it is not numeric.

    Accepts ints, floats and numeric strings such as "5" or "4.0". Blank or
    non-numeric strings, None, NaN and infinities all yield None.
    """
    import math

    try:
        rating = float(value)
    except (TypeError, ValueError):
        return None
    # float("nan") / float("inf") parse successfully but are not usable ratings.
    return rating if math.isfinite(rating) else None


def _rating_summary(reviews):
    """Return ``(average_rating, positive_count)`` for a list of review dicts.

    ``average_rating`` is None when no review carries a numeric rating, so the
    caller never divides by zero. ``positive_count`` is the number of reviews
    rated 4 or higher.
    """
    parsed = (_parse_rating(review.get('rating')) for review in reviews)
    ratings = [rating for rating in parsed if rating is not None]
    if not ratings:
        return None, 0
    average = sum(ratings) / len(ratings)
    positive = sum(1 for rating in ratings if rating >= 4)
    return average, positive


def _load_reviews_from_csv(uploaded_file):
    """Read an uploaded reviews CSV and return its non-empty reviews as dict records.

    Args:
        uploaded_file: A path or file-like object accepted by ``pd.read_csv``.

    Returns:
        A list of row dicts. ``rating`` is stored as a string, ``review_text``
        as a string, and the ``review_id`` / ``scraped_at`` columns are dropped
        when present.

    Raises:
        KeyError: If the ``rating`` or ``review_text`` column is missing.
        Exception: Whatever ``pd.read_csv`` raises for unreadable input.
    """
    df = pd.read_csv(uploaded_file)

    # Ratings are handed on as strings; statistics parse them separately.
    df['rating'] = df['rating'].astype(str)
    df = df.drop(columns=['review_id', 'scraped_at'], errors='ignore')

    # Drop missing reviews first, then normalise to str so that numeric or
    # mixed-type columns do not break the blank-text filter below.
    df = df.dropna(subset=['review_text'])
    df = df.assign(review_text=df['review_text'].astype(str))
    df = df[df['review_text'].str.strip() != '']

    return df.to_dict('records')


def _build_faq_html(faqs, keywords, review_count):
    """Render the FAQs as a standalone HTML document for download.

    Questions, answers and keywords are generated text, so every dynamic value
    is HTML-escaped before being placed in the markup.
    """
    from html import escape

    generated_at = time.strftime("%Y-%m-%d %H:%M:%S")
    keyword_preview = ', '.join(escape(str(keyword)) for keyword in keywords[:10])
    if len(keywords) > 10:
        # Only hint at more keywords when the list was actually truncated.
        keyword_preview += '...'

    lines = [
        '<!DOCTYPE html>',
        '<html>',
        '<head>',
        # Declare the encoding so non-ASCII text renders correctly offline.
        '    <meta charset="utf-8">',
        '    <title>Brand-Neutral FAQs</title>',
        '    <style>',
        '        body { font-family: Arial, sans-serif; margin: 40px; line-height: 1.6; }',
        '        .faq { margin-bottom: 30px; border-left: 4px solid #007bff; padding-left: 20px; }',
        '        .question { font-weight: bold; font-size: 18px; color: #333; margin-bottom: 10px; }',
        '        .answer { color: #666; }',
        '        .header { background: #f8f9fa; padding: 20px; border-radius: 5px; margin-bottom: 30px; }',
        '    </style>',
        '</head>',
        '<body>',
        '    <div class="header">',
        '        <h1>Brand-Neutral FAQs</h1>',
        f'        <p><strong>Generated:</strong> {generated_at}</p>',
        f'        <p><strong>Reviews Analyzed:</strong> {review_count}</p>',
        f'        <p><strong>Keywords:</strong> {keyword_preview}</p>',
        '    </div>',
    ]

    for number, faq in enumerate(faqs, 1):
        question = escape(str(faq.get('question', '')))
        answer = escape(str(faq.get('answer', '')))
        lines.extend([
            '    <div class="faq">',
            f'        <div class="question">{number}. {question}</div>',
            f'        <div class="answer">{answer}</div>',
            '    </div>',
        ])

    lines.append('</body></html>')
    return '\n'.join(lines)


def main():
    """Render the Streamlit page: configure, load reviews, generate and export FAQs.

    Flow:
        1. Sidebar: read the Groq API key from the ``GROQ_API_KEY`` environment
           variable, pick the number of FAQs, and choose a data source.
        2. Load reviews from the uploaded CSV (falling back to the built-in
           sample data if loading fails) or from the sample data.
        3. On button press, extract keywords and generate FAQs with
           ``OptimizedFAQGenerator`` and keep them in ``st.session_state``.
        4. Show the stored keywords/FAQs with a text filter and offer JSON,
           CSV and HTML downloads.
    """
    st.title("🤖 AI FAQ Generator")
    st.markdown("Generate SEO-optimized, FAQs from customer reviews")

    # Sidebar for configuration
    with st.sidebar:
        st.header("⚙️ Configuration")

        # The key is read from the environment only; there is no input widget.
        api_key = os.getenv("GROQ_API_KEY")

        if not api_key:
            st.warning("Please set the GROQ_API_KEY environment variable to use AI features")
            st.markdown("Get your API key from [Groq Console](https://console.groq.com)")

        st.divider()

        # FAQ configuration. The slider itself bounds the value to 5..15, so no
        # further range checks are needed.
        st.subheader("FAQ Settings")
        num_faqs = st.slider(
            "Number of FAQs to generate", 5, 15, 10, 1,
            help="Select how many FAQs to generate based on the reviews",
        )
        st.info(f"Will generate {num_faqs} FAQs")

        st.divider()

        # File upload
        uploaded_file = st.file_uploader(
            "Upload Reviews CSV",
            type=['csv'],
            help="Upload a CSV file with customer reviews (supports large files)",
        )

        # Use sample data option
        use_sample = st.checkbox("Use sample data", value=False)

    # Load data
    if uploaded_file is not None:
        try:
            with st.spinner("Loading CSV file..."):
                reviews_data = _load_reviews_from_csv(uploaded_file)
        except Exception as e:
            # UI boundary: report the problem and keep the page usable by
            # falling back to the bundled sample reviews.
            st.error(f"Error loading file: {str(e)}")
            st.info("Please ensure your CSV has columns: 'review_text', 'rating'")
            reviews_data = load_sample_data()
            st.info("Using sample data for demonstration")
        else:
            st.success(f"✅ Loaded {len(reviews_data)} reviews from uploaded file")
            if not reviews_data:
                st.warning("No non-empty reviews were found in the uploaded file")

            # Show file statistics
            avg_rating, positive_reviews = _rating_summary(reviews_data)
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Total Reviews", len(reviews_data))
            with col2:
                # No numeric ratings at all -> show a placeholder, not a crash.
                avg_label = f"{avg_rating:.1f}" if avg_rating is not None else "N/A"
                st.metric("Average Rating", avg_label)
            with col3:
                st.metric("Positive Reviews", f"{positive_reviews}/{len(reviews_data)}")
    elif use_sample:
        reviews_data = load_sample_data()
        st.info("Using sample data for demonstration")
    else:
        st.warning("Please upload a CSV file or use sample data")
        return

    # Display data overview
    if reviews_data:
        with st.expander("📊 Data Overview", expanded=False):
            # Sample reviews preview
            st.subheader("Sample Reviews")
            preview = reviews_data[:3]
            for i, review in enumerate(preview):
                with st.container():
                    st.write(f"**{review.get('reviewer_name', 'Anonymous')}** - {review.get('rating', 'N/A')} ⭐")
                    text = str(review.get('review_text', 'No review text'))
                    # Add an ellipsis only when the text was actually cut.
                    st.write(text[:300] + "..." if len(text) > 300 else text)
                    if i < len(preview) - 1:  # Don't show divider after last item
                        st.divider()

    # Generate FAQs
    if api_key and reviews_data:
        st.header("🚀 Generate AI-Powered FAQs")

        # Two-column layout keeps the button at half width; the second column
        # is reserved for an optional quick-fallback button (not enabled).
        col1, _ = st.columns(2)
        with col1:
            if st.button("🤖 Generate Keywords & FAQs with AI", type="primary"):
                start_time = time.time()

                with st.spinner("Processing large dataset..."):
                    # Initialize FAQ generator
                    faq_gen = OptimizedFAQGenerator(api_key)

                    # Extract keywords with batch processing
                    st.info("🔍 Extracting SEO keywords from all reviews...")
                    keywords = faq_gen.extract_seo_keywords(reviews_data)
                    st.session_state.keywords = keywords

                    # Generate FAQs
                    st.info("📝 Generating brand-neutral FAQs...")
                    faqs = faq_gen.generate_faqs(keywords, reviews_data, num_faqs)
                    st.session_state.faqs = faqs

                    generation_time = time.time() - start_time
                    st.session_state.generation_time = generation_time

                    st.success(f"✅ Generated {len(faqs)} FAQs in {generation_time:.1f} seconds!")

    # Display results (persisted across reruns in session state)
    if 'keywords' in st.session_state and 'faqs' in st.session_state:
        keywords = st.session_state.keywords
        faqs = st.session_state.faqs

        # Performance metrics
        if 'generation_time' in st.session_state:
            st.info(f"⏱️ Generation completed in {st.session_state.generation_time:.1f} seconds")

        st.header("📈 Extracted SEO Keywords")

        # Show keywords spread round-robin over three columns
        cols = st.columns(3)
        for i, keyword in enumerate(keywords):
            with cols[i % 3]:
                st.markdown(f"`{keyword}`")

        st.header("❓ Generated Brand-Neutral FAQs")
        st.info(f"Generated {len(faqs)} FAQs that can be used by any business in this industry")

        # Display FAQs with search functionality
        search_term = st.text_input("🔍 Search FAQs", placeholder="Enter keywords to filter FAQs...")

        # Case-insensitive substring filter over question and answer text
        if search_term:
            needle = search_term.lower()
            faqs_to_show = [
                faq for faq in faqs
                if needle in faq.get('question', '').lower()
                or needle in faq.get('answer', '').lower()
            ]
            st.info(f"Showing {len(faqs_to_show)} FAQs matching '{search_term}'")
        else:
            faqs_to_show = faqs

        # Display FAQs
        for i, faq in enumerate(faqs_to_show):
            with st.expander(f"FAQ {i+1}: {faq.get('question', 'No question')}", expanded=False):
                st.subheader("Question:")
                st.write(faq.get('question', 'No question'))
                st.subheader("Answer:")
                st.write(faq.get('answer', 'No answer'))

        # Export options (always export the full, unfiltered FAQ list)
        st.header("📥 Export Options")

        col1, col2, col3 = st.columns(3)

        with col1:
            # Export as JSON
            export_data = {
                "metadata": {
                    "generated_at": time.strftime("%Y-%m-%d %H:%M:%S"),
                    "total_reviews_analyzed": len(reviews_data),
                    "generation_time_seconds": st.session_state.get('generation_time', 0),
                    "brand_neutral": True,
                },
                "keywords": keywords,
                "faqs": faqs,
            }

            st.download_button(
                label="📄 Download JSON",
                data=json.dumps(export_data, indent=2),
                file_name="faqs.json",
                mime="application/json",
            )

        with col2:
            # Export as CSV
            csv_data = pd.DataFrame(faqs).to_csv(index=False)

            st.download_button(
                label="📊 Download CSV",
                data=csv_data,
                file_name="faqs.csv",
                mime="text/csv",
            )

        with col3:
            # Export as HTML
            st.download_button(
                label="🌐 Download HTML",
                data=_build_faq_html(faqs, keywords, len(reviews_data)),
                file_name="faqs.html",
                mime="text/html",
            )

    # Footer
    st.markdown("---")
    st.markdown("**Features:** Batch processing for large datasets • Brand-neutral content • SEO optimization • Multiple export formats")

if __name__ == "__main__":
    main()