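# NOTE: the next three lines appear to be repository web-page residue captured
# together with the source; they are not part of the program.
# prathampatel1's picture
# Upload 3 files
# 31e11f7 verified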
import html
import json
import math
import os
import re
import time
from collections import Counter
from typing import Dict, List, Optional

import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from groq import Groq
# Load environment variables from .env file
load_dotenv()

# Module-level placeholder; main() assigns its own local `reviews_data`,
# so this value is not read anywhere in this file.
reviews_data = {}

# Configure the Streamlit page.
# The icon must be a real emoji: the previous value was a mis-decoded byte
# sequence of the robot emoji, not a valid page icon.
st.set_page_config(
    page_title="AI FAQ Generator",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)
class OptimizedFAQGenerator:
    """Generate SEO keywords and brand-neutral FAQs from customer reviews.

    Reviews are plain dicts; the methods read the 'review_text' and 'rating'
    keys. Keyword extraction and FAQ generation call the Groq chat-completions
    API and report progress/errors through Streamlit widgets.
    """

    def __init__(self, api_key: str):
        """Initialize the FAQ Generator with Groq API key."""
        self.client = Groq(api_key=api_key)
        self.model = "llama3-8b-8192"  # Fast and efficient model
        self.batch_size = 100  # Nominal batch size; chunking below is by character count
        self.max_text_length = 3000  # Maximum text length per API call

    @staticmethod
    def _parse_rating(value) -> Optional[float]:
        """Return *value* as a finite float, or None when it is not a usable rating.

        Accepts ints, floats and numeric strings such as "5" or "5.0"; rejects
        None, empty/non-numeric strings, NaN and infinities.
        """
        try:
            rating = float(value)
        except (TypeError, ValueError):
            return None
        return rating if math.isfinite(rating) else None

    def chunk_reviews_by_size(self, reviews_data: List[Dict], max_chars: int = 3000) -> List[List[Dict]]:
        """Chunk reviews by character count to stay within API limits.

        Args:
            reviews_data: Review dicts; only 'review_text' is inspected.
            max_chars: Character budget per chunk (each review costs its text
                length plus a 50-character formatting buffer).

        Returns:
            A list of chunks preserving the input order. A single review larger
            than the budget still gets a chunk of its own.
        """
        chunks = []
        current_chunk = []
        current_length = 0
        for review in reviews_data:
            # A missing or None review_text counts as empty instead of crashing len()
            review_text = review.get('review_text') or ''
            review_length = len(review_text) + 50  # Add buffer for formatting
            # If adding this review would exceed the limit, start a new chunk
            if current_chunk and current_length + review_length > max_chars:
                chunks.append(current_chunk)
                current_chunk = []
                current_length = 0
            current_chunk.append(review)
            current_length += review_length
        # Add the last chunk if it has content
        if current_chunk:
            chunks.append(current_chunk)
        return chunks

    def extract_keywords_from_batch(self, review_batch: List[Dict]) -> List[str]:
        """Extract keywords from a batch of reviews.

        Returns at most 15 keyword strings, or an empty list when the API call
        fails (the error is shown in the Streamlit UI rather than raised).
        """
        # Combine review texts from the batch
        batch_text = " ".join(
            review.get('review_text', '') for review in review_batch if review.get('review_text')
        )
        # Truncate if too long (slicing is a no-op for shorter text)
        batch_text = batch_text[:self.max_text_length]
        prompt = f"""
Analyze these customer reviews and extract SEO keywords and phrases for a business website.
IMPORTANT RULES:
1. DO NOT include any specific brand names or business names
2. Focus on generic industry terms and services
3. Extract keywords that any similar business could use
4. Focus on customer pain points and solutions
Extract keywords for:
- Products and services mentioned (generic terms only)
- Common customer concerns and questions
- Industry terminology
- Customer experience themes
- Service quality aspects
Reviews:
{batch_text}
Return exactly 15 relevant SEO keywords/phrases, one per line, without numbering or bullets.
Use generic terms that any business in this industry could use.
"""
        try:
            response = self.client.chat.completions.create(
                messages=[{"role": "user", "content": prompt}],
                model=self.model,
                temperature=0.3,
                max_tokens=400
            )
            keywords = [kw.strip() for kw in response.choices[0].message.content.strip().split('\n') if kw.strip()]
            return keywords[:15]  # Limit to 15 keywords per batch
        except Exception as e:
            # Best effort: one failed batch must not abort the whole run
            st.error(f"Error extracting keywords from batch: {str(e)}")
            return []

    def extract_seo_keywords(self, reviews_data: List[Dict]) -> List[str]:
        """Extract SEO keywords from all reviews using batch processing.

        Returns the (up to) 25 keywords that occur most often across batches.
        """
        st.info(f"Processing {len(reviews_data)} reviews in batches...")
        # Create progress bar
        progress_bar = st.progress(0)
        status_text = st.empty()
        # Chunk reviews by character count
        review_chunks = self.chunk_reviews_by_size(reviews_data, self.max_text_length)
        total_chunks = len(review_chunks)
        all_keywords = []
        for i, chunk in enumerate(review_chunks):
            status_text.text(f"Processing batch {i+1}/{total_chunks} ({len(chunk)} reviews)...")
            # Extract keywords from this batch
            all_keywords.extend(self.extract_keywords_from_batch(chunk))
            # Update progress
            progress_bar.progress((i + 1) / total_chunks)
            # Small delay to avoid rate limiting; pointless after the final batch
            if i < total_chunks - 1:
                time.sleep(0.5)
        # Count keyword frequency and get top keywords
        top_keywords = [kw for kw, _ in Counter(all_keywords).most_common(25)]
        progress_bar.empty()
        status_text.empty()
        st.success(f"Extracted {len(top_keywords)} unique keywords from {total_chunks} batches")
        return top_keywords

    def get_review_insights(self, reviews_data: List[Dict]) -> Dict:
        """Extract insights from reviews for better FAQ generation.

        Ratings are parsed leniently ("5", "5.0" and 5 are all accepted).
        Reviews whose rating is missing or not numeric are excluded from the
        average and from the positive/negative samples instead of raising.

        Returns:
            A dict with 'total_reviews', 'avg_rating' (0.0 when nothing is
            rated), 'positive_reviews' (count rated >= 4), the placeholder
            lists 'common_themes' / 'pain_points' / 'positive_aspects', plus
            'sample_positive' (up to 5) and 'sample_negative' (up to 3) taken
            from the first 50 reviews.
        """
        ratings = [
            rating
            for rating in (self._parse_rating(r.get('rating')) for r in reviews_data)
            if rating is not None
        ]
        insights = {
            'total_reviews': len(reviews_data),
            # Guard the division: the list may be empty or entirely unrated
            'avg_rating': sum(ratings) / len(ratings) if ratings else 0.0,
            'positive_reviews': sum(1 for rating in ratings if rating >= 4),
            'common_themes': [],
            'pain_points': [],
            'positive_aspects': []
        }
        # Analyze positive vs negative reviews within a bounded sample
        positive_reviews = []
        negative_reviews = []
        for review in reviews_data[:50]:
            rating = self._parse_rating(review.get('rating'))
            if rating is None:
                continue
            if rating >= 4:
                positive_reviews.append(review)
            elif rating <= 2:
                negative_reviews.append(review)
        insights['sample_positive'] = positive_reviews[:5]
        insights['sample_negative'] = negative_reviews[:3]
        return insights

    def clean_json_response(self, response_text: str) -> Optional[str]:
        """Clean and extract JSON from AI response.

        Strips markdown code fences, keeps the text from the first '[' to the
        last ']' and collapses newlines/spacing.

        Returns:
            The candidate JSON array text, or None when no '[...]' span exists.
        """
        # Remove markdown code blocks
        response_text = re.sub(r'```json\s*', '', response_text)
        response_text = re.sub(r'```\s*', '', response_text)
        # Find the JSON array
        json_start = response_text.find('[')
        json_end = response_text.rfind(']') + 1
        if json_start == -1 or json_end <= json_start:
            return None
        json_content = response_text[json_start:json_end]
        # Clean common JSON issues
        json_content = re.sub(r'\n\s*', ' ', json_content)  # Remove newlines and extra spaces
        json_content = re.sub(r'"\s*,\s*"', '", "', json_content)  # Fix spacing around commas
        json_content = re.sub(r'}\s*,\s*{', '}, {', json_content)  # Fix object separators
        return json_content

    def generate_faqs(self, keywords: List[str], reviews_data: List[Dict], num_faqs: int = 20) -> List[Dict]:
        """Generate FAQs based on SEO keywords and review insights.

        Args:
            keywords: SEO keywords; the first 20 are sent to the model.
            reviews_data: Review dicts used for rating statistics and samples.
            num_faqs: Requested number of FAQs, capped at 15.

        Returns:
            A list of {'question', 'answer'} dicts. Falls back to the built-in
            FAQs when the API call fails or the response is not a non-empty,
            well-formed JSON array.
        """
        # Get review insights
        insights = self.get_review_insights(reviews_data)
        # Create sample review context (limit to prevent token overflow)
        sample_reviews = [
            f"Rating: {review.get('rating', 'N/A')}/5 - {review.get('review_text', '')[:150]}..."
            for review in insights['sample_positive'] + insights['sample_negative']
        ]
        sample_context = "\n".join(sample_reviews[:8])  # Limit to 8 samples
        # Limit FAQs to a maximum of 15
        num_faqs = min(num_faqs, 15)
        prompt = f"""
Based on the following SEO keywords and customer review insights, generate exactly {num_faqs} comprehensive FAQ pairs for a business website.
CRITICAL REQUIREMENTS:
1. DO NOT use any specific brand names or business names in questions or answers
2. Use generic terms like "our store", "our business", "our team", "our services"
3. Focus on universal customer concerns and solutions
SEO Keywords: {', '.join(keywords[:20])}
Business Insights:
- Total Reviews Analyzed: {insights['total_reviews']}
- Average Rating: {insights['avg_rating']:.1f}/5
- Positive Reviews: {insights['positive_reviews']}/{insights['total_reviews']}
Sample Customer Feedback:
{sample_context}
IMPORTANT: Respond with ONLY a valid JSON array. No additional text or markdown.
Format:
[
{{
"question": "Why should I choose your business for my needs?",
"answer": "Our experienced team provides personalized service with attention to detail. We focus on understanding your specific requirements and delivering solutions that exceed expectations, backed by our commitment to quality and customer satisfaction."
}}
]
"""
        try:
            st.info("Generating FAQs with AI...")
            response = self.client.chat.completions.create(
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that generates brand-neutral JSON responses for FAQ content. Always respond with valid JSON only, without any brand names."},
                    {"role": "user", "content": prompt}
                ],
                model=self.model,
                temperature=0.2,  # Lower temperature for more consistent output
                max_tokens=3000  # Increased for more FAQs
            )
            # Get the response content
            content = response.choices[0].message.content.strip()
            # Clean and extract JSON
            json_content = self.clean_json_response(content)
            if not json_content:
                st.warning("Could not extract JSON from response, using fallback")
                return self._get_fallback_faqs(num_faqs)
            try:
                faqs = json.loads(json_content)
            except json.JSONDecodeError as e:
                st.error(f"JSON parsing error: {str(e)}")
                return self._get_fallback_faqs(num_faqs)
            # Validate that it's a non-empty list of dictionaries with required keys
            # (an empty array would otherwise be reported as a successful run)
            if (
                isinstance(faqs, list)
                and faqs
                and all(isinstance(faq, dict) and 'question' in faq and 'answer' in faq for faq in faqs)
            ):
                # Limit to requested number
                return faqs[:num_faqs]
            st.warning("Invalid FAQ format received, using fallback")
            return self._get_fallback_faqs(num_faqs)
        except Exception as e:
            # UI boundary: any API/network failure degrades to the canned FAQs
            st.error(f"Error generating FAQs: {str(e)}")
            return self._get_fallback_faqs(num_faqs)

    def _get_fallback_faqs(self, num_faqs: int = 20) -> List[Dict]:
        """Fallback FAQs if API fails - brand neutral and organized by categories.

        Returns the first *num_faqs* built-in entries (all of them when more
        are requested than exist, none for a non-positive count).
        """
        base_faqs = [
            # Why choose us questions
            {
                "question": "Why should I choose your business over competitors?",
                "answer": "Our experienced team provides personalized service with attention to detail and a commitment to customer satisfaction. We take time to understand your specific needs and work with you throughout the entire process to ensure you're completely happy with the results."
            },
            {
                "question": "What makes your customer service different?",
                "answer": "We pride ourselves on patient, welcoming service where customers never feel rushed. Our team focuses on creating a comfortable experience while providing expert guidance to help you make the best decisions for your needs."
            },
            {
                "question": "How experienced is your team?",
                "answer": "Our team consists of experienced professionals who are passionate about helping customers achieve their goals. We stay updated with the latest trends and techniques to provide you with the best possible service and advice."
            },
            # Problem-solving questions
            {
                "question": "How do you help customers who feel overwhelmed by choices?",
                "answer": "Our knowledgeable staff guides you through the selection process based on your preferences, budget, and specific needs. We take time to understand your vision and narrow down options so you can make decisions with confidence."
            },
            {
                "question": "What if I'm not satisfied with the results?",
                "answer": "Customer satisfaction is our top priority. We work closely with you throughout the process and make adjustments as needed to ensure you're completely happy with the final outcome. Our team is committed to making things right."
            },
            {
                "question": "How do you handle sizing and fit issues?",
                "answer": "Our professional team provides expert fitting services and makes necessary adjustments to ensure perfect results. We take precise measurements and work with you through multiple fittings if needed to achieve the ideal fit."
            },
            # Service questions
            {
                "question": "What services do you offer besides your main products?",
                "answer": "In addition to our primary offerings, we provide professional consultation, customization services, and ongoing support. We also offer accessories and complementary products to complete your experience with us."
            },
            {
                "question": "Do you provide consultation services?",
                "answer": "Yes, we offer personalized consultations where our experts help you explore options, provide styling advice, and ensure you make choices that align with your vision and budget. These consultations are designed to make your experience as smooth as possible."
            },
            {
                "question": "What additional products and accessories do you carry?",
                "answer": "We offer a comprehensive selection of complementary products and accessories to complete your needs. Our team can help coordinate everything to ensure a cohesive and polished final result."
            },
            # Process questions
            {
                "question": "Do I need an appointment or can I walk in?",
                "answer": "While we welcome walk-ins when possible, we highly recommend scheduling an appointment to ensure you receive dedicated attention and personalized service. Appointments allow us to prepare for your visit and provide the best possible experience."
            },
            {
                "question": "How long does the typical process take?",
                "answer": "The timeline varies depending on your specific needs, but we work with you to establish realistic expectations from the start. Our team keeps you informed throughout the process and ensures everything is completed according to your schedule."
            },
            {
                "question": "What should I expect during my first visit?",
                "answer": "During your initial visit, we'll discuss your needs, preferences, and budget. Our team will guide you through available options, provide expert recommendations, and create a plan tailored to your specific requirements."
            },
            # Quality questions
            {
                "question": "How do you ensure quality in your products and services?",
                "answer": "We maintain high standards through careful selection of products, skilled craftsmanship, and thorough quality checks. Our experienced team pays attention to every detail to ensure you receive exceptional results that meet our quality standards."
            },
            {
                "question": "What is your experience with customers who have specific requirements?",
                "answer": "Our team has extensive experience working with diverse customer needs and preferences. We pride ourselves on our ability to accommodate special requirements and provide customized solutions that exceed expectations."
            },
            {
                "question": "How do you stay current with industry trends?",
                "answer": "Our team continuously educates themselves on the latest trends, techniques, and products in the industry. We attend training sessions and stay connected with industry developments to provide you with current options and expert advice."
            },
            # Additional comprehensive questions
            {
                "question": "What price ranges do you offer?",
                "answer": "We offer options across various price points to accommodate different budgets. Our team can help you find quality solutions within your budget and provide transparent pricing information upfront so you can make informed decisions."
            },
            {
                "question": "Do you offer payment plans or financing options?",
                "answer": "Yes, we understand that significant purchases require financial planning. We offer flexible payment options and financing plans to make our services more accessible and help you achieve your goals within your budget."
            },
            {
                "question": "How far in advance should I start planning?",
                "answer": "We recommend starting the process several months in advance to allow adequate time for consultation, selection, customization, and any necessary adjustments. Early planning ensures the best selection and reduces stress as your important date approaches."
            },
            {
                "question": "Do you work with customers who have time constraints?",
                "answer": "Absolutely! We understand that sometimes timelines are tight, and we're experienced in working efficiently to meet urgent deadlines. Our team will discuss your timeline and work diligently to accommodate your schedule while maintaining quality standards."
            },
            {
                "question": "What sets your customer experience apart?",
                "answer": "We focus on creating a welcoming, pressure-free environment where customers feel comfortable and supported. Our personalized approach, attention to detail, and commitment to customer satisfaction ensure that your experience with us is positive and memorable."
            },
            {
                "question": "How do you handle special requests or customizations?",
                "answer": "We welcome special requests and customizations as part of our personalized service approach. Our skilled team works with you to understand your vision and explore options for creating something unique that perfectly meets your specific needs and preferences."
            }
        ]
        # Return the requested number of FAQs, up to the available amount.
        # Clamp at zero so a negative request cannot slice from the end.
        return base_faqs[:max(num_faqs, 0)]
def load_sample_data():
    """Return the built-in demo reviews used when no CSV is uploaded.

    Each call builds a fresh list of three review dicts with the keys
    'reviewer_name', 'rating', 'date', 'review_text' and 'owner_response'.
    """
    field_names = ("reviewer_name", "rating", "date", "review_text", "owner_response")
    rows = (
        (
            "Customer A",
            5,
            "3 months ago",
            "This past August, I went to the bridal store to look for my dream wedding dress and I found It! I was looking for an elegant, simple, and classic dress. The consultant was extremely helpful, patient, and sweet. The alterations team did a great job making sure I was happy with the alterations done to my dress.",
            "Thank you so much for taking the time to leave this excellent review!",
        ),
        (
            "Customer B",
            5,
            "2 months ago",
            "A very special shout out to the consultant who made my daughters dress shopping so special. Never rushed her and only everything to accommodate her until she found the right dress to say yes to.",
            "",
        ),
        (
            "Customer C",
            5,
            "1 month ago",
            "My wedding dress shopping experience was beyond amazing. The consultant was so wonderful to work with. Everyone was so sweet & welcoming when we walked in. She made me feel so comfortable as we tried on many different dresses.",
            "Thank you for the wonderful review!",
        ),
    )
    # Pair every row with the shared field names to form one dict per review
    return [dict(zip(field_names, row)) for row in rows]
def _build_html_export(faqs, keywords, review_count):
    """Render the FAQs as a standalone HTML page with all dynamic text escaped."""
    generated_at = time.strftime("%Y-%m-%d %H:%M:%S")
    # Keywords and FAQ text come from a language model / uploaded data, so they
    # must be escaped before being placed into markup.
    keyword_summary = html.escape(', '.join(str(kw) for kw in keywords[:10]))
    parts = [f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Brand-Neutral FAQs</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 40px; line-height: 1.6; }}
.faq {{ margin-bottom: 30px; border-left: 4px solid #007bff; padding-left: 20px; }}
.question {{ font-weight: bold; font-size: 18px; color: #333; margin-bottom: 10px; }}
.answer {{ color: #666; }}
.header {{ background: #f8f9fa; padding: 20px; border-radius: 5px; margin-bottom: 30px; }}
</style>
</head>
<body>
<div class="header">
<h1>Brand-Neutral FAQs</h1>
<p><strong>Generated:</strong> {generated_at}</p>
<p><strong>Reviews Analyzed:</strong> {review_count}</p>
<p><strong>Keywords:</strong> {keyword_summary}...</p>
</div>
"""]
    for i, faq in enumerate(faqs, 1):
        question = html.escape(str(faq.get('question', '')))
        answer = html.escape(str(faq.get('answer', '')))
        parts.append(f"""
<div class="faq">
<div class="question">{i}. {question}</div>
<div class="answer">{answer}</div>
</div>
""")
    parts.append("</body></html>")
    return "".join(parts)


def main():
    """Run the Streamlit app: load reviews, generate keywords/FAQs, show and export them.

    The Groq API key is read from the GROQ_API_KEY environment variable, with
    a password field in the sidebar as a fallback. Generated keywords, FAQs and
    timing are kept in st.session_state so they survive Streamlit reruns.
    """
    st.title("🤖 AI FAQ Generator")
    st.markdown("Generate SEO-optimized, FAQs from customer reviews")

    # Sidebar for configuration
    with st.sidebar:
        st.header("⚙️ Configuration")
        # API Key input: environment first, otherwise let the user type it in
        # (the warning below asks the user to enter a key, so a field must exist)
        api_key = os.getenv("GROQ_API_KEY")
        if not api_key:
            api_key = st.text_input("Groq API key", type="password").strip()
        if not api_key:
            st.warning("Please enter your Groq API key to use AI features")
            st.markdown("Get your API key from [Groq Console](https://console.groq.com)")
        st.divider()
        # FAQ Configuration (the slider itself enforces the 5-15 range)
        st.subheader("FAQ Settings")
        num_faqs = st.slider("Number of FAQs to generate", 5, 15, 10, 1, help="Select how many FAQs to generate based on the reviews")
        st.info(f"Will generate {num_faqs} FAQs")
        st.divider()
        # File upload
        uploaded_file = st.file_uploader(
            "Upload Reviews CSV",
            type=['csv'],
            help="Upload a CSV file with customer reviews (supports large files)"
        )
        # Use sample data option
        use_sample = st.checkbox("Use sample data", value=False)

    # Load data
    if uploaded_file is not None:
        try:
            with st.spinner("Loading CSV file..."):
                df = pd.read_csv(uploaded_file)
                # Data cleaning
                df = df.drop(columns=['review_id', 'scraped_at'], errors='ignore')
                # Remove empty reviews
                df = df.dropna(subset=['review_text']).copy()
                df['review_text'] = df['review_text'].astype(str)
                df = df[df['review_text'].str.strip() != ''].copy()
                # Parse ratings numerically for the statistics, but keep the
                # stored rating a string; whole numbers are written without a
                # trailing ".0" so that int()-style parsing still works on them.
                numeric_rating = pd.to_numeric(df['rating'], errors='coerce')
                df['rating'] = df['rating'].astype(str)
                whole = numeric_rating.notna() & (numeric_rating % 1 == 0)
                df.loc[whole, 'rating'] = numeric_rating[whole].astype(int).astype(str)
                reviews_data = df.to_dict('records')
            st.success(f"✅ Loaded {len(reviews_data)} reviews from uploaded file")
            # Show file statistics
            rated = numeric_rating.dropna()
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Total Reviews", len(reviews_data))
            with col2:
                # Avoid dividing by zero when no rating is numeric
                st.metric("Average Rating", f"{rated.mean():.1f}" if not rated.empty else "N/A")
            with col3:
                positive_reviews = int((rated >= 4).sum())
                st.metric("Positive Reviews", f"{positive_reviews}/{len(reviews_data)}")
        except Exception as e:
            # UI boundary: report the problem and continue with the demo data
            st.error(f"Error loading file: {str(e)}")
            st.info("Please ensure your CSV has columns: 'review_text', 'rating'")
            reviews_data = load_sample_data()
            st.info("Using sample data for demonstration")
    elif use_sample:
        reviews_data = load_sample_data()
        st.info("Using sample data for demonstration")
    else:
        st.warning("Please upload a CSV file or use sample data")
        return

    # Display data overview
    if reviews_data:
        with st.expander("📊 Data Overview", expanded=False):
            # Sample reviews preview
            st.subheader("Sample Reviews")
            preview = reviews_data[:3]
            for i, review in enumerate(preview):
                with st.container():
                    st.write(f"**{review.get('reviewer_name', 'Anonymous')}** - {review.get('rating', 'N/A')} ⭐")
                    st.write(str(review.get('review_text', 'No review text'))[:300] + "...")
                if i < len(preview) - 1:  # Don't show divider after last item
                    st.divider()

    # Generate FAQs
    if api_key and reviews_data:
        st.header("🚀 Generate AI-Powered FAQs")
        col1, col2 = st.columns(2)
        with col1:
            if st.button("🤖 Generate Keywords & FAQs with AI", type="primary"):
                start_time = time.time()
                with st.spinner("Processing large dataset..."):
                    # Initialize FAQ generator
                    faq_gen = OptimizedFAQGenerator(api_key)
                    # Extract keywords with batch processing
                    st.info("🔍 Extracting SEO keywords from all reviews...")
                    keywords = faq_gen.extract_seo_keywords(reviews_data)
                    # Store in session state
                    st.session_state.keywords = keywords
                    # Generate FAQs
                    st.info("📝 Generating brand-neutral FAQs...")
                    faqs = faq_gen.generate_faqs(keywords, reviews_data, num_faqs)
                    # Store in session state
                    st.session_state.faqs = faqs
                    generation_time = time.time() - start_time
                    st.session_state.generation_time = generation_time
                st.success(f"✅ Generated {len(faqs)} FAQs in {generation_time:.1f} seconds!")
        # with col2:
        #     if st.button("⚡ Use Quick Fallback"):
        #         faq_gen = OptimizedFAQGenerator("")  # Empty API key for fallback
        #         st.session_state.keywords = ["customer service", "quality products", "professional consultation", "experienced team", "customer satisfaction"]
        #         st.session_state.faqs = faq_gen._get_fallback_faqs(num_faqs)
        #         st.info("Using pre-built content")

    # Display results
    if 'keywords' in st.session_state and 'faqs' in st.session_state:
        # Performance metrics
        if 'generation_time' in st.session_state:
            st.info(f"⏱️ Generation completed in {st.session_state.generation_time:.1f} seconds")
        st.header("📈 Extracted SEO Keywords")
        # Display keywords in a nice format
        keywords = st.session_state.keywords
        # Show keywords in columns
        cols = st.columns(3)
        for i, keyword in enumerate(keywords):
            with cols[i % 3]:
                st.markdown(f"`{keyword}`")
        st.header("❓ Generated Brand-Neutral FAQs")
        st.info(f"Generated {len(st.session_state.faqs)} FAQs that can be used by any business in this industry")
        # Display FAQs with search functionality
        search_term = st.text_input("🔍 Search FAQs", placeholder="Enter keywords to filter FAQs...")
        faqs = st.session_state.faqs
        # Filter FAQs if search term is provided
        if search_term:
            needle = search_term.lower()
            faqs_to_show = [
                faq for faq in faqs
                if needle in str(faq.get('question', '')).lower()
                or needle in str(faq.get('answer', '')).lower()
            ]
            st.info(f"Showing {len(faqs_to_show)} FAQs matching '{search_term}'")
        else:
            faqs_to_show = faqs
        # Display FAQs
        for i, faq in enumerate(faqs_to_show):
            with st.expander(f"FAQ {i+1}: {faq.get('question', 'No question')}", expanded=False):
                st.subheader("Question:")
                st.write(faq.get('question', 'No question'))
                st.subheader("Answer:")
                st.write(faq.get('answer', 'No answer'))

        # Export options (always export the full, unfiltered FAQ list)
        st.header("📥 Export Options")
        col1, col2, col3 = st.columns(3)
        with col1:
            # Export as JSON
            export_data = {
                "metadata": {
                    "generated_at": time.strftime("%Y-%m-%d %H:%M:%S"),
                    "total_reviews_analyzed": len(reviews_data),
                    "generation_time_seconds": st.session_state.get('generation_time', 0),
                    "brand_neutral": True
                },
                "keywords": keywords,
                "faqs": faqs
            }
            st.download_button(
                label="📄 Download JSON",
                data=json.dumps(export_data, indent=2),
                file_name="faqs.json",
                mime="application/json"
            )
        with col2:
            # Export as CSV
            csv_data = pd.DataFrame(faqs).to_csv(index=False)
            st.download_button(
                label="📊 Download CSV",
                data=csv_data,
                file_name="faqs.csv",
                mime="text/csv"
            )
        with col3:
            # Export as HTML
            html_content = _build_html_export(faqs, keywords, len(reviews_data))
            st.download_button(
                label="🌐 Download HTML",
                data=html_content,
                file_name="faqs.html",
                mime="text/html"
            )

    # Footer
    st.markdown("---")
    st.markdown("**Features:** Batch processing for large datasets • Brand-neutral content • SEO optimization • Multiple export formats")


# Start the app only when this file is executed as a script (e.g. by Streamlit),
# not when it is imported.
if __name__ == "__main__":
    main()