"""
Blog Data Update Script

This script updates the blog data vector store when new posts are added.
It can be scheduled to run periodically or manually executed.

Usage:
    python update_blog_data.py [--force-recreate] [--data-dir DATA_DIR]

Options:
    --force-recreate   Force recreation of the vector store even if it exists
    --data-dir DIR     Directory containing the blog posts (default: data/)
"""

import os
import sys
import argparse
from datetime import datetime
import json
from pathlib import Path
from lets_talk.config import VECTOR_STORAGE_PATH

# Import the blog utilities module
import lets_talk.utils.blog as blog

def parse_args():
    """Parse command-line arguments"""
    parser = argparse.ArgumentParser(description="Update blog data vector store")
    parser.add_argument("--force-recreate", action="store_true", 
                        help="Force recreation of the vector store")
    parser.add_argument("--data-dir", default=blog.DATA_DIR,
                        help=f"Directory containing blog posts (default: {blog.DATA_DIR})")
    return parser.parse_args()

def save_stats(stats, output_dir="./stats"):
    """Save stats to a JSON file for tracking changes over time"""
    # Create directory if it doesn't exist
    Path(output_dir).mkdir(exist_ok=True, parents=True)
    
    # Create filename with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{output_dir}/blog_stats_{timestamp}.json"
    
    # Save only the basic stats, not the full document list
    basic_stats = {
        "timestamp": timestamp,
        "total_documents": stats["total_documents"],
        "total_characters": stats["total_characters"],
        "min_length": stats["min_length"],
        "max_length": stats["max_length"],
        "avg_length": stats["avg_length"],
    }
    
    with open(filename, "w") as f:
        json.dump(basic_stats, f, indent=2)
    
    print(f"Saved stats to {filename}")
    return filename

def main():
    """Main function to update blog data"""
    args = parse_args()
    
    print("=== Blog Data Update ===")
    print(f"Data directory: {args.data_dir}")
    print(f"Force recreate: {args.force_recreate}")
    print("========================")
    
    # Process blog posts without creating embeddings
    try:
        # Load and process documents
        documents = blog.load_blog_posts(args.data_dir)
        documents = blog.update_document_metadata(documents)
        
        # Get stats
        stats = blog.get_document_stats(documents)
        blog.display_document_stats(stats)
        
        # Save stats for tracking
        stats_file = save_stats(stats)

        create_vector_store = (not Path.exists(Path(VECTOR_STORAGE_PATH))) or (args.force_recreate)
        
        # Create a reference file for the vector store
        if create_vector_store:
            print("\nAttempting to save vector store reference file...")
            vector_store = blog.create_vector_store(documents, storage_path=VECTOR_STORAGE_PATH, force_recreate=create_vector_store)
            vector_store.client.close()
            print("Vector store reference file saved.")
        
        print("\n=== Update Summary ===")
        print(f"Processed {stats['total_documents']} documents")
        print(f"Stats saved to: {stats_file}")
        print("=====================")
        
        return 0
    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
        return 1

if __name__ == "__main__":
    sys.exit(main())