# TheDataGuy Chat Configuration
# Copy this file to .env and fill in your values.
# Keep comments on their own lines: depending on the parser, any text that
# follows a value on the same line may be read as part of that value.

# OpenAI API Key - Required for LLM and embeddings
# Replace the placeholder with a real key; do not commit the filled-in .env.
OPENAI_API_KEY=your_openai_api_key_here

# Vector Store Configuration
VECTOR_STORAGE_PATH=./db/vector_store_tdg
QDRANT_COLLECTION=thedataguy_documents

# Model Configuration
EMBEDDING_MODEL=Snowflake/snowflake-arctic-embed-l
LLM_MODEL=gpt-4o-mini
LLM_TEMPERATURE=0

# For evaluation and synthetic data generation (optional)
SDG_LLM_MODEL=gpt-4.1
EVAL_LLM_MODEL=gpt-4.1

# Blog Configuration
DATA_DIR=data/
BLOG_BASE_URL=https://thedataguy.pro/blog/

# Search Configuration
MAX_SEARCH_RESULTS=5

# Document Chunking Configuration
# NOTE(review): units (characters vs. tokens) are not stated in this file - confirm against the chunking code.
CHUNK_SIZE=1000
CHUNK_OVERLAP=200

# Vector Database Creation Configuration
# Boolean flags below use the capitalised True/False spelling; keep that spelling unless the reader is known to accept others.
FORCE_RECREATE=False
OUTPUT_DIR=./stats
USE_CHUNKING=True
SHOULD_SAVE_STATS=True