"""Dataset and model configuration for a Haystack RAG pipeline.

Each entry in DATASET_CONFIGS describes how to load a Hugging Face dataset
(which field holds the document content, which extra fields become document
metadata) and the Jinja2 prompt template used to build the LLM prompt from
retrieved documents and the user question.
"""

from typing import Dict, Optional, List
from dataclasses import dataclass

from haystack.dataclasses import ChatMessage


@dataclass
class DatasetConfig:
    """Describes how to load one dataset and prompt over it."""

    # Hugging Face dataset identifier, e.g. "fka/awesome-chatgpt-prompts".
    name: str
    # Dataset split to load.
    split: str = "train"
    # Name of the dataset column used as the document body.
    content_field: str = "content"
    # Mapping of metadata key -> dataset column name; None means no extra
    # metadata fields. (Was annotated `Dict[str, str]` with a None default,
    # which is a type error — the annotation now admits None explicitly.)
    fields: Optional[Dict[str, str]] = None
    # Jinja2 template rendered with `documents` and `question` variables;
    # None means the caller supplies its own template.
    prompt_template: Optional[str] = None


# Default configurations for different datasets.
DATASET_CONFIGS = {
    "awesome-chatgpt-prompts": DatasetConfig(
        name="fka/awesome-chatgpt-prompts",
        content_field="prompt",
        fields={
            "role": "act",
            "prompt": "prompt",
        },
        prompt_template="""
Given the following context where each document represents a prompt for a specific role,
please answer the question while considering both the role and the prompt content.

Available Contexts:
{% for document in documents %}
{% if document.meta.role %}Role: {{ document.meta.role }}{% endif %}
Content: {{ document.content }}
---
{% endfor %}

Question: {{question}}
Answer:
""",
    ),
    "settings-dataset": DatasetConfig(
        name="syntaxhacker/rag_pipeline",
        content_field="context",
        fields={
            "question": "question",
            "answer": "answer",
            "context": "context",
        },
        prompt_template="""
Given the following context about software settings and configurations,
please answer the question accurately based on the provided information.

For each setting, provide a clear, step-by-step navigation path and include:
1. The exact location (Origin Type > Tab > Section > Setting name)
2. What the setting does
3. Available options/values
4. How to access and modify the setting
5. Reference screenshots (if available)

Format your answer as:
"To [accomplish task], follow these steps:
Location: [Origin Type] > [Tab] > [Section] > [Setting name]
Purpose: [describe what the setting does]
Options: [list available values/options]
How to set: [describe interaction method: toggle/select/input]
Visual Guide: [Include reference image links if available]

For more details, you can refer to the screenshots above showing the exact location and interface."

Available Contexts:
{% for document in documents %}
Setting Info: {{ document.content }}
Reference Answer: {{ document.meta.answer }}
---
{% endfor %}

Question: {{question}}
Answer:
""",
    ),
    "seven-wonders": DatasetConfig(
        name="bilgeyucel/seven-wonders",
        content_field="content",
        fields={},  # No additional fields needed
        prompt_template="""
Given the following information about the Seven Wonders, please answer the question.

Context:
{% for document in documents %}
{{ document.content }}
{% endfor %}

Question: {{question}}
Answer:
""",
    ),
    "psychology-dataset": DatasetConfig(
        name="jkhedri/psychology-dataset",
        split="train",
        content_field="question",  # Assuming we want to use the question as the content
        fields={
            "response_j": "response_j",  # Response from one model
            "response_k": "response_k",  # Response from another model
        },
        prompt_template="""
Given the following context where each document represents a psychological inquiry,
please answer the question based on the provided responses.

Available Contexts:
{% for document in documents %}
Question: {{ document.content }}
Response J: {{ document.meta.response_j }}
Response K: {{ document.meta.response_k }}
---
{% endfor %}

Question: {{question}}
Answer:
""",
    ),
    "developer-portfolio": DatasetConfig(
        name="syntaxhacker/developer-portfolio-rag",
        split="train",
        content_field="answer",
        fields={
            "question": "question",
            "answer": "answer",
            "context": "context",
        },
        prompt_template="""
You are a helpful assistant that provides direct answers based on the provided context.
Format your answers using markdown, especially for lists.
---
Example 1:
Question: What is your current role?
Answer: I am a Tech Lead at FleetEnable, where I lead the UI development for a logistics SaaS product focused on drayage and freight management.
---
Example 2:
Question: What are your primary responsibilities as a Tech Lead?
Answer: My primary responsibilities include:
- Leading UI development.
- Collaborating with product and backend teams.
- Helping define technical strategies.
- Ensuring the delivery of high-quality features.
---
Context:
{% for document in documents %}
Question: {{ document.meta.question }}
Answer: {{ document.content }}
{% endfor %}

Question: {{question}}
Answer:
""",
    ),
}

# Default configuration for embedding and LLM models.
MODEL_CONFIG = {
    "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
    "llm_model": "gemini-2.0-flash-exp",
}