Spaces:

syntaxhacker
/

developer-portfolio-rag

Running

File size: 5,581 Bytes

from typing import Dict, Optional, List
from dataclasses import dataclass
from haystack.dataclasses import ChatMessage

@dataclass
class DatasetConfig:
    name: str
    split: str = "train"
    content_field: str = "content"
    fields: Dict[str, str] = None  # Dictionary of field mappings
    prompt_template: Optional[str] = None

# Default configurations for different datasets
DATASET_CONFIGS = {
    "awesome-chatgpt-prompts": DatasetConfig(
        name="fka/awesome-chatgpt-prompts",
        content_field="prompt",
        fields={
            "role": "act",
            "prompt": "prompt"
        },
        prompt_template="""
        Given the following context where each document represents a prompt for a specific role,
        please answer the question while considering both the role and the prompt content.
        
        Available Contexts:
        {% for document in documents %}
            {% if document.meta.role %}Role: {{ document.meta.role }}{% endif %}
            Content: {{ document.content }}
            ---
        {% endfor %}
        
        Question: {{question}}
        Answer:
        """
    ),
    "settings-dataset": DatasetConfig(
        name="syntaxhacker/rag_pipeline",
        content_field="context",
        fields={
            "question": "question",
            "answer": "answer",
            "context": "context"
        },
        prompt_template="""
        Given the following context about software settings and configurations,
        please answer the question accurately based on the provided information.
        
        For each setting, provide a clear, step-by-step navigation path and include:
        1. The exact location (Origin Type > Tab > Section > Setting name)
        2. What the setting does
        3. Available options/values
        4. How to access and modify the setting
        5. Reference screenshots (if available)
        
        Format your answer as:
        "To [accomplish task], follow these steps:

        Location: [Origin Type] > [Tab] > [Section] > [Setting name]
        Purpose: [describe what the setting does]
        Options: [list available values/options]
        How to set: [describe interaction method: toggle/select/input]
        
        Visual Guide:
        [Include reference image links if available]

        For more details, you can refer to the screenshots above showing the exact location and interface."

        Available Contexts:
        {% for document in documents %}
            Setting Info: {{ document.content }}
            Reference Answer: {{ document.meta.answer }}
            ---
        {% endfor %}

        Question: {{question}}
        Answer:
        """
    ),
    "seven-wonders": DatasetConfig(
        name="bilgeyucel/seven-wonders",
        content_field="content",
        fields={},  # No additional fields needed
        prompt_template="""
        Given the following information about the Seven Wonders, please answer the question.
        
        Context:
        {% for document in documents %}
            {{ document.content }}
        {% endfor %}
        
        Question: {{question}}
        Answer:
        """
    ),
    "psychology-dataset": DatasetConfig(
        name="jkhedri/psychology-dataset",
        split="train",
        content_field="question",  # Assuming we want to use the question as the content
        fields={
            "response_j": "response_j",  # Response from one model
            "response_k": "response_k"   # Response from another model
        },
        prompt_template="""
        Given the following context where each document represents a psychological inquiry,
        please answer the question based on the provided responses.

        Available Contexts:
        {% for document in documents %}
            Question: {{ document.content }}
            Response J: {{ document.meta.response_j }}
            Response K: {{ document.meta.response_k }}
            ---
        {% endfor %}

        Question: {{question}}
        Answer:
        """
    ),
    "developer-portfolio": DatasetConfig(
        name="syntaxhacker/developer-portfolio-rag",
        split="train",
        content_field="answer",
        fields={
            "question": "question",
            "answer": "answer",
            "context": "context"
        },
        prompt_template="""
        You are a helpful assistant that provides direct answers based on the provided context. Format your answers using markdown, especially for lists.

        ---
        Example 1:

        Question: What is your current role?
        
        Answer:
        I am a Tech Lead at FleetEnable, where I lead the UI development for a logistics SaaS product focused on drayage and freight management.

        ---
        Example 2:

        Question: What are your primary responsibilities as a Tech Lead?

        Answer:
        My primary responsibilities include:
        - Leading UI development.
        - Collaborating with product and backend teams.
        - Helping define technical strategies.
        - Ensuring the delivery of high-quality features.

        ---

        Context:
        {% for document in documents %}
            Question: {{ document.meta.question }}
            Answer: {{ document.content }}
        {% endfor %}

        Question: {{question}}
        
        Answer:
        """
    ),
}

# Default configuration for embedding and LLM models
MODEL_CONFIG = {
    "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
    "llm_model": "gemini-2.0-flash-exp",
}