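"""Question-answering helpers: embedding-based filtering of search results and
LLM-backed answer generation over combined search and file context."""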
import nltk
import logging
import numpy as np
from typing import List, Any
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import SystemMessage, HumanMessage
from sentence_transformers import SentenceTransformer

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(name)s - %(message)s')
logger = logging.getLogger(__name__)

# Download NLTK data
try:
    nltk.download('punkt', quiet=True)
    nltk.download('stopwords', quiet=True)
except Exception as e:
    logger.warning(f"NLTK data download failed: {e}")

# Global embedder
_embedder = None

def get_embedder():
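    """Lazily initialize and cache a shared SentenceTransformer instance."""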
    global _embedder
    if _embedder is None:
        try:
            _embedder = SentenceTransformer(
                "all-MiniLM-L6-v2",
                device="cpu",
                cache_folder="./cache"
            )
            logger.info("SentenceTransformer initialized")
        except Exception as e:
            logger.error(f"Failed to initialize SentenceTransformer: {e}")
            raise RuntimeError(f"Embedder initialization failed: {e}")
    return _embedder

def filter_results(search_results: List[str], question: str) -> List[str]:
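    """Keep results that are semantically close to the question.

    Falls back to the first three results when filtering fails or would
    discard everything.
    """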
    try:
        if not search_results or not question:
            return search_results
        
        embedder = get_embedder()
        # Normalize embeddings so the dot product below is cosine similarity,
        # which makes the fixed 0.5 threshold meaningful.
        question_embedding = embedder.encode(
            [question], convert_to_numpy=True, normalize_embeddings=True
        )
        result_embeddings = embedder.encode(
            search_results, convert_to_numpy=True, normalize_embeddings=True
        )

        similarities = np.dot(result_embeddings, question_embedding.T).flatten()
        filtered_results = [
            result for result, score in zip(search_results, similarities)
            if score > 0.5 and result.strip()
        ]
        
        return filtered_results if filtered_results else search_results[:3]
    except Exception as e:
        logger.warning(f"Result filtering failed: {e}")
        return search_results[:3]

async def preprocess_question(question: str) -> str:
    """Preprocess the question to clean and standardize it."""
    try:
        question = question.strip().lower()
        if not question.endswith("?"):
            question += "?"
        logger.debug(f"Preprocessed question: {question}")
        return question
    except Exception as e:
        logger.error(f"Error preprocessing question: {e}")
        return question

async def generate_answer(
    task_id: str,
    question: str,
    search_results: List[str],
    file_results: str,
    llm_client: Any
) -> str:
    """Generate an answer using LLM with search and file results."""
    try:
        if not search_results:
            search_results = ["No search results available."]
        if not file_results:
            file_results = "No file results available."

        # Combine search snippets and file-derived text into one context block.
        context = "\n".join(str(r) for r in search_results) + "\n" + file_results
        prompt = ChatPromptTemplate.from_messages([
            SystemMessage(content="""You are an assistant answering questions using provided context. 
            - Use ONLY the context to formulate a concise, accurate answer.
            - If the context is insufficient, state: 'Insufficient information to answer.'
            - Do NOT generate or assume information beyond the context.
            - Return a single, clear sentence or phrase as the answer."""),
            HumanMessage(content=f"Context: {context}\nQuestion: {question}")
        ])
        
        # Pull the plain text out of the prompt's messages for the
        # OpenAI-style chat payload expected by the clients below.
        messages = [
            {"role": "system", "content": prompt.messages[0].content},
            {"role": "user", "content": prompt.messages[1].content}
        ]
        
        if isinstance(llm_client, tuple):  # hf_local: (model, tokenizer) pair
            model, tokenizer = llm_client
            inputs = tokenizer.apply_chat_template(
                messages, add_generation_prompt=True, return_tensors="pt"
            ).to(model.device)
            outputs = model.generate(inputs, max_new_tokens=100, do_sample=True, temperature=0.7)
            # Decode only the newly generated tokens, not the echoed prompt.
            response = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
        elif hasattr(llm_client, "chat"):  # together
            response = llm_client.chat.completions.create(
                model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
                messages=messages,
                max_tokens=100,
                temperature=0.7,
                top_p=0.9,
                frequency_penalty=0.5
            )
            response = response.choices[0].message.content.strip()
        else:  # hf_api
            response = llm_client.chat.completions.create(
                messages=messages,
                max_tokens=100,
                temperature=0.7
            )
            response = response.choices[0].message.content.strip()
        
        answer = response.strip()
        if not answer or answer.lower() == "none":
            answer = "Insufficient information to answer."
        logger.info(f"Task {task_id}: Generated answer: {answer}")
        return answer
    except Exception as e:
        logger.error(f"Task {task_id}: Answer generation failed: {e}")
        return "Error generating answer."
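
# --- Minimal usage sketch (illustrative only) --------------------------------
# Assumes the SentenceTransformer download above succeeds; only the helpers
# that need no LLM client are exercised here.
if __name__ == "__main__":
    import asyncio

    sample_results = [
        "The Eiffel Tower is located in Paris, France.",
        "Bananas are a good source of potassium.",
    ]
    cleaned = asyncio.run(preprocess_question("Where is the Eiffel Tower "))
    print(cleaned)                                 # "where is the eiffel tower?"
    print(filter_results(sample_results, cleaned)) # results kept by similarity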