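"""Answer-generation helpers: embedding-based filtering of search results,
question preprocessing, and LLM answer generation over search/file context."""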
import nltk
import logging
import numpy as np
from typing import List, Any
from sentence_transformers import SentenceTransformer
# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(name)s - %(message)s')
logger = logging.getLogger(__name__)

# Download NLTK data
try:
    nltk.download('punkt', quiet=True)
    nltk.download('stopwords', quiet=True)
except Exception as e:
    logger.warning(f"NLTK data download failed: {e}")

# Global embedder, created lazily by get_embedder()
_embedder = None

def get_embedder():
    """Return the shared SentenceTransformer, initializing it on first use."""
    global _embedder
    if _embedder is None:
        try:
            _embedder = SentenceTransformer(
                "all-MiniLM-L6-v2",
                device="cpu",
                cache_folder="./cache"
            )
            logger.info("SentenceTransformer initialized")
        except Exception as e:
            logger.error(f"Failed to initialize SentenceTransformer: {e}")
            raise RuntimeError(f"Embedder initialization failed: {e}")
    return _embedder
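# Illustrative usage (an addition, not in the original file):
#   embedder = get_embedder()  # loads all-MiniLM-L6-v2 once, then is reused
#   vectors = embedder.encode(["example sentence"], convert_to_numpy=True)
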
def filter_results(search_results: List[str], question: str) -> List[str]:
    """Keep only search results that are semantically similar to the question."""
    try:
        if not search_results or not question:
            return search_results
        embedder = get_embedder()
        # Normalize embeddings so the dot product below is cosine similarity
        # and the 0.5 threshold is meaningful.
        question_embedding = embedder.encode([question], convert_to_numpy=True, normalize_embeddings=True)
        result_embeddings = embedder.encode(search_results, convert_to_numpy=True, normalize_embeddings=True)
        similarities = np.dot(result_embeddings, question_embedding.T).flatten()
        filtered_results = [
            search_results[i] for i in range(len(search_results))
            if similarities[i] > 0.5 and search_results[i].strip()
        ]
        # Fall back to the first few raw results rather than returning nothing
        return filtered_results if filtered_results else search_results[:3]
    except Exception as e:
        logger.warning(f"Result filtering failed: {e}")
        return search_results[:3]

async def preprocess_question(question: str) -> str:
    """Preprocess the question to clean and standardize it."""
    try:
        question = question.strip().lower()
        if not question.endswith("?"):
            question += "?"
        logger.debug(f"Preprocessed question: {question}")
        return question
    except Exception as e:
        logger.error(f"Error preprocessing question: {e}")
        return question
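# Expected llm_client shapes, inferred from the branches in generate_answer
# below (the original leaves the parameter typed as Any):
#   hf_local: a (model, tokenizer) tuple from transformers
#   together: an OpenAI-style client exposing chat.completions.create,
#             with the Llama 3.3 70B model pinned explicitly
#   hf_api:   an OpenAI-compatible client such as huggingface_hub.InferenceClient
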
async def generate_answer(
    task_id: str,
    question: str,
    search_results: List[str],
    file_results: str,
    llm_client: Any
) -> str:
    """Generate an answer using an LLM with search and file results as context."""
    try:
        if not search_results:
            search_results = ["No search results available."]
        if not file_results:
            file_results = "No file results available."
        context = "\n".join([str(r) for r in search_results]) + "\n" + file_results
        system_prompt = (
            "You are an assistant answering questions using provided context.\n"
            "- Use ONLY the context to formulate a concise, accurate answer.\n"
            "- If the context is insufficient, state: 'Insufficient information to answer.'\n"
            "- Do NOT generate or assume information beyond the context.\n"
            "- Return a single, clear sentence or phrase as the answer."
        )
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Context: {context}\nQuestion: {question}"}
        ]
        if isinstance(llm_client, tuple):  # hf_local: (model, tokenizer) pair
            model, tokenizer = llm_client
            inputs = tokenizer.apply_chat_template(
                messages, add_generation_prompt=True, return_tensors="pt"
            ).to(model.device)
            outputs = model.generate(inputs, max_new_tokens=100, do_sample=True, temperature=0.7)
            # Decode only the newly generated tokens, not the echoed prompt
            response = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
        elif hasattr(llm_client, "chat"):  # together
            response = llm_client.chat.completions.create(
                model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
                messages=messages,
                max_tokens=100,
                temperature=0.7,
                top_p=0.9,
                frequency_penalty=0.5
            )
            response = response.choices[0].message.content.strip()
        else:  # hf_api
            response = llm_client.chat.completions.create(
                messages=messages,
                max_tokens=100,
                temperature=0.7
            )
            response = response.choices[0].message.content.strip()
        answer = response.strip()
        if not answer or answer.lower() == "none":
            answer = "Insufficient information to answer."
        logger.info(f"Task {task_id}: Generated answer: {answer}")
        return answer
    except Exception as e:
        logger.error(f"Task {task_id}: Answer generation failed: {e}")
        return "Error generating answer."