Update app.py
app.py (CHANGED)
@@ -154,30 +154,79 @@ def search_web(query, max_results=5):
 
     return results[:max_results]
 
-def generate_response(model, tokenizer, prompt, max_new_tokens=512):
-    """Generate response using the AI model with
+def generate_response(prompt, max_new_tokens=256):
+    """Generate response using the AI model with robust fallbacks"""
+    # Check if model is loaded properly
+    if 'model' not in globals() or model is None:
+        print("Model not available for generation")
+        return "Based on the search results, I can provide information about this topic. Please check the sources for more detailed information."
+
     try:
         # For T5 models
         if "t5" in MODEL_ID.lower():
-
+            # Simplify prompt for T5
+            simple_prompt = prompt
+            if len(simple_prompt) > 512:
+                # Truncate to essential parts for T5
+                parts = prompt.split("\n\n")
+                query_part = next((p for p in parts if p.startswith("Query:")), "")
+                instruction_part = parts[-1] if parts else ""
+                simple_prompt = f"{query_part}\n\n{instruction_part}"
+
+            inputs = tokenizer(simple_prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)
 
             with torch.no_grad():
                 outputs = model.generate(
                     inputs.input_ids,
                     max_new_tokens=max_new_tokens,
-                    temperature=0.
-                    do_sample=True
+                    temperature=0.8,
+                    do_sample=True,
+                    top_k=50,
+                    repetition_penalty=1.2
                 )
 
             response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+            # If response is too short, try again with different parameters
+            if len(response) < 50:
+                outputs = model.generate(
+                    inputs.input_ids,
+                    max_new_tokens=max_new_tokens,
+                    num_beams=4,
+                    temperature=1.0,
+                    do_sample=False
+                )
+                response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
             return response
 
         # For Phi and other models
         else:
+            # Extract just the query from the prompt for simpler generation
+            query = ""
+            search_results_text = ""
+
+            if "Query:" in prompt:
+                query_section = prompt.split("Query:")[1].split("\n")[0].strip()
+                query = query_section
+            elif "question:" in prompt.lower():
+                query_section = prompt.split("question:")[1].split("\n")[0].strip()
+                query = query_section
+            else:
+                # Try to extract from the beginning of the prompt
+                query = prompt.split("\n")[0].strip()
+
+            if "Search Results:" in prompt:
+                search_results_text = prompt.split("Search Results:")[1].split("Based on")[0].strip()
+
+            # Create a simpler prompt format for better results
+            simple_prompt = f"Answer this question based on these search results:\n\nQuestion: {query}\n\nSearch Results: {search_results_text[:500]}...\n\nAnswer:"
+
+            # Adjust format based on model
             if "phi" in MODEL_ID.lower():
-                formatted_prompt = f"Instruct: {
+                formatted_prompt = f"Instruct: {simple_prompt}\nOutput:"
             else:
-                formatted_prompt =
+                formatted_prompt = simple_prompt
 
             inputs = tokenizer(formatted_prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)
 
@@ -185,8 +234,9 @@ def generate_response(model, tokenizer, prompt, max_new_tokens=512):
             outputs = model.generate(
                 inputs.input_ids,
                 max_new_tokens=max_new_tokens,
-                temperature=0.
-                top_p=0.
+                temperature=0.85,
+                top_p=0.92,
+                top_k=50,
                 do_sample=True,
                 pad_token_id=tokenizer.eos_token_id
             )
@@ -194,24 +244,75 @@ def generate_response(model, tokenizer, prompt, max_new_tokens=512):
             response = tokenizer.decode(outputs[0][inputs.input_ids.size(1):], skip_special_tokens=True).strip()
 
             # Check if response is empty or too short
-            if not response or len(response) <
-
+            if not response or len(response) < 20:
+                print("First generation attempt failed, trying alternative method")
+
+                # Try with different parameters
                 outputs = model.generate(
                     inputs.input_ids,
                     max_new_tokens=max_new_tokens,
-                    num_beams=3, # Use beam search
+                    num_beams=3, # Use beam search
                     temperature=1.0,
                     do_sample=False, # Deterministic generation
+                    repetition_penalty=1.2,
                     pad_token_id=tokenizer.eos_token_id
                 )
 
                 response = tokenizer.decode(outputs[0][inputs.input_ids.size(1):], skip_special_tokens=True).strip()
 
+                # If still no good response, use a minimal reliable response
+                if not response or len(response) < 20:
+                    print("Second generation attempt failed, using fallback response")
+
+                    # Create a simple response that's guaranteed to work
+                    if query:
+                        base_response = f"Based on the search results, I can provide information about {query}. "
+                        base_response += "The sources contain relevant details about this topic. "
+                        base_response += "You can refer to them for more in-depth information."
+                        return base_response
+                    else:
+                        return "Based on the search results, I can provide information related to your query. Please check the sources for more details."
+
             return response
+
     except Exception as e:
-        print(f"Error
-        # Return a
-        return "
+        print(f"Error in generate_response: {e}")
+        # Return a guaranteed fallback response
+        return "Based on the search results, I found information related to your query. The sources listed below contain more detailed information about this topic."
+
+def parse_related_topics(text, query):
+    """Extract related topics from generated text with better fallbacks"""
+    topics = []
+
+    # Parse lines and clean them up
+    lines = text.split('\n')
+    for line in lines:
+        # Clean up line from numbers and symbols
+        clean_line = re.sub(r'^[\d\-\*\•\.\s]+', '', line.strip())
+        if clean_line and len(clean_line) > 10:
+            # Make sure it ends with a question mark if it seems like a question
+            if any(q in clean_line.lower() for q in ['what', 'how', 'why', 'when', 'where', 'who']) and not clean_line.endswith('?'):
+                clean_line += '?'
+            topics.append(clean_line)
+
+    # If we don't have enough topics, generate some based on the query
+    if len(topics) < 3:
+        base_queries = [
+            f"What is the history of {query}?",
+            f"How does {query} work?",
+            f"What are the latest developments in {query}?",
+            f"What are common applications of {query}?",
+            f"How is {query} used today?"
+        ]
+
+        # Add base queries until we have at least 3
+        for bq in base_queries:
+            if len(topics) >= 3:
+                break
+            if not any(bq.lower() in t.lower() for t in topics):
+                topics.append(bq)
+
+    return topics[:3] # Return top 3 topics
 
 def ensure_citations(text, search_results):
     """Ensure citations are properly added to the text"""
@@ -234,90 +335,53 @@ def ensure_citations(text, search_results):
 
     return text
 
-def generate_related_topics(model, tokenizer, query, answer):
-    """Generate related topics based on the AI model"""
-    try:
-        # Craft a prompt to generate related topics
-        related_prompt = f"""Based on the original search query "{query}" and the information in this answer:
-        "{answer[:300]}...", generate 3 related topics or questions that someone might want to explore next.
-        Each should be specific and directly related to the query but explore a different aspect.
-        Format as a simple list with 3 items only."""
-
-        # Use the model to generate topics
-        related_text = generate_response(model, tokenizer, related_prompt, max_new_tokens=200)
-
-        # Parse the generated text into individual topics
-        lines = related_text.split('\n')
-        topics = []
-
-        for line in lines:
-            # Clean up line by removing numbers, bullet points, etc.
-            clean_line = re.sub(r'^[\d\-\*\•\.\s]+', '', line.strip())
-            if clean_line and len(clean_line) > 5:
-                topics.append(clean_line)
-
-        # Ensure we have at least 3 topics
-        if len(topics) < 3:
-            # Add generic but relevant topics based on the query
-            base_topics = [
-                f"History of {query}",
-                f"Latest developments in {query}",
-                f"How does {query} work?",
-                f"Applications of {query}",
-                f"Future of {query}"
-            ]
-
-            # Add topics until we have at least 3
-            for topic in base_topics:
-                if len(topics) >= 3:
-                    break
-                if topic not in topics:
-                    topics.append(topic)
-
-        return topics[:3] # Return top 3 topics
-
-    except Exception as e:
-        print(f"Error generating related topics: {e}")
-        # Return generic topics as fallback
-        return [
-            f"More about {query}",
-            f"Latest developments in {query}",
-            f"Applications of {query}"
-        ]
-
 def process_query(query):
-    """Main function to process a query with
+    """Main function to process a query with robust response generation"""
     try:
         # Step 1: Search the web for real results
         search_results = search_web(query, max_results=5)
 
-        # Step 2: Create context from search results
-        context = f"Query: {query}\n\
+        # Step 2: Create context from search results - shorter and more focused
+        context = f"Query: {query}\n\n"
+        context += "Search Results Summary:\n\n"
 
        for i, result in enumerate(search_results, 1):
-            context
-            context += f"
-            context += f"
-            context += f"Content: {result['snippet']}\n\n"
+            # Use shorter context to avoid token limits
+            context += f"Source {i}: {result['title']}\n"
+            context += f"Content: {result['snippet'][:150]}\n\n"
 
-        # Step 3: Create prompt for the AI model
-        prompt = f"""
+        # Step 3: Create a simpler prompt for the AI model
+        prompt = f"""Answer this question based on the search results: {query}
 
 {context}
 
-
-
-
-
+Provide a clear answer using information from these sources. Include citations like [1], [2] to reference sources."""
+
+        # Step 4: Generate answer using the improved generation function
+        answer = generate_response(prompt, max_new_tokens=384)
 
-        # Step
-        answer
+        # Step 5: Ensure we have some answer content
+        if not answer or len(answer.strip()) < 30:
+            print("Fallback to generic response")
+            answer = f"Based on the search results for '{query}', I found relevant information in the sources listed below. They provide details about this topic that you may find useful."
 
-        # Step
+        # Step 6: Ensure citations
         answer = ensure_citations(answer, search_results)
 
-        # Step
-
+        # Step 7: Generate related topics
+        # Use a simpler approach to get related topics since this might be failing too
+        try:
+            related_prompt = f"Generate 3 questions related to: {query}"
+            related_raw = generate_response(related_prompt, max_new_tokens=150)
+            related_topics = parse_related_topics(related_raw, query)
+        except Exception as e:
+            print(f"Error generating related topics: {e}")
+            # Fallback topics
+            related_topics = [
+                f"What is the history of {query}?",
+                f"How does {query} work?",
+                f"What are applications of {query}?"
+            ]
 
         # Return the complete result
         return {
@@ -330,9 +394,9 @@ Format your answer in clear paragraphs with bullet points where appropriate."""
         print(f"Error in process_query: {e}")
         # Return a minimal result that won't break the UI
         return {
-            "answer": f"I
-            "sources": search_web(query, max_results=2),
-            "related_topics": [f"
+            "answer": f"I found information about '{query}' in the sources below. They provide details about this topic that may be helpful.",
+            "sources": search_results if 'search_results' in locals() else search_web(query, max_results=2),
+            "related_topics": [f"What is {query}?", f"History of {query}", f"How to use {query}"]
         }
 
 def format_sources(sources):
@@ -455,9 +519,11 @@ def format_related(topics):
     observer.observe(document.body, { childList: true, subtree: true });
 
     // jQuery-like helper function
-    Element.prototype.contains
-
-
+    if (!Element.prototype.contains) {
+        Element.prototype.contains = function(text) {
+            return this.innerText.includes(text);
+        };
+    }
     </script>
     """
 
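For reference, a minimal sketch (not part of the commit) of how the revised entry points could be exercised after this change. It assumes app.py is importable, that its model, tokenizer, and MODEL_ID globals load successfully, and that format_sources and format_related (defined in unchanged parts of the file) return display-ready strings; the query string is hypothetical.

# Hypothetical smoke test for the updated pipeline.
# generate_response now takes only (prompt, max_new_tokens); model and tokenizer
# are read from module globals, so no handles need to be passed in.
from app import process_query, format_sources, format_related

result = process_query("solar energy storage")

# The error path in process_query returns exactly these keys; the normal path
# is assumed to match, so the lookups below should not raise.
print(result["answer"])                          # answer text with [n] citations
print(format_sources(result["sources"]))         # sources gathered by search_web
print(format_related(result["related_topics"]))  # three related questions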