Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -98,6 +98,56 @@ def query_document(question):
|
|
98 |
tts.save(temp_audio_path)
|
99 |
return response, temp_audio_path
|
100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
# ✅ Gradio UI
|
102 |
with gr.Blocks() as demo:
|
103 |
gr.Markdown("# 📄 AI-Powered Multi-Document Chatbot with Voice Output")
|
|
|
98 |
tts.save(temp_audio_path)
|
99 |
return response, temp_audio_path
|
100 |
|
101 |
+
def query_document(question):
    """Answer *question* from the previously ingested documents, with TTS audio.

    Retrieves relevant chunks from the module-level ``vector_store``, asks a
    Gemini model for a structured answer, and synthesizes the answer to speech.

    Parameters
    ----------
    question : str
        The user's natural-language question.

    Returns
    -------
    tuple[str, str | None]
        ``(response_text, audio_path)``; ``audio_path`` is ``None`` when no
        documents have been processed yet.
    """
    # Guard clause: nothing to search until documents have been ingested.
    if vector_store is None:
        return "❌ No documents processed.", None

    # ✅ Fetch stored documents
    stored_docs = vector_store.get()["documents"]

    # ✅ Calculate total word count safely — entries may be raw strings or
    # LangChain Document objects, depending on how they were stored.
    total_words = sum(
        len(doc.split()) if isinstance(doc, str) else len(doc.page_content.split())
        for doc in stored_docs
    )

    # ✅ Dynamically adjust k (number of retrieved chunks) based on corpus size.
    if total_words < 500:
        k_value = 3
    elif total_words < 2000:
        k_value = 5
    else:
        k_value = 10

    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": k_value})

    # ✅ Improved prompt for detailed response.
    # NOTE(review): RetrievalQA embeds the *whole* query string for similarity
    # search, so this instruction boilerplate pollutes retrieval. Consider
    # retrieving on the bare question and prompting separately — confirm
    # against LangChain docs before changing behavior.
    detailed_prompt = f"""
    Provide a **detailed and structured answer** to the following question.
    - Use relevant **examples, key points, and explanations**.
    - If applicable, provide **step-by-step analysis** or comparisons.
    - Ensure **clarity and completeness**.
    **Question:** {question}
    """

    # ✅ Dynamically select model based on document size.
    if total_words < 1000:
        model_name = "gemini-2.0-pro-exp-02-05"  # More detailed responses for small files
    else:
        model_name = "gemini-2.0-flash"  # Faster processing for large documents

    logging.info(f"🧠 Using Model: {model_name} for processing")

    model = ChatGoogleGenerativeAI(model=model_name, google_api_key=GOOGLE_API_KEY)
    qa_chain = RetrievalQA.from_chain_type(llm=model, retriever=retriever)
    response = qa_chain.invoke({"query": detailed_prompt})["result"]

    # ✅ Convert response to speech. gTTS raises on empty/whitespace-only
    # text, so fall back to a short notice instead of crashing the request.
    speech_text = response if response and response.strip() else "No answer was generated."
    tts = gTTS(text=speech_text, lang="en")
    temp_audio_path = os.path.join(temp_dir, "response.mp3")
    tts.save(temp_audio_path)
    # Record creation time so the cleanup sweep can expire this file.
    # NOTE(review): fixed filename "response.mp3" means concurrent users
    # overwrite each other's audio — confirm whether sessions are isolated.
    temp_file_map["response.mp3"] = time.time()

    return response, temp_audio_path
|
149 |
+
|
150 |
+
|
151 |
# ✅ Gradio UI
|
152 |
with gr.Blocks() as demo:
|
153 |
gr.Markdown("# 📄 AI-Powered Multi-Document Chatbot with Voice Output")
|