Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -98,6 +98,56 @@ def query_document(question):
|
|
98 |
tts.save(temp_audio_path)
|
99 |
return response, temp_audio_path
|
100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
# ✅ Gradio UI
|
102 |
with gr.Blocks() as demo:
|
103 |
gr.Markdown("# 📄 AI-Powered Multi-Document Chatbot with Voice Output")
|
|
|
98 |
tts.save(temp_audio_path)
|
99 |
return response, temp_audio_path
|
100 |
|
101 |
+
def query_document(question):
    """Answer *question* from the previously ingested documents, with TTS audio.

    Retrieves relevant chunks from the module-level ``vector_store``, asks a
    Gemini model for a structured answer, and synthesizes the answer to speech.

    Parameters
    ----------
    question : str
        The user's natural-language question.

    Returns
    -------
    tuple[str, str | None]
        ``(response_text, audio_path)``; ``audio_path`` is ``None`` when no
        documents have been processed yet.
    """
    # Guard clause: nothing to search until documents have been ingested.
    if vector_store is None:
        return "❌ No documents processed.", None

    # ✅ Fetch stored documents
    stored_docs = vector_store.get()["documents"]

    # ✅ Calculate total word count safely — entries may be raw strings or
    # LangChain Document objects, depending on how they were stored.
    total_words = sum(
        len(doc.split()) if isinstance(doc, str) else len(doc.page_content.split())
        for doc in stored_docs
    )

    # ✅ Dynamically adjust k (number of retrieved chunks) based on corpus size.
    if total_words < 500:
        k_value = 3
    elif total_words < 2000:
        k_value = 5
    else:
        k_value = 10

    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": k_value})

    # ✅ Improved prompt for detailed response.
    # NOTE(review): RetrievalQA embeds the *whole* query string for similarity
    # search, so this instruction boilerplate pollutes retrieval. Consider
    # retrieving on the bare question and prompting separately — confirm
    # against LangChain docs before changing behavior.
    detailed_prompt = f"""
    Provide a **detailed and structured answer** to the following question.
    - Use relevant **examples, key points, and explanations**.
    - If applicable, provide **step-by-step analysis** or comparisons.
    - Ensure **clarity and completeness**.
    **Question:** {question}
    """

    # ✅ Dynamically select model based on document size.
    if total_words < 1000:
        model_name = "gemini-2.0-pro-exp-02-05"  # More detailed responses for small files
    else:
        model_name = "gemini-2.0-flash"  # Faster processing for large documents

    logging.info(f"🧠 Using Model: {model_name} for processing")

    model = ChatGoogleGenerativeAI(model=model_name, google_api_key=GOOGLE_API_KEY)
    qa_chain = RetrievalQA.from_chain_type(llm=model, retriever=retriever)
    response = qa_chain.invoke({"query": detailed_prompt})["result"]

    # ✅ Convert response to speech. gTTS raises on empty/whitespace-only
    # text, so fall back to a short notice instead of crashing the request.
    speech_text = response if response and response.strip() else "No answer was generated."
    tts = gTTS(text=speech_text, lang="en")
    temp_audio_path = os.path.join(temp_dir, "response.mp3")
    tts.save(temp_audio_path)
    # Record creation time so the cleanup sweep can expire this file.
    # NOTE(review): fixed filename "response.mp3" means concurrent users
    # overwrite each other's audio — confirm whether sessions are isolated.
    temp_file_map["response.mp3"] = time.time()

    return response, temp_audio_path
|
149 |
+
|
150 |
+
|
151 |
# ✅ Gradio UI
|
152 |
with gr.Blocks() as demo:
|
153 |
gr.Markdown("# 📄 AI-Powered Multi-Document Chatbot with Voice Output")
|