sreesh2804 committed
Commit 2279cee · verified · 1 Parent(s): d77afce

Update app.py

Files changed (1):
  1. app.py +50 -0

app.py CHANGED
@@ -98,6 +98,56 @@ def query_document(question):
     tts.save(temp_audio_path)
     return response, temp_audio_path
 
+def query_document(question):
+    if vector_store is None:
+        return "❌ No documents processed.", None
+
+    # ✅ Fetch stored documents
+    stored_docs = vector_store.get()["documents"]
+
+    # ✅ Calculate total word count safely
+    total_words = sum(len(doc.split()) if isinstance(doc, str) else len(doc.page_content.split()) for doc in stored_docs)
+
+    # ✅ Dynamically adjust k based on document size
+    if total_words < 500:
+        k_value = 3
+    elif total_words < 2000:
+        k_value = 5
+    else:
+        k_value = 10
+
+    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": k_value})
+
+    # ✅ Improved prompt for detailed response
+    detailed_prompt = f"""
+    Provide a **detailed and structured answer** to the following question.
+    - Use relevant **examples, key points, and explanations**.
+    - If applicable, provide **step-by-step analysis** or comparisons.
+    - Ensure **clarity and completeness**.
+    **Question:** {question}
+    """
+
+    # ✅ Dynamically select model based on document size
+    if total_words < 1000:
+        model_name = "gemini-2.0-pro-exp-02-05"  # More detailed responses for small files
+    else:
+        model_name = "gemini-2.0-flash"  # Faster processing for large documents
+
+    logging.info(f"🧠 Using Model: {model_name} for processing")
+
+    model = ChatGoogleGenerativeAI(model=model_name, google_api_key=GOOGLE_API_KEY)
+    qa_chain = RetrievalQA.from_chain_type(llm=model, retriever=retriever)
+    response = qa_chain.invoke({"query": detailed_prompt})["result"]
+
+    # ✅ Convert response to speech
+    tts = gTTS(text=response, lang="en")
+    temp_audio_path = os.path.join(temp_dir, "response.mp3")
+    tts.save(temp_audio_path)
+    temp_file_map["response.mp3"] = time.time()
+
+    return response, temp_audio_path
+
+
 # ✅ Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("# 📄 AI-Powered Multi-Document Chatbot with Voice Output")