SysModeler committed on
Commit
6b1b4f6
·
verified ·
1 Parent(s): 3c3e008

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +200 -12
app.py CHANGED
@@ -3,8 +3,10 @@ import gradio as gr
3
  import warnings
4
  import json
5
  from dotenv import load_dotenv
6
- import logging
 
7
  from functools import lru_cache
 
8
 
9
  from langchain_community.vectorstores import FAISS
10
  from langchain_community.embeddings import AzureOpenAIEmbeddings
@@ -47,9 +49,13 @@ client = AzureOpenAI(
47
  azure_endpoint=AZURE_OPENAI_ENDPOINT
48
  )
49
 
 
50
  logger = logging.getLogger(__name__)
51
 
 
52
  def clean_em_dashes(text: str) -> str:
 
 
53
  text = text.replace("—which", ", which")
54
  text = text.replace("—that", ", that")
55
  text = text.replace("—no", ". No")
@@ -61,16 +67,31 @@ def clean_em_dashes(text: str) -> str:
61
  text = text.replace("—just", ". Just")
62
  text = text.replace("—great", ", great")
63
  text = text.replace("—this", ". This")
 
64
  text = text.replace("—", ", ")
65
  return text
66
 
 
67
  @lru_cache(maxsize=100)
68
  def sysml_retriever(query: str) -> str:
69
  try:
 
 
 
 
70
  results = vectorstore.similarity_search_with_score(query, k=100)
 
 
 
71
  weighted_results = []
72
- for (doc, score) in results:
 
 
 
 
73
  doc_source = doc.metadata.get('source', '').lower() if hasattr(doc, 'metadata') else str(doc).lower()
 
 
74
  is_sysmodeler = (
75
  'sysmodeler' in doc_source or
76
  'user manual' in doc_source or
@@ -87,33 +108,99 @@ def sysml_retriever(query: str) -> str:
87
  'SynthAgent' in doc.page_content or
88
  'workspace dashboard' in doc.page_content.lower()
89
  )
 
 
90
  if is_sysmodeler:
 
91
  weighted_score = score * 0.6
92
  source_type = "SysModeler"
 
93
  else:
 
94
  weighted_score = score
95
  source_type = "Other"
 
 
 
96
  doc.metadata = doc.metadata if hasattr(doc, 'metadata') else {}
97
  doc.metadata['source_type'] = 'sysmodeler' if is_sysmodeler else 'other'
98
  doc.metadata['weighted_score'] = weighted_score
99
  doc.metadata['original_score'] = score
 
100
  weighted_results.append((doc, weighted_score, source_type))
 
 
 
 
 
 
 
 
 
 
101
  weighted_results.sort(key=lambda x: x[1])
102
-
 
 
103
  query_lower = query.lower()
 
 
104
  is_tool_comparison = any(word in query_lower for word in ['tool', 'compare', 'choose', 'vs', 'versus', 'better'])
 
 
105
  if is_tool_comparison:
 
 
 
 
 
106
  sysmodeler_docs = [(doc, score) for doc, score, type_ in weighted_results if type_ == "SysModeler"][:8]
107
  other_docs = [(doc, score) for doc, score, type_ in weighted_results if type_ == "Other"][:4]
 
108
  final_docs = [doc for doc, _ in sysmodeler_docs] + [doc for doc, _ in other_docs]
 
109
  else:
 
 
 
 
 
110
  final_docs = [doc for doc, _, _ in weighted_results[:12]]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  contexts = [doc.page_content for doc in final_docs]
112
  return "\n\n".join(contexts)
 
113
  except Exception as e:
114
  logger.error(f"Retrieval error: {str(e)}")
 
115
  return "Unable to retrieve information at this time."
116
 
 
117
  tools_definition = [
118
  {
119
  "type": "function",
@@ -131,10 +218,12 @@ tools_definition = [
131
  }
132
  ]
133
 
 
134
  tool_mapping = {
135
  "SysMLRetriever": sysml_retriever
136
  }
137
 
 
138
  def convert_history_to_messages(history):
139
  messages = []
140
  for user, bot in history:
@@ -142,15 +231,33 @@ def convert_history_to_messages(history):
142
  messages.append({"role": "assistant", "content": bot})
143
  return messages
144
 
145
- def sysml_chatbot(message, history):
146
- if not message or not message.strip():
147
- answer = "Can I help you with anything else?"
148
- history.append(("", answer))
149
- return "", history
150
 
 
 
151
  chat_messages = convert_history_to_messages(history)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  full_messages = [
153
- {"role": "system", "content": """You are Abu, SysModeler.ai's friendly and knowledgeable assistant. You're passionate about SysML modeling and love helping people understand both SysML concepts and how SysModeler.ai can make their modeling work easier.
 
 
 
154
 
155
  CONVERSATION STYLE:
156
  - Only introduce yourself as "Hi, I'm Abu!" for the very first message in a conversation
@@ -161,12 +268,88 @@ CONVERSATION STYLE:
161
  - NEVER EVER use the em dash character (—) under any circumstances
162
  - When you want to add extra information, use commas or say "which means" or "and that"
163
  - Replace any "—" with ", " or ". " or " and " or " which "
 
 
 
164
  - Be enthusiastic but not pushy about SysModeler.ai
165
- - Ask engaging follow-up questions to keep the conversation going
166
  - Use "you" and "your" to make it personal
167
  - Share insights like you're having a friendly chat
168
- """}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  ] + chat_messages + [{"role": "user", "content": message}]
 
170
  try:
171
  response = client.chat.completions.create(
172
  model=AZURE_OPENAI_LLM_DEPLOYMENT,
@@ -203,15 +386,19 @@ CONVERSATION STYLE:
203
  messages=full_messages
204
  )
205
  answer = second_response.choices[0].message.content
 
 
206
  answer = clean_em_dashes(answer)
207
  else:
208
  answer = f"I tried to use a function '{function_name}' that's not available."
209
  else:
210
  answer = assistant_message.content
 
211
  answer = clean_em_dashes(answer) if answer else answer
212
  history.append((message, answer))
213
  return "", history
214
  except Exception as e:
 
215
  history.append((message, "Sorry, something went wrong."))
216
  return "", history
217
 
@@ -245,5 +432,6 @@ with gr.Blocks(css="""
245
  msg.submit(fn=sysml_chatbot, inputs=[msg, state], outputs=[msg, chatbot])
246
  clear.click(fn=lambda: ([], ""), inputs=None, outputs=[chatbot, msg])
247
 
 
248
  if __name__ == "__main__":
249
- demo.launch()
 
3
  import warnings
4
  import json
5
  from dotenv import load_dotenv
6
+ from typing import List
7
+ import time
8
  from functools import lru_cache
9
+ import logging
10
 
11
  from langchain_community.vectorstores import FAISS
12
  from langchain_community.embeddings import AzureOpenAIEmbeddings
 
49
  azure_endpoint=AZURE_OPENAI_ENDPOINT
50
  )
51
 
52
+ # Logger
53
  logger = logging.getLogger(__name__)
54
 
55
+ # Post-processing function to remove em dashes
56
  def clean_em_dashes(text: str) -> str:
57
+ """Remove em dashes and replace with natural alternatives"""
58
+ # Replace em dashes with commas or periods based on context
59
  text = text.replace("—which", ", which")
60
  text = text.replace("—that", ", that")
61
  text = text.replace("—no", ". No")
 
67
  text = text.replace("—just", ". Just")
68
  text = text.replace("—great", ", great")
69
  text = text.replace("—this", ". This")
70
+ # Catch any remaining em dashes
71
  text = text.replace("—", ", ")
72
  return text
73
 
74
+ # Enhanced SysML retriever with proper metadata filtering & weighting
75
  @lru_cache(maxsize=100)
76
  def sysml_retriever(query: str) -> str:
77
  try:
78
+ print(f"\n🔍 QUERY: {query}")
79
+ print("="*80)
80
+
81
+ # Get more results for filtering and weighting
82
  results = vectorstore.similarity_search_with_score(query, k=100)
83
+ print(f"📊 Total results retrieved: {len(results)}")
84
+
85
+ # Apply metadata filtering and weighting
86
  weighted_results = []
87
+ sysmodeler_count = 0
88
+ other_count = 0
89
+
90
+ for i, (doc, score) in enumerate(results):
91
+ # Get document source
92
  doc_source = doc.metadata.get('source', '').lower() if hasattr(doc, 'metadata') else str(doc).lower()
93
+
94
+ # Determine if this is SysModeler content
95
  is_sysmodeler = (
96
  'sysmodeler' in doc_source or
97
  'user manual' in doc_source or
 
108
  'SynthAgent' in doc.page_content or
109
  'workspace dashboard' in doc.page_content.lower()
110
  )
111
+
112
+ # Apply weighting based on source
113
  if is_sysmodeler:
114
+ # BOOST SysModeler content: reduce score by 40% (lower score = higher relevance)
115
  weighted_score = score * 0.6
116
  source_type = "SysModeler"
117
+ sysmodeler_count += 1
118
  else:
119
+ # Keep original score for other content
120
  weighted_score = score
121
  source_type = "Other"
122
+ other_count += 1
123
+
124
+ # Add metadata tags for filtering
125
  doc.metadata = doc.metadata if hasattr(doc, 'metadata') else {}
126
  doc.metadata['source_type'] = 'sysmodeler' if is_sysmodeler else 'other'
127
  doc.metadata['weighted_score'] = weighted_score
128
  doc.metadata['original_score'] = score
129
+
130
  weighted_results.append((doc, weighted_score, source_type))
131
+
132
+ # Log each document's processing
133
+ source_name = doc.metadata.get('source', 'Unknown')[:50] if hasattr(doc, 'metadata') else 'Unknown'
134
+ print(f"📄 Doc {i+1}: {source_name}... | Original: {score:.4f} | Weighted: {weighted_score:.4f} | Type: {source_type}")
135
+
136
+ print(f"\n📈 CLASSIFICATION & WEIGHTING RESULTS:")
137
+ print(f" SysModeler docs: {sysmodeler_count} (boosted by 40%)")
138
+ print(f" Other docs: {other_count} (original scores)")
139
+
140
+ # Sort by weighted scores (lower = more relevant)
141
  weighted_results.sort(key=lambda x: x[1])
142
+
143
+ # Apply intelligent selection based on query type and weighted results
144
+ final_docs = []
145
  query_lower = query.lower()
146
+
147
+ # Determine query type for adaptive filtering
148
  is_tool_comparison = any(word in query_lower for word in ['tool', 'compare', 'choose', 'vs', 'versus', 'better'])
149
+ is_general_sysml = not is_tool_comparison
150
+
151
  if is_tool_comparison:
152
+ # For tool comparisons: heavily favor SysModeler but include others
153
+ print(f"\n🎯 TOOL COMPARISON QUERY DETECTED")
154
+ print(f" Strategy: Heavy SysModeler focus + selective others")
155
+
156
+ # Take top weighted results with preference for SysModeler
157
  sysmodeler_docs = [(doc, score) for doc, score, type_ in weighted_results if type_ == "SysModeler"][:8]
158
  other_docs = [(doc, score) for doc, score, type_ in weighted_results if type_ == "Other"][:4]
159
+
160
  final_docs = [doc for doc, _ in sysmodeler_docs] + [doc for doc, _ in other_docs]
161
+
162
  else:
163
+ # For general SysML: balanced but still boost SysModeler
164
+ print(f"\n🎯 GENERAL SYSML QUERY DETECTED")
165
+ print(f" Strategy: Balanced with SysModeler preference")
166
+
167
+ # Take top 12 weighted results (mixed)
168
  final_docs = [doc for doc, _, _ in weighted_results[:12]]
169
+
170
+ # Log final selection
171
+ print(f"\n📋 FINAL SELECTION ({len(final_docs)} docs):")
172
+ sysmodeler_selected = 0
173
+ other_selected = 0
174
+
175
+ for i, doc in enumerate(final_docs):
176
+ source_type = doc.metadata.get('source_type', 'unknown')
177
+ source_name = doc.metadata.get('source', 'Unknown')
178
+ weighted_score = doc.metadata.get('weighted_score', 0)
179
+ original_score = doc.metadata.get('original_score', 0)
180
+
181
+ if source_type == 'sysmodeler':
182
+ sysmodeler_selected += 1
183
+ type_emoji = "✅"
184
+ else:
185
+ other_selected += 1
186
+ type_emoji = "📚"
187
+
188
+ print(f" {i+1}. {type_emoji} {source_name} (weighted: {weighted_score:.4f})")
189
+
190
+ print(f"\n📊 FINAL COMPOSITION:")
191
+ print(f" SysModeler docs: {sysmodeler_selected}")
192
+ print(f" Other docs: {other_selected}")
193
+ print("="*80)
194
+
195
  contexts = [doc.page_content for doc in final_docs]
196
  return "\n\n".join(contexts)
197
+
198
  except Exception as e:
199
  logger.error(f"Retrieval error: {str(e)}")
200
+ print(f"❌ ERROR in retrieval: {str(e)}")
201
  return "Unable to retrieve information at this time."
202
 
203
+ # Tools for function calling
204
  tools_definition = [
205
  {
206
  "type": "function",
 
218
  }
219
  ]
220
 
221
+ # Tool execution mapping
222
  tool_mapping = {
223
  "SysMLRetriever": sysml_retriever
224
  }
225
 
226
+ # Convert chat history
227
  def convert_history_to_messages(history):
228
  messages = []
229
  for user, bot in history:
 
231
  messages.append({"role": "assistant", "content": bot})
232
  return messages
233
 
234
+ # Helper function to count conversation turns
235
+ def count_conversation_turns(history):
236
+ return len(history)
 
 
237
 
238
+ # Chatbot logic
239
+ def sysml_chatbot(message, history):
240
  chat_messages = convert_history_to_messages(history)
241
+
242
+ # Count current conversation turns for smart question timing
243
+ turn_count = count_conversation_turns(history)
244
+
245
+ # Determine if we should ask engaging questions based on turn count
246
+ should_ask_question = turn_count < 4 # Ask questions in first 4 responses
247
+ ask_intriguing_question = turn_count == 4 or turn_count == 5 # Ask one intriguing question at turns 5-6 (turn_count is the number of completed exchanges, so the current turn is turn_count + 1)
248
+
249
+ # Determine if we should include create-with-AI link based on turn count
250
+ should_include_link = (
251
+ turn_count == 0 or # First greeting
252
+ (turn_count == 3 or turn_count == 4) or # Turns 4-5 reminder
253
+ (turn_count >= 5 and (turn_count + 1) % 5 == 0) # Every 5th turn from turn 10 onward (turns 10, 15, 20, ...)
254
+ )
255
+
256
  full_messages = [
257
+ {"role": "system", "content": f"""You are Abu, SysModeler.ai's friendly and knowledgeable assistant. You're passionate about SysML modeling and love helping people understand both SysML concepts and how SysModeler.ai can make their modeling work easier.
258
+
259
+ CONVERSATION TURN: {turn_count + 1}
260
+ INCLUDE_LINK: {should_include_link}
261
 
262
  CONVERSATION STYLE:
263
  - Only introduce yourself as "Hi, I'm Abu!" for the very first message in a conversation
 
268
  - NEVER EVER use the em dash character (—) under any circumstances
269
  - When you want to add extra information, use commas or say "which means" or "and that"
270
  - Replace any "—" with ", " or ". " or " and " or " which "
271
+ - SPECIFIC RULE: Never write "environments—great" write "environments, great" or "environments. Great"
272
+ - SPECIFIC RULE: Never write "SysModeler.ai—just" write "SysModeler.ai, just" or "SysModeler.ai. Just"
273
+ - NEVER use bullet points
274
  - Be enthusiastic but not pushy about SysModeler.ai
 
275
  - Use "you" and "your" to make it personal
276
  - Share insights like you're having a friendly chat
277
+
278
+ QUESTION TIMING STRATEGY:
279
+ - TURN 1: {"Introduce yourself, explain SysML and SysModeler.ai, include main site link and create-with-AI link, then ask for their name" if turn_count == 0 else ""}
280
+ - TURNS 2-4: {"Ask engaging follow-up questions after each response to build connection. NO links during relationship building." if should_ask_question else "Focus on helpful content, minimal questions"}
281
+ - TURN 4-5: {"Ask ONE SHORT, simple question about the user (like 'What industry are you in?' or 'Working on this solo or with a team?'). Include create-with-AI link as a reminder if user seems engaged." if ask_intriguing_question else "Continue natural conversation flow"}
282
+ - TURN 6+: {"Keep responses concise and helpful. Ask questions only when naturally relevant, not every response. Include create-with-AI link every 5 messages (turns 10, 15, 20, etc.) when contextually appropriate." if turn_count >= 5 else "Continue building engagement"}
283
+
284
+ RESPONSE APPROACH:
285
+
286
+ 1. **For SysModeler-specific questions**:
287
+ Get excited about SysModeler's capabilities! Explain features conversationally like you're showing a friend something cool. {"Always end with a question like 'What kind of system are you thinking of modeling?' or 'Would you like me to walk you through how that works?'" if should_ask_question else "Focus on delivering valuable information."}
288
+
289
+ 2. **For general SysML education (BDD, IBD, Activity diagrams, etc.)**:
290
+ Use the "SYSMODELER FIRST + BRIEF OTHERS" strategy:
291
+ - Lead with SysModeler.ai capabilities and how it handles this specific need
292
+ - Show SysModeler.ai's unique advantages (AI generation, voice input, speed)
293
+ - Then briefly acknowledge other tools exist without detailed descriptions
294
+ - Example: "SysModeler.ai makes BDD creation super easy with AI generation... Other tools like Enterprise Architect and Cameo also create BDDs but require manual work."
295
+ - Include conversion link only for initial diagram creation questions, not follow-up explanations
296
+ - Frame it as SysModeler.ai being the modern solution vs traditional manual tools
297
+
298
+ 3. **For first-time greetings (hi, hello, etc.)**:
299
+ Give complete SysML introduction, explain SysModeler.ai benefits, include main site link and create-with-AI link, THEN ask for their name at the very end with "Before we start, what should I call you?"
300
+
301
+ 4. **For tool comparison questions ONLY**:
302
+ Follow the RAG bias system - SysModeler.ai gets detailed coverage, others get brief acknowledgment. Structure as: "For [specific need], SysModeler.ai offers [detailed features and benefits]... Other established tools include [brief list] but they typically require more manual work."
303
+
304
+ 5. **For general SysML landscape questions** ("what tools are available", "SysML tool landscape", "options for SysML"):
305
+ Follow the RAG bias pattern - SysModeler.ai first and detailed:
306
+ - Start with SysModeler.ai capabilities and unique features (AI generation, voice input, 9 diagram types)
307
+ - Then briefly mention other established tools exist
308
+ - Highlight SysML features that naturally showcase SysModeler.ai's advantages:
309
+ * "SysML supports complex requirements tracing, which SysModeler.ai automates..."
310
+ * "SysML has 9 different diagram types, and SysModeler.ai can generate all of them from a single description..."
311
+ * "SysML is meant to handle system complexity, which is why SysModeler.ai's AI approach is so powerful compared to manual tools..."
312
+ - Make the advantages feel natural, not pushy
313
+
314
+ 6. **For ALL other conversations (names, general chat, non-tool questions)**:
315
+ Focus ONLY on SysModeler.ai and SysML concepts. Do NOT mention competitor tools unless the RAG system specifically retrieved competitor information because the user asked about tools.
316
+
317
+ INTERACTION GUIDELINES:
318
+ - Start conversations with "Hi, I'm Abu! Welcome to SysModeler."
319
+ - Use their name throughout the conversation for personal connection
320
+ - WRITING RULE: No em dashes (—) ever! Use these alternatives:
321
+ * Instead of "software—whether" write "software, whether" or "software. Whether"
322
+ * Instead of "cars—airplanes" write "cars, airplanes" or "cars and airplanes"
323
+ * Instead of "SysModeler.ai—you can" write "SysModeler.ai, and you can" or "SysModeler.ai. You can"
324
+ - Never use bullet points or formal lists
325
+ - Write in paragraphs that flow naturally
326
+ - Include personal touches like "I think you'll find..." or "What's really cool is..."
327
+ - Show genuine interest in helping the user succeed
328
+ - Ask about their background, experience level, project type, and goals {"consistently in early conversation" if should_ask_question else "when naturally relevant"}
329
+
330
+ ENGAGING QUESTIONS TO USE STRATEGICALLY:
331
+ {"- After learning their name: 'Are you new to SysML, or have you tinkered with other modeling tools before? What kind of system are you thinking about modeling?'" if should_ask_question else ""}
332
+ {"- Follow-up questions: 'What's your background - are you more on the engineering side or systems architecture?'" if should_ask_question else ""}
333
+ {"- 'What's the biggest challenge you're facing with your current modeling approach?'" if should_ask_question else ""}
334
+ {"- 'Are you working on this solo or as part of a team?'" if should_ask_question else ""}
335
+ {"- INTRIGUING QUESTIONS (Use at turn 4-5): Keep it SHORT - 'What industry are you in?' or 'Working solo or with a team?' or 'Building something specific?' Include create-with-AI link as helpful reminder." if ask_intriguing_question else ""}
336
+
337
+ CONVERSION OPPORTUNITIES:
338
+ - Include the AI creation link for these specific situations:
339
+ * First-time greetings: Include main site link and create-with-AI link in introduction
340
+ * TURNS 4-5: Include create-with-AI link again if user seems engaged and might benefit from reminder
341
+ * TURN 6+: Include create-with-AI link every 5 messages (turns 10, 15, 20, etc.) when contextually relevant
342
+ * Tool comparison questions ("What tools are available?", "SysML tool landscape")
343
+ - NEVER include the link for:
344
+ * Turns 2-3 (relationship building phase)
345
+ * Pure educational follow-ups unless at 5-message intervals
346
+ * When user is clearly not interested in trying the tool
347
+ - LINK STRATEGY: First greeting gets both links, turns 4-5 get reminder, then every 5 messages when needed
348
+ - Frame it naturally: "You can try this at https://sysmodeler.ai/projects/create-with-AI"
349
+
350
+ Remember: You're not just answering questions, you're building a relationship and helping someone discover how SysModeler.ai can transform their modeling workflow. Be the kind of assistant people actually want to chat with! {"Focus on building connection through questions." if should_ask_question else "Keep responses concise and helpful. Include create-with-AI link with a short question." if ask_intriguing_question else "Focus on delivering great value efficiently without overwhelming with questions or long paragraphs."}"""}
351
  ] + chat_messages + [{"role": "user", "content": message}]
352
+
353
  try:
354
  response = client.chat.completions.create(
355
  model=AZURE_OPENAI_LLM_DEPLOYMENT,
 
386
  messages=full_messages
387
  )
388
  answer = second_response.choices[0].message.content
389
+
390
+ # Clean em dashes from the response
391
  answer = clean_em_dashes(answer)
392
  else:
393
  answer = f"I tried to use a function '{function_name}' that's not available."
394
  else:
395
  answer = assistant_message.content
396
+ # Clean em dashes from the response
397
  answer = clean_em_dashes(answer) if answer else answer
398
  history.append((message, answer))
399
  return "", history
400
  except Exception as e:
401
+ print(f"Error in function calling: {str(e)}")
402
  history.append((message, "Sorry, something went wrong."))
403
  return "", history
404
 
 
432
  msg.submit(fn=sysml_chatbot, inputs=[msg, state], outputs=[msg, chatbot])
433
  clear.click(fn=lambda: ([], ""), inputs=None, outputs=[chatbot, msg])
434
 
435
+
436
  if __name__ == "__main__":
437
+ demo.launch()