SysModeler committed on
Commit
3e4e3cb
·
verified ·
1 Parent(s): 922d194

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +260 -4
app.py CHANGED
@@ -52,15 +52,152 @@ client = AzureOpenAI(
52
  # Logger
53
  logger = logging.getLogger(__name__)
54
 
55
- # SysML retriever function
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  @lru_cache(maxsize=100)
57
  def sysml_retriever(query: str) -> str:
58
  try:
59
- results = vectorstore.similarity_search(query, k=100)
60
- contexts = [doc.page_content for doc in results]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  return "\n\n".join(contexts)
 
62
  except Exception as e:
63
  logger.error(f"Retrieval error: {str(e)}")
 
64
  return "Unable to retrieve information at this time."
65
 
66
  # Dummy functions
@@ -131,12 +268,125 @@ def convert_history_to_messages(history):
131
  messages.append({"role": "assistant", "content": bot})
132
  return messages
133
 
 
 
 
 
134
  # Chatbot logic
135
  def sysml_chatbot(message, history):
136
  chat_messages = convert_history_to_messages(history)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  full_messages = [
138
- {"role": "system", "content": "You are a helpful SysML modeling assistant and also a capable smart Assistant"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  ] + chat_messages + [{"role": "user", "content": message}]
 
140
  try:
141
  response = client.chat.completions.create(
142
  model=AZURE_OPENAI_LLM_DEPLOYMENT,
@@ -173,10 +423,15 @@ def sysml_chatbot(message, history):
173
  messages=full_messages
174
  )
175
  answer = second_response.choices[0].message.content
 
 
 
176
  else:
177
  answer = f"I tried to use a function '{function_name}' that's not available."
178
  else:
179
  answer = assistant_message.content
 
 
180
  history.append((message, answer))
181
  return "", history
182
  except Exception as e:
@@ -214,5 +469,6 @@ with gr.Blocks(css="""
214
  msg.submit(fn=sysml_chatbot, inputs=[msg, state], outputs=[msg, chatbot])
215
  clear.click(fn=lambda: ([], ""), inputs=None, outputs=[chatbot, msg])
216
 
 
217
  if __name__ == "__main__":
218
  demo.launch()
 
52
  # Logger
53
  logger = logging.getLogger(__name__)
54
 
55
+ # Post-processing function to remove em dashes
56
def clean_em_dashes(text: str) -> str:
    """Replace em dashes in *text* with commas or sentence breaks.

    A dash directly followed by one of a few known words is rewritten with
    a fixed replacement (for example "—which" becomes ", which" and
    "—just" becomes ". Just"); every remaining em dash becomes ", ".

    Spaces and tabs surrounding an em dash are dropped first, so a spaced
    dash ("tool — you") is treated exactly like a tight one ("tool—you")
    and no stray " , " or double space is left behind.

    Args:
        text: The text to clean. ``None`` or an empty string is returned
            unchanged, so callers can pass a model reply that has no content.

    Returns:
        The text with every em dash removed, or the input itself when it
        is falsy.
    """
    import re  # function-scope import: the file's import block is outside this block

    if not text:
        # Nothing to clean; also avoids AttributeError when the reply is None.
        return text

    # Normalise "word — word" to "word—word" so the rules below see one shape.
    # Only horizontal whitespace is removed; line breaks are left alone.
    text = re.sub(r"[ \t]*—[ \t]*", "—", text)

    # Order is kept from the original implementation; matching is by prefix,
    # so "—no" also rewrites "—not", "—it" also rewrites "—its", and so on.
    replacements = (
        ("—which", ", which"),
        ("—that", ", that"),
        ("—no", ". No"),
        ("—and", ", and"),
        ("—but", ", but"),
        ("—so", ", so"),
        ("—you", ". You"),
        ("—it", ". It"),
        ("—just", ". Just"),
        ("—great", ", great"),
        ("—this", ". This"),
    )
    for dashed, plain in replacements:
        text = text.replace(dashed, plain)

    # Catch any remaining em dashes.
    return text.replace("—", ", ")
73
+
74
# Enhanced SysML retriever with proper metadata filtering & weighting

# Substrings that mark a query as a tool-comparison question.
_TOOL_COMPARISON_KEYWORDS = ('tool', 'compare', 'choose', 'vs', 'versus', 'better')

# Substrings of the (lower-cased) document source that mark SysModeler content.
_SYSMODELER_SOURCE_MARKERS = ('sysmodeler', 'user manual')

# Substrings matched against the lower-cased page content.
# 'workspace.sysmodeler.ai' is not listed separately because any text
# containing it also contains 'sysmodeler.ai'.
_SYSMODELER_CONTENT_MARKERS_LOWER = ('sysmodeler.ai', 'workspace dashboard')

# Substrings matched case-sensitively against the page content.
_SYSMODELER_CONTENT_MARKERS_EXACT = (
    'Create with AI',
    'Canvas Overview',
    'AI-powered',
    'voice input',
    'Canvas interface',
    'Project Creation',
    'Shape Palette',
    'AI Copilot',
    'SynthAgent',
)


def _is_sysmodeler_doc(doc) -> bool:
    """Return True when *doc* looks like SysModeler content.

    The decision uses the document's ``source`` metadata (or ``str(doc)``
    when the object has no ``metadata`` attribute) and marker substrings
    in ``doc.page_content``.
    """
    if hasattr(doc, 'metadata'):
        # A missing or None source is treated as an empty string instead of
        # raising on .lower().
        doc_source = str((doc.metadata or {}).get('source') or '').lower()
    else:
        doc_source = str(doc).lower()

    content = doc.page_content
    content_lower = content.lower()  # computed once instead of per marker

    return (
        any(marker in doc_source for marker in _SYSMODELER_SOURCE_MARKERS)
        or any(marker in content_lower for marker in _SYSMODELER_CONTENT_MARKERS_LOWER)
        or any(marker in content for marker in _SYSMODELER_CONTENT_MARKERS_EXACT)
    )


@lru_cache(maxsize=100)
def _retrieve_sysml_context(query: str) -> str:
    """Fetch, re-weight and select documents for *query*; raise on failure.

    Kept separate from ``sysml_retriever`` so that only successful results
    are memoised: an exception propagates out of the cached function and is
    therefore never stored in the cache.
    """
    print(f"\n🔍 QUERY: {query}")
    print("="*80)

    # Get more results for filtering and weighting
    results = vectorstore.similarity_search_with_score(query, k=100)
    print(f"📊 Total results retrieved: {len(results)}")

    # Apply metadata filtering and weighting
    weighted_results = []
    sysmodeler_count = 0
    other_count = 0

    for i, (doc, score) in enumerate(results):
        is_sysmodeler = _is_sysmodeler_doc(doc)

        if is_sysmodeler:
            # BOOST SysModeler content: reduce score by 40%.
            # NOTE(review): assumes a lower score means higher relevance
            # (distance-like scores) -- confirm against the vector store in use.
            weighted_score = score * 0.6
            source_type = "SysModeler"
            sysmodeler_count += 1
        else:
            # Keep original score for other content
            weighted_score = score
            source_type = "Other"
            other_count += 1

        # Add metadata tags used by the final-selection log below
        if getattr(doc, 'metadata', None) is None:
            doc.metadata = {}
        doc.metadata['source_type'] = 'sysmodeler' if is_sysmodeler else 'other'
        doc.metadata['weighted_score'] = weighted_score
        doc.metadata['original_score'] = score

        weighted_results.append((doc, weighted_score, source_type))

        # Log each document's processing; a None source no longer breaks slicing
        source_name = str(doc.metadata.get('source') or 'Unknown')[:50]
        print(f"📄 Doc {i+1}: {source_name}... | Original: {score:.4f} | Weighted: {weighted_score:.4f} | Type: {source_type}")

    print("\n📈 CLASSIFICATION & WEIGHTING RESULTS:")
    print(f" SysModeler docs: {sysmodeler_count} (boosted by 40%)")
    print(f" Other docs: {other_count} (original scores)")

    # Sort by weighted scores (ascending)
    weighted_results.sort(key=lambda x: x[1])

    # Determine query type for adaptive filtering (plain substring match)
    query_lower = query.lower()
    is_tool_comparison = any(word in query_lower for word in _TOOL_COMPARISON_KEYWORDS)

    if is_tool_comparison:
        # For tool comparisons: heavily favor SysModeler but include others
        print("\n🎯 TOOL COMPARISON QUERY DETECTED")
        print(" Strategy: Heavy SysModeler focus + selective others")

        # Up to 8 best SysModeler docs followed by up to 4 best other docs
        sysmodeler_docs = [doc for doc, _, type_ in weighted_results if type_ == "SysModeler"][:8]
        other_docs = [doc for doc, _, type_ in weighted_results if type_ == "Other"][:4]
        final_docs = sysmodeler_docs + other_docs
    else:
        # For general SysML: balanced but still boost SysModeler
        print("\n🎯 GENERAL SYSML QUERY DETECTED")
        print(" Strategy: Balanced with SysModeler preference")

        # Take top 12 weighted results (mixed)
        final_docs = [doc for doc, _, _ in weighted_results[:12]]

    # Log final selection
    print(f"\n📋 FINAL SELECTION ({len(final_docs)} docs):")
    sysmodeler_selected = 0
    other_selected = 0

    for i, doc in enumerate(final_docs):
        source_type = doc.metadata.get('source_type', 'unknown')
        source_name = doc.metadata.get('source', 'Unknown')
        weighted_score = doc.metadata.get('weighted_score', 0)

        if source_type == 'sysmodeler':
            sysmodeler_selected += 1
            type_emoji = "✅"
        else:
            other_selected += 1
            type_emoji = "📚"

        print(f" {i+1}. {type_emoji} {source_name} (weighted: {weighted_score:.4f})")

    print("\n📊 FINAL COMPOSITION:")
    print(f" SysModeler docs: {sysmodeler_selected}")
    print(f" Other docs: {other_selected}")
    print("="*80)

    return "\n\n".join(doc.page_content for doc in final_docs)


def sysml_retriever(query: str) -> str:
    """Return retrieved context for *query* as one string.

    Runs a scored similarity search (k=100) on the module-level
    ``vectorstore``, multiplies the score of documents classified as
    SysModeler content by 0.6, sorts ascending by the weighted score and
    joins the ``page_content`` of the selected documents with blank lines.
    Tool-comparison queries take up to 8 SysModeler and 4 other documents;
    all other queries take the top 12 overall.

    Successful results are memoised per query (up to 100 entries). Failures
    are logged and answered with a fixed fallback message but are NOT
    cached, so a transient vector-store error does not stick to the query.

    Args:
        query: The user's question.

    Returns:
        The joined document contents, or
        "Unable to retrieve information at this time." on any error.
    """
    try:
        return _retrieve_sysml_context(query)
    except Exception as e:
        logger.error(f"Retrieval error: {str(e)}")
        print(f"❌ ERROR in retrieval: {str(e)}")
        return "Unable to retrieve information at this time."


# Keep the lru_cache helpers reachable under the public name for any caller
# that used sysml_retriever.cache_clear() / cache_info().
sysml_retriever.cache_clear = _retrieve_sysml_context.cache_clear
sysml_retriever.cache_info = _retrieve_sysml_context.cache_info
202
 
203
  # Dummy functions
 
268
  messages.append({"role": "assistant", "content": bot})
269
  return messages
270
 
271
+ # Helper function to count conversation turns
272
def count_conversation_turns(history):
    """Return the number of entries currently held in *history*."""
    turns_so_far = len(history)
    return turns_so_far
274
+
275
  # Chatbot logic
276
  def sysml_chatbot(message, history):
277
  chat_messages = convert_history_to_messages(history)
278
+
279
+ # Count current conversation turns for smart question timing
280
+ turn_count = count_conversation_turns(history)
281
+
282
+ # Determine if we should ask engaging questions based on turn count
283
+ should_ask_question = turn_count < 4 # Ask questions in first 4 responses
284
+ ask_intriguing_question = turn_count == 4 or turn_count == 5 # Ask one intriguing question at turns 4-5
285
+
286
+ # Determine if we should include create-with-AI link based on turn count
287
+ should_include_link = (
288
+ turn_count == 0 or # First greeting
289
+ (turn_count == 3 or turn_count == 4) or # Turns 4-5 reminder
290
+ (turn_count >= 5 and (turn_count + 1) % 5 == 0) # Every 5 messages after turn 6
291
+ )
292
+
293
  full_messages = [
294
+ {"role": "system", "content": f"""You are Abu, SysModeler.ai's friendly and knowledgeable assistant. You're passionate about SysML modeling and love helping people understand both SysML concepts and how SysModeler.ai can make their modeling work easier.
295
+
296
+ CONVERSATION TURN: {turn_count + 1}
297
+ INCLUDE_LINK: {should_include_link}
298
+
299
+ CONVERSATION STYLE:
300
+ - Only introduce yourself as "Hi, I'm Abu!" for the very first message in a conversation
301
+ - After the first message, continue naturally without reintroducing yourself
302
+ - If user gives you their name, use it throughout. If not, continue naturally without asking again
303
+ - Talk like a knowledgeable colleague, not a formal bot
304
+ - CRITICAL: Em dashes (—) are ABSOLUTELY FORBIDDEN in ANY response EVER
305
+ - NEVER EVER use the em dash character (—) under any circumstances
306
+ - When you want to add extra information, use commas or say "which means" or "and that"
307
+ - Replace any "—" with ", " or ". " or " and " or " which "
308
+ - SPECIFIC RULE: Never write "environments—great" write "environments, great" or "environments. Great"
309
+ - SPECIFIC RULE: Never write "SysModeler.ai—just" write "SysModeler.ai, just" or "SysModeler.ai. Just"
310
+ - NEVER use bullet points
311
+ - Be enthusiastic but not pushy about SysModeler.ai
312
+ - Use "you" and "your" to make it personal
313
+ - Share insights like you're having a friendly chat
314
+
315
+ QUESTION TIMING STRATEGY:
316
+ - TURN 1: {"Introduce yourself, explain SysML and SysModeler.ai, include main site link and create-with-AI link, then ask for their name" if turn_count == 0 else ""}
317
+ - TURNS 2-4: {"Ask engaging follow-up questions after each response to build connection. NO links during relationship building." if should_ask_question else "Focus on helpful content, minimal questions"}
318
+ - TURN 4-5: {"Ask ONE SHORT, simple question about the user (like 'What industry are you in?' or 'Working on this solo or with a team?'). Include create-with-AI link as a reminder if user seems engaged." if ask_intriguing_question else "Continue natural conversation flow"}
319
+ - TURN 6+: {"Keep responses concise and helpful. Ask questions only when naturally relevant, not every response. Include create-with-AI link every 5 messages (turns 10, 15, 20, etc.) when contextually appropriate." if turn_count >= 5 else "Continue building engagement"}
320
+
321
+ RESPONSE APPROACH:
322
+
323
+ 1. **For SysModeler-specific questions**:
324
+ Get excited about SysModeler's capabilities! Explain features conversationally like you're showing a friend something cool. {"Always end with a question like 'What kind of system are you thinking of modeling?' or 'Would you like me to walk you through how that works?'" if should_ask_question else "Focus on delivering valuable information."}
325
+
326
+ 2. **For general SysML education (BDD, IBD, Activity diagrams, etc.)**:
327
+ Use the "SYSMODELER FIRST + BRIEF OTHERS" strategy:
328
+ - Lead with SysModeler.ai capabilities and how it handles this specific need
329
+ - Show SysModeler.ai's unique advantages (AI generation, voice input, speed)
330
+ - Then briefly acknowledge other tools exist without detailed descriptions
331
+ - Example: "SysModeler.ai makes BDD creation super easy with AI generation... Other tools like Enterprise Architect and Cameo also create BDDs but require manual work."
332
+ - Include conversion link only for initial diagram creation questions, not follow-up explanations
333
+ - Frame it as SysModeler.ai being the modern solution vs traditional manual tools
334
+
335
+ 3. **For first-time greetings (hi, hello, etc.)**:
336
+ Give complete SysML introduction, explain SysModeler.ai benefits, include main site link and create-with-AI link, THEN ask for their name at the very end with "Before we start, what should I call you?"
337
+
338
+ 4. **For tool comparison questions ONLY**:
339
+ Follow the RAG bias system - SysModeler.ai gets detailed coverage, others get brief acknowledgment. Structure as: "For [specific need], SysModeler.ai offers [detailed features and benefits]... Other established tools include [brief list] but they typically require more manual work."
340
+
341
+ 5. **For general SysML landscape questions** ("what tools are available", "SysML tool landscape", "options for SysML"):
342
+ Follow the RAG bias pattern - SysModeler.ai first and detailed:
343
+ - Start with SysModeler.ai capabilities and unique features (AI generation, voice input, 9 diagram types)
344
+ - Then briefly mention other established tools exist
345
+ - Highlight SysML features that naturally showcase SysModeler.ai's advantages:
346
+ * "SysML supports complex requirements tracing, which SysModeler.ai automates..."
347
+ * "SysML has 9 different diagram types, and SysModeler.ai can generate all of them from a single description..."
348
+ * "SysML is meant to handle system complexity, which is why SysModeler.ai's AI approach is so powerful compared to manual tools..."
349
+ - Make the advantages feel natural, not pushy
350
+
351
+ 6. **For ALL other conversations (names, general chat, non-tool questions)**:
352
+ Focus ONLY on SysModeler.ai and SysML concepts. Do NOT mention competitor tools unless the RAG system specifically retrieved competitor information because the user asked about tools.
353
+
354
+ INTERACTION GUIDELINES:
355
+ - Start conversations with "Hi, I'm Abu! Welcome to SysModeler."
356
+ - Use their name throughout the conversation for personal connection
357
+ - WRITING RULE: No em dashes (—) ever! Use these alternatives:
358
+ * Instead of "software—whether" write "software, whether" or "software. Whether"
359
+ * Instead of "cars—airplanes" write "cars, airplanes" or "cars and airplanes"
360
+ * Instead of "SysModeler.ai—you can" write "SysModeler.ai, and you can" or "SysModeler.ai. You can"
361
+ - Never use bullet points or formal lists
362
+ - Write in paragraphs that flow naturally
363
+ - Include personal touches like "I think you'll find..." or "What's really cool is..."
364
+ - Show genuine interest in helping the user succeed
365
+ - Ask about their background, experience level, project type, and goals {"consistently in early conversation" if should_ask_question else "when naturally relevant"}
366
+
367
+ ENGAGING QUESTIONS TO USE STRATEGICALLY:
368
+ {"- After learning their name: 'Are you new to SysML, or have you tinkered with other modeling tools before? What kind of system are you thinking about modeling?'" if should_ask_question else ""}
369
+ {"- Follow-up questions: 'What's your background - are you more on the engineering side or systems architecture?'" if should_ask_question else ""}
370
+ {"- 'What's the biggest challenge you're facing with your current modeling approach?'" if should_ask_question else ""}
371
+ {"- 'Are you working on this solo or as part of a team?'" if should_ask_question else ""}
372
+ {"- INTRIGUING QUESTIONS (Use at turn 4-5): Keep it SHORT - 'What industry are you in?' or 'Working solo or with a team?' or 'Building something specific?' Include create-with-AI link as helpful reminder." if ask_intriguing_question else ""}
373
+
374
+ CONVERSION OPPORTUNITIES:
375
+ - Include the AI creation link for these specific situations:
376
+ * First-time greetings: Include main site link and create-with-AI link in introduction
377
+ * TURNS 4-5: Include create-with-AI link again if user seems engaged and might benefit from reminder
378
+ * TURN 6+: Include create-with-AI link every 5 messages (turns 10, 15, 20, etc.) when contextually relevant
379
+ * Tool comparison questions ("What tools are available?", "SysML tool landscape")
380
+ - NEVER include the link for:
381
+ * Turns 2-3 (relationship building phase)
382
+ * Pure educational follow-ups unless at 5-message intervals
383
+ * When user is clearly not interested in trying the tool
384
+ - LINK STRATEGY: First greeting gets both links, turns 4-5 get reminder, then every 5 messages when needed
385
+ - Frame it naturally: "You can try this at https://sysmodeler.ai/projects/create-with-AI"
386
+
387
+ Remember: You're not just answering questions, you're building a relationship and helping someone discover how SysModeler.ai can transform their modeling workflow. Be the kind of assistant people actually want to chat with! {"Focus on building connection through questions." if should_ask_question else "Keep responses concise and helpful. Include create-with-AI link with a short question." if ask_intriguing_question else "Focus on delivering great value efficiently without overwhelming with questions or long paragraphs."}"""}
388
  ] + chat_messages + [{"role": "user", "content": message}]
389
+
390
  try:
391
  response = client.chat.completions.create(
392
  model=AZURE_OPENAI_LLM_DEPLOYMENT,
 
423
  messages=full_messages
424
  )
425
  answer = second_response.choices[0].message.content
426
+
427
+ # Clean em dashes from the response
428
+ answer = clean_em_dashes(answer)
429
  else:
430
  answer = f"I tried to use a function '{function_name}' that's not available."
431
  else:
432
  answer = assistant_message.content
433
+ # Clean em dashes from the response
434
+ answer = clean_em_dashes(answer) if answer else answer
435
  history.append((message, answer))
436
  return "", history
437
  except Exception as e:
 
469
  msg.submit(fn=sysml_chatbot, inputs=[msg, state], outputs=[msg, chatbot])
470
  clear.click(fn=lambda: ([], ""), inputs=None, outputs=[chatbot, msg])
471
 
472
+
473
  if __name__ == "__main__":
474
  demo.launch()