Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -52,15 +52,152 @@ client = AzureOpenAI(
|
|
52 |
# Logger
|
53 |
logger = logging.getLogger(__name__)
|
54 |
|
55 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
@lru_cache(maxsize=100)
|
57 |
def sysml_retriever(query: str) -> str:
|
58 |
try:
|
59 |
-
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
return "\n\n".join(contexts)
|
|
|
62 |
except Exception as e:
|
63 |
logger.error(f"Retrieval error: {str(e)}")
|
|
|
64 |
return "Unable to retrieve information at this time."
|
65 |
|
66 |
# Dummy functions
|
@@ -131,12 +268,125 @@ def convert_history_to_messages(history):
|
|
131 |
messages.append({"role": "assistant", "content": bot})
|
132 |
return messages
|
133 |
|
|
|
|
|
|
|
|
|
134 |
# Chatbot logic
|
135 |
def sysml_chatbot(message, history):
|
136 |
chat_messages = convert_history_to_messages(history)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
full_messages = [
|
138 |
-
{"role": "system", "content": "You are
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
] + chat_messages + [{"role": "user", "content": message}]
|
|
|
140 |
try:
|
141 |
response = client.chat.completions.create(
|
142 |
model=AZURE_OPENAI_LLM_DEPLOYMENT,
|
@@ -173,10 +423,15 @@ def sysml_chatbot(message, history):
|
|
173 |
messages=full_messages
|
174 |
)
|
175 |
answer = second_response.choices[0].message.content
|
|
|
|
|
|
|
176 |
else:
|
177 |
answer = f"I tried to use a function '{function_name}' that's not available."
|
178 |
else:
|
179 |
answer = assistant_message.content
|
|
|
|
|
180 |
history.append((message, answer))
|
181 |
return "", history
|
182 |
except Exception as e:
|
@@ -214,5 +469,6 @@ with gr.Blocks(css="""
|
|
214 |
msg.submit(fn=sysml_chatbot, inputs=[msg, state], outputs=[msg, chatbot])
|
215 |
clear.click(fn=lambda: ([], ""), inputs=None, outputs=[chatbot, msg])
|
216 |
|
|
|
217 |
if __name__ == "__main__":
|
218 |
demo.launch()
|
|
|
52 |
# Logger
|
53 |
logger = logging.getLogger(__name__)
|
54 |
|
55 |
+
# Post-processing function to remove em dashes
|
56 |
+
def clean_em_dashes(text: str) -> str:
    """Replace every em dash in *text* with comma- or period-based punctuation.

    A handful of common continuations ("—which", "—just", ...) get a tailored
    replacement; any em dash left after that becomes ", ".

    Spaces or tabs directly around an em dash are dropped first, so that
    "a — b" is treated exactly like "a—b" and yields "a, b" rather than
    "a ,  b".

    Args:
        text: The text to clean. ``None`` and ``""`` are returned unchanged so
            callers do not have to guard an empty model response.

    Returns:
        The text with no em dash characters left in it.
    """
    # Nothing to clean; also avoids AttributeError when the content is None.
    if not text:
        return text

    import re  # local import so this helper does not rely on file-level imports

    # Normalise "word — word" to "word—word" so the rules below apply to both
    # the tight and the spaced spelling and no stray spaces are left behind.
    text = re.sub(r"[ \t]*—[ \t]*", "—", text)

    # Context-specific rewrites, applied in order before the generic fallback.
    contextual_rules = (
        ("—which", ", which"),
        ("—that", ", that"),
        ("—no", ". No"),
        ("—and", ", and"),
        ("—but", ", but"),
        ("—so", ", so"),
        ("—you", ". You"),
        ("—it", ". It"),
        ("—just", ". Just"),
        ("—great", ", great"),
        ("—this", ". This"),
    )
    for dashed, plain in contextual_rules:
        text = text.replace(dashed, plain)

    # Catch any remaining em dashes.
    return text.replace("—", ", ")
|
73 |
+
|
74 |
+
# Enhanced SysML retriever with proper metadata filtering & weighting
|
75 |
# Phrases that mark a chunk as SysModeler material when found verbatim
# (case-sensitive) in the page content.
_SYSMODELER_PHRASES = (
    'Create with AI',
    'Canvas Overview',
    'AI-powered',
    'voice input',
    'Canvas interface',
    'Project Creation',
    'Shape Palette',
    'AI Copilot',
    'SynthAgent',
)

# Phrases matched against the lower-cased page content (case-insensitive).
_SYSMODELER_PHRASES_LOWER = (
    'sysmodeler.ai',
    'workspace.sysmodeler.ai',
    'workspace dashboard',
)

# Substrings of the (lower-cased) source that mark SysModeler material.
_SYSMODELER_SOURCE_HINTS = ('sysmodeler', 'user manual')

# A query word starting with one of these signals a tool-comparison question.
_COMPARISON_WORD_PREFIXES = ('tool', 'compare', 'choose', 'versus', 'better')


def _is_sysmodeler_doc(source_text: str, page_content: str) -> bool:
    """Return True when the source or the content looks like SysModeler material."""
    source_lower = source_text.lower()
    if any(hint in source_lower for hint in _SYSMODELER_SOURCE_HINTS):
        return True
    if any(phrase in page_content for phrase in _SYSMODELER_PHRASES):
        return True
    content_lower = page_content.lower()  # lower-cased once, reused for every phrase
    return any(phrase in content_lower for phrase in _SYSMODELER_PHRASES_LOWER)


def _is_tool_comparison_query(query: str) -> bool:
    """Return True when the query contains a tool-comparison keyword as a word.

    Matching is done per word rather than per substring, so "vs" no longer
    fires on words that merely contain it, such as "canvas".
    """
    import re  # local import so this helper does not rely on file-level imports

    words = re.findall(r"[a-z0-9]+", query.lower())
    return any(
        word == 'vs' or word.startswith(_COMPARISON_WORD_PREFIXES)
        for word in words
    )


@lru_cache(maxsize=100)
def _retrieve_sysml_context(query: str) -> str:
    """Retrieve, re-weight and select context chunks for *query*.

    Raises whatever the vector store raises. Because lru_cache only stores
    return values, a failed lookup is not cached and the next call retries.
    """
    print(f"\n🔍 QUERY: {query}")
    print("="*80)

    # Get more results than needed so there is room for filtering and weighting.
    results = vectorstore.similarity_search_with_score(query, k=100)
    print(f"📊 Total results retrieved: {len(results)}")

    weighted_results = []
    sysmodeler_count = 0
    other_count = 0

    for i, (doc, score) in enumerate(results):
        # Documents without metadata are classified by their string form and
        # given an empty metadata dict so the tags below can be stored.
        if getattr(doc, 'metadata', None) is None:
            source_text = str(doc)
            doc.metadata = {}
        else:
            # `or ''` tolerates a source that is present but None.
            source_text = str(doc.metadata.get('source') or '')

        is_sysmodeler = _is_sysmodeler_doc(source_text, doc.page_content)

        if is_sysmodeler:
            # Boost SysModeler content by shrinking its score by 40%.
            # NOTE(review): assumes distance-like scores where lower means
            # more relevant, as the ascending sort below also does; confirm
            # against the configured vector store.
            weighted_score = score * 0.6
            source_type = "SysModeler"
            sysmodeler_count += 1
        else:
            weighted_score = score
            source_type = "Other"
            other_count += 1

        # Tag the document so the final-selection report can read these back.
        doc.metadata['source_type'] = 'sysmodeler' if is_sysmodeler else 'other'
        doc.metadata['weighted_score'] = weighted_score
        doc.metadata['original_score'] = score

        weighted_results.append((doc, weighted_score, source_type))

        source_name = str(doc.metadata.get('source', 'Unknown'))[:50]
        print(f"📄 Doc {i+1}: {source_name}... | Original: {score:.4f} | Weighted: {weighted_score:.4f} | Type: {source_type}")

    print("\n📈 CLASSIFICATION & WEIGHTING RESULTS:")
    print(f" SysModeler docs: {sysmodeler_count} (boosted by 40%)")
    print(f" Other docs: {other_count} (original scores)")

    # Sort by weighted score, lowest first.
    weighted_results.sort(key=lambda item: item[1])

    if _is_tool_comparison_query(query):
        # Tool comparisons: up to 8 SysModeler chunks, then up to 4 others.
        print("\n🎯 TOOL COMPARISON QUERY DETECTED")
        print(" Strategy: Heavy SysModeler focus + selective others")

        sysmodeler_docs = [doc for doc, _, kind in weighted_results if kind == "SysModeler"][:8]
        other_docs = [doc for doc, _, kind in weighted_results if kind == "Other"][:4]
        final_docs = sysmodeler_docs + other_docs
    else:
        # General SysML: the 12 best weighted chunks, whatever their type.
        print("\n🎯 GENERAL SYSML QUERY DETECTED")
        print(" Strategy: Balanced with SysModeler preference")

        final_docs = [doc for doc, _, _ in weighted_results[:12]]

    # Report the final selection.
    print(f"\n📋 FINAL SELECTION ({len(final_docs)} docs):")
    sysmodeler_selected = 0
    other_selected = 0

    for i, doc in enumerate(final_docs):
        source_name = doc.metadata.get('source', 'Unknown')
        weighted_score = doc.metadata.get('weighted_score', 0)

        if doc.metadata.get('source_type', 'unknown') == 'sysmodeler':
            sysmodeler_selected += 1
            type_emoji = "✅"
        else:
            other_selected += 1
            type_emoji = "📚"

        print(f" {i+1}. {type_emoji} {source_name} (weighted: {weighted_score:.4f})")

    print("\n📊 FINAL COMPOSITION:")
    print(f" SysModeler docs: {sysmodeler_selected}")
    print(f" Other docs: {other_selected}")
    print("="*80)

    return "\n\n".join(doc.page_content for doc in final_docs)


def sysml_retriever(query: str) -> str:
    """Return retrieved context for *query* as one string of joined chunks.

    Runs a similarity search (k=100) on the module-level ``vectorstore``,
    multiplies the score of SysModeler-looking chunks by 0.6, sorts ascending
    and keeps either 8 SysModeler + 4 other chunks (tool-comparison queries)
    or the top 12 overall. Successful results are memoised per query string
    (up to 100 entries).

    Args:
        query: The user's search text.

    Returns:
        The selected chunks' page content joined by blank lines, or the fixed
        message "Unable to retrieve information at this time." when retrieval
        raises. The fallback message is never cached, so a transient failure
        does not stick to the query.
    """
    try:
        return _retrieve_sysml_context(query)
    except Exception as e:
        logger.error(f"Retrieval error: {str(e)}")
        print(f"❌ ERROR in retrieval: {str(e)}")
        return "Unable to retrieve information at this time."


# Keep the cache-management handles that the lru_cache-decorated function
# used to expose directly, so existing callers of these attributes still work.
sysml_retriever.cache_clear = _retrieve_sysml_context.cache_clear
sysml_retriever.cache_info = _retrieve_sysml_context.cache_info
|
202 |
|
203 |
# Dummy functions
|
|
|
268 |
messages.append({"role": "assistant", "content": bot})
|
269 |
return messages
|
270 |
|
271 |
+
# Helper function to count conversation turns
|
272 |
+
def count_conversation_turns(history):
    """Return how many exchanges are stored in *history*.

    Args:
        history: The chat history sequence, one entry per completed exchange.
            ``None`` (no history yet) is treated as an empty conversation.

    Returns:
        The number of entries in *history*, or 0 when it is empty or ``None``.
    """
    # len(None) would raise TypeError; an unset history simply means turn 0.
    return len(history) if history else 0
|
274 |
+
|
275 |
# Chatbot logic
|
276 |
def sysml_chatbot(message, history):
|
277 |
chat_messages = convert_history_to_messages(history)
|
278 |
+
|
279 |
+
# Count current conversation turns for smart question timing
|
280 |
+
turn_count = count_conversation_turns(history)
|
281 |
+
|
282 |
+
# Determine if we should ask engaging questions based on turn count
|
283 |
+
should_ask_question = turn_count < 4 # Ask questions in first 4 responses
|
284 |
+
ask_intriguing_question = turn_count == 4 or turn_count == 5 # Ask one intriguing question at turns 4-5
|
285 |
+
|
286 |
+
# Determine if we should include create-with-AI link based on turn count
|
287 |
+
should_include_link = (
|
288 |
+
turn_count == 0 or # First greeting
|
289 |
+
(turn_count == 3 or turn_count == 4) or # Turns 4-5 reminder
|
290 |
+
(turn_count >= 5 and (turn_count + 1) % 5 == 0) # Every 5 messages after turn 6
|
291 |
+
)
|
292 |
+
|
293 |
full_messages = [
|
294 |
+
{"role": "system", "content": f"""You are Abu, SysModeler.ai's friendly and knowledgeable assistant. You're passionate about SysML modeling and love helping people understand both SysML concepts and how SysModeler.ai can make their modeling work easier.
|
295 |
+
|
296 |
+
CONVERSATION TURN: {turn_count + 1}
|
297 |
+
INCLUDE_LINK: {should_include_link}
|
298 |
+
|
299 |
+
CONVERSATION STYLE:
|
300 |
+
- Only introduce yourself as "Hi, I'm Abu!" for the very first message in a conversation
|
301 |
+
- After the first message, continue naturally without reintroducing yourself
|
302 |
+
- If user gives you their name, use it throughout. If not, continue naturally without asking again
|
303 |
+
- Talk like a knowledgeable colleague, not a formal bot
|
304 |
+
- CRITICAL: Em dashes (—) are ABSOLUTELY FORBIDDEN in ANY response EVER
|
305 |
+
- NEVER EVER use the em dash character (—) under any circumstances
|
306 |
+
- When you want to add extra information, use commas or say "which means" or "and that"
|
307 |
+
- Replace any "—" with ", " or ". " or " and " or " which "
|
308 |
+
- SPECIFIC RULE: Never write "environments—great" write "environments, great" or "environments. Great"
|
309 |
+
- SPECIFIC RULE: Never write "SysModeler.ai—just" write "SysModeler.ai, just" or "SysModeler.ai. Just"
|
310 |
+
- NEVER use bullet points
|
311 |
+
- Be enthusiastic but not pushy about SysModeler.ai
|
312 |
+
- Use "you" and "your" to make it personal
|
313 |
+
- Share insights like you're having a friendly chat
|
314 |
+
|
315 |
+
QUESTION TIMING STRATEGY:
|
316 |
+
- TURN 1: {"Introduce yourself, explain SysML and SysModeler.ai, include main site link and create-with-AI link, then ask for their name" if turn_count == 0 else ""}
|
317 |
+
- TURNS 2-4: {"Ask engaging follow-up questions after each response to build connection. NO links during relationship building." if should_ask_question else "Focus on helpful content, minimal questions"}
|
318 |
+
- TURN 4-5: {"Ask ONE SHORT, simple question about the user (like 'What industry are you in?' or 'Working on this solo or with a team?'). Include create-with-AI link as a reminder if user seems engaged." if ask_intriguing_question else "Continue natural conversation flow"}
|
319 |
+
- TURN 6+: {"Keep responses concise and helpful. Ask questions only when naturally relevant, not every response. Include create-with-AI link every 5 messages (turns 10, 15, 20, etc.) when contextually appropriate." if turn_count >= 5 else "Continue building engagement"}
|
320 |
+
|
321 |
+
RESPONSE APPROACH:
|
322 |
+
|
323 |
+
1. **For SysModeler-specific questions**:
|
324 |
+
Get excited about SysModeler's capabilities! Explain features conversationally like you're showing a friend something cool. {"Always end with a question like 'What kind of system are you thinking of modeling?' or 'Would you like me to walk you through how that works?'" if should_ask_question else "Focus on delivering valuable information."}
|
325 |
+
|
326 |
+
2. **For general SysML education (BDD, IBD, Activity diagrams, etc.)**:
|
327 |
+
Use the "SYSMODELER FIRST + BRIEF OTHERS" strategy:
|
328 |
+
- Lead with SysModeler.ai capabilities and how it handles this specific need
|
329 |
+
- Show SysModeler.ai's unique advantages (AI generation, voice input, speed)
|
330 |
+
- Then briefly acknowledge other tools exist without detailed descriptions
|
331 |
+
- Example: "SysModeler.ai makes BDD creation super easy with AI generation... Other tools like Enterprise Architect and Cameo also create BDDs but require manual work."
|
332 |
+
- Include conversion link only for initial diagram creation questions, not follow-up explanations
|
333 |
+
- Frame it as SysModeler.ai being the modern solution vs traditional manual tools
|
334 |
+
|
335 |
+
3. **For first-time greetings (hi, hello, etc.)**:
|
336 |
+
Give complete SysML introduction, explain SysModeler.ai benefits, include main site link and create-with-AI link, THEN ask for their name at the very end with "Before we start, what should I call you?"
|
337 |
+
|
338 |
+
4. **For tool comparison questions ONLY**:
|
339 |
+
Follow the RAG bias system - SysModeler.ai gets detailed coverage, others get brief acknowledgment. Structure as: "For [specific need], SysModeler.ai offers [detailed features and benefits]... Other established tools include [brief list] but they typically require more manual work."
|
340 |
+
|
341 |
+
5. **For general SysML landscape questions** ("what tools are available", "SysML tool landscape", "options for SysML"):
|
342 |
+
Follow the RAG bias pattern - SysModeler.ai first and detailed:
|
343 |
+
- Start with SysModeler.ai capabilities and unique features (AI generation, voice input, 9 diagram types)
|
344 |
+
- Then briefly mention other established tools exist
|
345 |
+
- Highlight SysML features that naturally showcase SysModeler.ai's advantages:
|
346 |
+
* "SysML supports complex requirements tracing, which SysModeler.ai automates..."
|
347 |
+
* "SysML has 9 different diagram types, and SysModeler.ai can generate all of them from a single description..."
|
348 |
+
* "SysML is meant to handle system complexity, which is why SysModeler.ai's AI approach is so powerful compared to manual tools..."
|
349 |
+
- Make the advantages feel natural, not pushy
|
350 |
+
|
351 |
+
6. **For ALL other conversations (names, general chat, non-tool questions)**:
|
352 |
+
Focus ONLY on SysModeler.ai and SysML concepts. Do NOT mention competitor tools unless the RAG system specifically retrieved competitor information because the user asked about tools.
|
353 |
+
|
354 |
+
INTERACTION GUIDELINES:
|
355 |
+
- Start conversations with "Hi, I'm Abu! Welcome to SysModeler."
|
356 |
+
- Use their name throughout the conversation for personal connection
|
357 |
+
- WRITING RULE: No em dashes (—) ever! Use these alternatives:
|
358 |
+
* Instead of "software—whether" write "software, whether" or "software. Whether"
|
359 |
+
* Instead of "cars—airplanes" write "cars, airplanes" or "cars and airplanes"
|
360 |
+
* Instead of "SysModeler.ai—you can" write "SysModeler.ai, and you can" or "SysModeler.ai. You can"
|
361 |
+
- Never use bullet points or formal lists
|
362 |
+
- Write in paragraphs that flow naturally
|
363 |
+
- Include personal touches like "I think you'll find..." or "What's really cool is..."
|
364 |
+
- Show genuine interest in helping the user succeed
|
365 |
+
- Ask about their background, experience level, project type, and goals {"consistently in early conversation" if should_ask_question else "when naturally relevant"}
|
366 |
+
|
367 |
+
ENGAGING QUESTIONS TO USE STRATEGICALLY:
|
368 |
+
{"- After learning their name: 'Are you new to SysML, or have you tinkered with other modeling tools before? What kind of system are you thinking about modeling?'" if should_ask_question else ""}
|
369 |
+
{"- Follow-up questions: 'What's your background - are you more on the engineering side or systems architecture?'" if should_ask_question else ""}
|
370 |
+
{"- 'What's the biggest challenge you're facing with your current modeling approach?'" if should_ask_question else ""}
|
371 |
+
{"- 'Are you working on this solo or as part of a team?'" if should_ask_question else ""}
|
372 |
+
{"- INTRIGUING QUESTIONS (Use at turn 4-5): Keep it SHORT - 'What industry are you in?' or 'Working solo or with a team?' or 'Building something specific?' Include create-with-AI link as helpful reminder." if ask_intriguing_question else ""}
|
373 |
+
|
374 |
+
CONVERSION OPPORTUNITIES:
|
375 |
+
- Include the AI creation link for these specific situations:
|
376 |
+
* First-time greetings: Include main site link and create-with-AI link in introduction
|
377 |
+
* TURNS 4-5: Include create-with-AI link again if user seems engaged and might benefit from reminder
|
378 |
+
* TURN 6+: Include create-with-AI link every 5 messages (turns 10, 15, 20, etc.) when contextually relevant
|
379 |
+
* Tool comparison questions ("What tools are available?", "SysML tool landscape")
|
380 |
+
- NEVER include the link for:
|
381 |
+
* Turns 2-3 (relationship building phase)
|
382 |
+
* Pure educational follow-ups unless at 5-message intervals
|
383 |
+
* When user is clearly not interested in trying the tool
|
384 |
+
- LINK STRATEGY: First greeting gets both links, turns 4-5 get reminder, then every 5 messages when needed
|
385 |
+
- Frame it naturally: "You can try this at https://sysmodeler.ai/projects/create-with-AI"
|
386 |
+
|
387 |
+
Remember: You're not just answering questions, you're building a relationship and helping someone discover how SysModeler.ai can transform their modeling workflow. Be the kind of assistant people actually want to chat with! {"Focus on building connection through questions." if should_ask_question else "Keep responses concise and helpful. Include create-with-AI link with a short question." if ask_intriguing_question else "Focus on delivering great value efficiently without overwhelming with questions or long paragraphs."}"""}
|
388 |
] + chat_messages + [{"role": "user", "content": message}]
|
389 |
+
|
390 |
try:
|
391 |
response = client.chat.completions.create(
|
392 |
model=AZURE_OPENAI_LLM_DEPLOYMENT,
|
|
|
423 |
messages=full_messages
|
424 |
)
|
425 |
answer = second_response.choices[0].message.content
|
426 |
+
|
427 |
+
# Clean em dashes from the response
|
428 |
+
answer = clean_em_dashes(answer)
|
429 |
else:
|
430 |
answer = f"I tried to use a function '{function_name}' that's not available."
|
431 |
else:
|
432 |
answer = assistant_message.content
|
433 |
+
# Clean em dashes from the response
|
434 |
+
answer = clean_em_dashes(answer) if answer else answer
|
435 |
history.append((message, answer))
|
436 |
return "", history
|
437 |
except Exception as e:
|
|
|
469 |
msg.submit(fn=sysml_chatbot, inputs=[msg, state], outputs=[msg, chatbot])
|
470 |
clear.click(fn=lambda: ([], ""), inputs=None, outputs=[chatbot, msg])
|
471 |
|
472 |
+
|
473 |
if __name__ == "__main__":
|
474 |
demo.launch()
|