Spaces:
Sleeping
Sleeping
File size: 21,689 Bytes
fc3a249 6b1b4f6 cd9520a 6b1b4f6 fc3a249 cd9520a fc3a249 6b1b4f6 cd9520a fc3a249 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 fc3a249 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb fc3a249 6b1b4f6 fc3a249 6b1b4f6 fc3a249 6b1b4f6 fc3a249 6b1b4f6 fc3a249 cd9520a fc3a249 6b1b4f6 fc3a249 6b1b4f6 cd9520a 6b1b4f6 fc3a249 6b1b4f6 a5ef3d2 6b1b4f6 3e4e3cb 6b1b4f6 3e4e3cb 6b1b4f6 fc3a249 6b1b4f6 fc3a249 6b1b4f6 3e4e3cb fc3a249 6b1b4f6 3e4e3cb fc3a249 6b1b4f6 fc3a249 3c3e008 fc3a249 3c3e008 fc3a249 6b1b4f6 cd9520a 6b1b4f6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 |
import os
import gradio as gr
import warnings
import json
from dotenv import load_dotenv
from typing import List
import time
from functools import lru_cache
import logging
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import AzureOpenAIEmbeddings
from openai import AzureOpenAI
# Patch Gradio bug
import gradio_client.utils
gradio_client.utils.json_schema_to_python_type = lambda schema, defs=None: "string"
# Load environment variables
# load_dotenv() runs first so the os.getenv() calls below can see whatever it loads.
load_dotenv()
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_LLM_DEPLOYMENT = os.getenv("AZURE_OPENAI_LLM_DEPLOYMENT")
AZURE_OPENAI_EMBEDDING_DEPLOYMENT = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT")

# Fail fast at import time and name exactly which settings are absent or empty,
# so a misconfigured deployment can be fixed without guessing.
_missing_env_vars = [
    name
    for name, value in (
        ("AZURE_OPENAI_API_KEY", AZURE_OPENAI_API_KEY),
        ("AZURE_OPENAI_ENDPOINT", AZURE_OPENAI_ENDPOINT),
        ("AZURE_OPENAI_LLM_DEPLOYMENT", AZURE_OPENAI_LLM_DEPLOYMENT),
        ("AZURE_OPENAI_EMBEDDING_DEPLOYMENT", AZURE_OPENAI_EMBEDDING_DEPLOYMENT),
    )
    if not value
]
if _missing_env_vars:
    raise ValueError(
        "Missing one or more Azure OpenAI environment variables: "
        + ", ".join(_missing_env_vars)
    )

# NOTE: this silences every warning category for the whole process.
warnings.filterwarnings("ignore")
# Embeddings
# Embedding client built from the Azure settings read above; it is handed to
# FAISS.load_local() below.
embeddings = AzureOpenAIEmbeddings(
    azure_deployment=AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    openai_api_key=AZURE_OPENAI_API_KEY,
    openai_api_version="2025-01-01-preview",
    chunk_size=1000
)
# Vectorstore
# The index directory is resolved relative to this file, not the current
# working directory.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
FAISS_INDEX_PATH = os.path.join(SCRIPT_DIR, "faiss_index_sysml")
# SECURITY NOTE(review): allow_dangerous_deserialization=True presumably opts in
# to unpickling the stored index/docstore. That is only safe if the files under
# FAISS_INDEX_PATH are produced and controlled by this project - confirm.
vectorstore = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
# OpenAI client
# Chat-completions client; uses the same endpoint, key and API version string
# as the embedding client above.
client = AzureOpenAI(
    api_key=AZURE_OPENAI_API_KEY,
    api_version="2025-01-01-preview",
    azure_endpoint=AZURE_OPENAI_ENDPOINT
)
# Logger
# Module-level logger; no handler or level is configured in this section.
logger = logging.getLogger(__name__)
# Post-processing function to remove em dashes
def clean_em_dashes(text: str) -> str:
    """Replace every em dash in ``text`` with comma/period punctuation.

    Spaces and tabs directly around an em dash are removed first, so
    ``"a — b"`` and ``"a—b"`` both become ``"a, b"`` (previously the spaced
    form produced ``"a ,  b"`` and never matched the context-specific rules).
    A handful of common continuations ("—which", "—no", ...) get a tailored
    replacement; any other em dash becomes ``", "``.

    Args:
        text: The model response to clean.

    Returns:
        The text with no em dash characters left in it.
    """
    # Normalise "word — word" to "word—word" so the rules below apply to both
    # spellings. Only spaces/tabs are trimmed; line breaks are left alone.
    segments = text.split("—")
    last = len(segments) - 1
    for i, segment in enumerate(segments):
        if i > 0:
            segment = segment.lstrip(" \t")
        if i < last:
            segment = segment.rstrip(" \t")
        segments[i] = segment
    text = "—".join(segments)

    # Context-specific rewrites, applied in order before the catch-all.
    contextual_rewrites = (
        ("—which", ", which"),
        ("—that", ", that"),
        ("—no", ". No"),
        ("—and", ", and"),
        ("—but", ", but"),
        ("—so", ", so"),
        ("—you", ". You"),
        ("—it", ". It"),
        ("—just", ". Just"),
        ("—great", ", great"),
        ("—this", ". This"),
    )
    for pattern, replacement in contextual_rewrites:
        text = text.replace(pattern, replacement)

    # Catch any remaining em dashes
    return text.replace("—", ", ")
# Enhanced SysML retriever with proper metadata filtering & weighting
# Substrings looked up in the lower-cased document source.
_SYSMODELER_SOURCE_MARKERS = ('sysmodeler', 'user manual')
# Substrings looked up in the lower-cased page content.
_SYSMODELER_CONTENT_MARKERS_LOWER = (
    'sysmodeler.ai',
    'workspace.sysmodeler.ai',
    'workspace dashboard',
)
# Substrings looked up in the page content exactly as written (case-sensitive).
_SYSMODELER_CONTENT_MARKERS_EXACT = (
    'Create with AI',
    'Canvas Overview',
    'AI-powered',
    'voice input',
    'Canvas interface',
    'Project Creation',
    'Shape Palette',
    'AI Copilot',
    'SynthAgent',
)
# Query substrings that switch selection into tool-comparison mode.
_TOOL_COMPARISON_KEYWORDS = ('tool', 'compare', 'choose', 'vs', 'versus', 'better')


def _is_sysmodeler_doc(doc_source: str, page_content: str) -> bool:
    """Return True when the (lower-cased) source or the content has a SysModeler marker."""
    content_lower = page_content.lower()
    return (
        any(marker in doc_source for marker in _SYSMODELER_SOURCE_MARKERS)
        or any(marker in content_lower for marker in _SYSMODELER_CONTENT_MARKERS_LOWER)
        or any(marker in page_content for marker in _SYSMODELER_CONTENT_MARKERS_EXACT)
    )


@lru_cache(maxsize=100)
def _retrieve_sysml_context(query: str) -> str:
    """Search the vector store, re-rank the hits and return the joined context text.

    Raises whatever the vector store raises. Because the exception leaves this
    function, ``lru_cache`` stores nothing for a failed query and the next call
    with the same query retries the search.
    """
    print(f"\n🔍 QUERY: {query}")
    print("="*80)
    # Get more results for filtering and weighting
    results = vectorstore.similarity_search_with_score(query, k=100)
    print(f"📊 Total results retrieved: {len(results)}")

    # Apply metadata filtering and weighting
    weighted_results = []
    sysmodeler_count = 0
    other_count = 0
    for i, (doc, score) in enumerate(results):
        # Get document source
        doc_source = doc.metadata.get('source', '').lower() if hasattr(doc, 'metadata') else str(doc).lower()
        is_sysmodeler = _is_sysmodeler_doc(doc_source, doc.page_content)

        if is_sysmodeler:
            # BOOST SysModeler content: reduce score by 40% (lower score = higher relevance)
            weighted_score = score * 0.6
            source_type = "SysModeler"
            sysmodeler_count += 1
        else:
            # Keep original score for other content
            weighted_score = score
            source_type = "Other"
            other_count += 1

        # Tag the document so the final-selection log below can read the values back
        if not hasattr(doc, 'metadata'):
            doc.metadata = {}
        doc.metadata['source_type'] = 'sysmodeler' if is_sysmodeler else 'other'
        doc.metadata['weighted_score'] = weighted_score
        doc.metadata['original_score'] = score
        weighted_results.append((doc, weighted_score, source_type))

        # Log each document's processing
        source_name = doc.metadata.get('source', 'Unknown')[:50]
        print(f"📄 Doc {i+1}: {source_name}... | Original: {score:.4f} | Weighted: {weighted_score:.4f} | Type: {source_type}")

    print(f"\n📈 CLASSIFICATION & WEIGHTING RESULTS:")
    print(f" SysModeler docs: {sysmodeler_count} (boosted by 40%)")
    print(f" Other docs: {other_count} (original scores)")

    # Sort by weighted scores (lower = more relevant)
    weighted_results.sort(key=lambda x: x[1])

    # Substring match on the lower-cased query decides the selection strategy
    query_lower = query.lower()
    is_tool_comparison = any(word in query_lower for word in _TOOL_COMPARISON_KEYWORDS)
    if is_tool_comparison:
        # For tool comparisons: heavily favor SysModeler but include others
        print(f"\n🎯 TOOL COMPARISON QUERY DETECTED")
        print(f" Strategy: Heavy SysModeler focus + selective others")
        # Best 8 SysModeler hits followed by the best 4 other hits
        sysmodeler_docs = [doc for doc, _, type_ in weighted_results if type_ == "SysModeler"][:8]
        other_docs = [doc for doc, _, type_ in weighted_results if type_ == "Other"][:4]
        final_docs = sysmodeler_docs + other_docs
    else:
        # For general SysML: balanced but still boost SysModeler
        print(f"\n🎯 GENERAL SYSML QUERY DETECTED")
        print(f" Strategy: Balanced with SysModeler preference")
        # Take top 12 weighted results (mixed)
        final_docs = [doc for doc, _, _ in weighted_results[:12]]

    # Log final selection
    print(f"\n📋 FINAL SELECTION ({len(final_docs)} docs):")
    sysmodeler_selected = 0
    other_selected = 0
    for i, doc in enumerate(final_docs):
        source_type = doc.metadata.get('source_type', 'unknown')
        source_name = doc.metadata.get('source', 'Unknown')
        weighted_score = doc.metadata.get('weighted_score', 0)
        if source_type == 'sysmodeler':
            sysmodeler_selected += 1
            type_emoji = "✅"
        else:
            other_selected += 1
            type_emoji = "📚"
        print(f" {i+1}. {type_emoji} {source_name} (weighted: {weighted_score:.4f})")

    print(f"\n📊 FINAL COMPOSITION:")
    print(f" SysModeler docs: {sysmodeler_selected}")
    print(f" Other docs: {other_selected}")
    print("="*80)

    return "\n\n".join(doc.page_content for doc in final_docs)


def sysml_retriever(query: str) -> str:
    """Return retrieval context for ``query``, favouring SysModeler documents.

    Up to 100 hits are fetched from the vector store. Hits recognised as
    SysModeler material have their score multiplied by 0.6 (lower is treated
    as more relevant), the hits are sorted by the weighted score, and either
    the top 12 (general queries) or the top 8 SysModeler plus top 4 other
    hits (queries containing a tool-comparison keyword) are joined with blank
    lines.

    Successful results are memoised per query (up to 100 entries). Failures
    are not cached: the fallback message is produced here, outside the cached
    helper, so a transient outage does not stick to a query.

    Args:
        query: The search text.

    Returns:
        The joined page contents, or the fixed message
        "Unable to retrieve information at this time." when retrieval raises.
    """
    try:
        return _retrieve_sysml_context(query)
    except Exception as e:
        logger.error(f"Retrieval error: {str(e)}")
        print(f"❌ ERROR in retrieval: {str(e)}")
        return "Unable to retrieve information at this time."


# Keep the lru_cache helpers reachable through the public name.
sysml_retriever.cache_info = _retrieve_sysml_context.cache_info
sysml_retriever.cache_clear = _retrieve_sysml_context.cache_clear
# Tools for function calling
# Function-calling schema sent with the first chat completion request.
# It declares a single tool, "SysMLRetriever", taking one required string
# argument named "query".
tools_definition = [
    dict(
        type="function",
        function=dict(
            name="SysMLRetriever",
            description="Use this to answer questions about SysML diagrams and modeling.",
            parameters=dict(
                type="object",
                properties=dict(
                    query=dict(
                        type="string",
                        description="The search query to find information about SysML",
                    ),
                ),
                required=["query"],
            ),
        ),
    ),
]
# Tool execution mapping
# Maps the tool name the model may call to the Python callable that runs it.
tool_mapping = dict(SysMLRetriever=sysml_retriever)
# Convert chat history
def convert_history_to_messages(history):
    """Flatten ``(user, bot)`` pairs into a chat-completions message list.

    Each pair yields a ``"user"`` message followed by an ``"assistant"``
    message, in the order the pairs appear in ``history``.
    """
    return [
        {"role": role, "content": content}
        for user_text, bot_text in history
        for role, content in (("user", user_text), ("assistant", bot_text))
    ]
# Helper function to count conversation turns
def count_conversation_turns(history):
    """Return the number of entries currently stored in ``history``."""
    completed_turns = len(history)
    return completed_turns
# Chatbot logic
# Shown when the model returns no text, so that a None/empty answer is never
# stored in the history and replayed to the API as an assistant message.
_EMPTY_ANSWER_FALLBACK = "Sorry, I couldn't come up with a response. Could you try rephrasing that?"


def _build_system_prompt(turn_count):
    """Render the system prompt for a conversation with ``turn_count`` earlier exchanges.

    The prompt body is deliberately flush-left: it is a runtime string and its
    whitespace is sent to the model verbatim.
    """
    # Determine if we should ask engaging questions based on turn count
    should_ask_question = turn_count < 4  # Ask questions in first 4 responses
    # NOTE(review): the prompt shows the turn as turn_count + 1, so this fires
    # on displayed turns 5-6, while the prompt text says "TURN 4-5" and the
    # link reminder below fires on turn_count 3 or 4. Confirm which is intended.
    ask_intriguing_question = turn_count == 4 or turn_count == 5
    # Determine if we should include create-with-AI link based on turn count
    should_include_link = (
        turn_count == 0 or  # First greeting
        (turn_count == 3 or turn_count == 4) or  # Turns 4-5 reminder
        (turn_count >= 5 and (turn_count + 1) % 5 == 0)  # Displayed turns 10, 15, 20, ...
    )
    return f"""You are Abu, SysModeler.ai's friendly and knowledgeable assistant. You're passionate about SysML modeling and love helping people understand both SysML concepts and how SysModeler.ai can make their modeling work easier.
CONVERSATION TURN: {turn_count + 1}
INCLUDE_LINK: {should_include_link}
CONVERSATION STYLE:
- Only introduce yourself as "Hi, I'm Abu!" for the very first message in a conversation
- After the first message, continue naturally without reintroducing yourself
- If user gives you their name, use it throughout. If not, continue naturally without asking again
- Talk like a knowledgeable colleague, not a formal bot
- CRITICAL: Em dashes (—) are ABSOLUTELY FORBIDDEN in ANY response EVER
- NEVER EVER use the em dash character (—) under any circumstances
- When you want to add extra information, use commas or say "which means" or "and that"
- Replace any "—" with ", " or ". " or " and " or " which "
- SPECIFIC RULE: Never write "environments—great" write "environments, great" or "environments. Great"
- SPECIFIC RULE: Never write "SysModeler.ai—just" write "SysModeler.ai, just" or "SysModeler.ai. Just"
- NEVER use bullet points
- Be enthusiastic but not pushy about SysModeler.ai
- Use "you" and "your" to make it personal
- Share insights like you're having a friendly chat
QUESTION TIMING STRATEGY:
- TURN 1: {"Introduce yourself, explain SysML and SysModeler.ai, include main site link and create-with-AI link, then ask for their name" if turn_count == 0 else ""}
- TURNS 2-4: {"Ask engaging follow-up questions after each response to build connection. NO links during relationship building." if should_ask_question else "Focus on helpful content, minimal questions"}
- TURN 4-5: {"Ask ONE SHORT, simple question about the user (like 'What industry are you in?' or 'Working on this solo or with a team?'). Include create-with-AI link as a reminder if user seems engaged." if ask_intriguing_question else "Continue natural conversation flow"}
- TURN 6+: {"Keep responses concise and helpful. Ask questions only when naturally relevant, not every response. Include create-with-AI link every 5 messages (turns 10, 15, 20, etc.) when contextually appropriate." if turn_count >= 5 else "Continue building engagement"}
RESPONSE APPROACH:
1. **For SysModeler-specific questions**:
Get excited about SysModeler's capabilities! Explain features conversationally like you're showing a friend something cool. {"Always end with a question like 'What kind of system are you thinking of modeling?' or 'Would you like me to walk you through how that works?'" if should_ask_question else "Focus on delivering valuable information."}
2. **For general SysML education (BDD, IBD, Activity diagrams, etc.)**:
Use the "SYSMODELER FIRST + BRIEF OTHERS" strategy:
- Lead with SysModeler.ai capabilities and how it handles this specific need
- Show SysModeler.ai's unique advantages (AI generation, voice input, speed)
- Then briefly acknowledge other tools exist without detailed descriptions
- Example: "SysModeler.ai makes BDD creation super easy with AI generation... Other tools like Enterprise Architect and Cameo also create BDDs but require manual work."
- Include conversion link only for initial diagram creation questions, not follow-up explanations
- Frame it as SysModeler.ai being the modern solution vs traditional manual tools
3. **For first-time greetings (hi, hello, etc.)**:
Give complete SysML introduction, explain SysModeler.ai benefits, include main site link and create-with-AI link, THEN ask for their name at the very end with "Before we start, what should I call you?"
4. **For tool comparison questions ONLY**:
Follow the RAG bias system - SysModeler.ai gets detailed coverage, others get brief acknowledgment. Structure as: "For [specific need], SysModeler.ai offers [detailed features and benefits]... Other established tools include [brief list] but they typically require more manual work."
5. **For general SysML landscape questions** ("what tools are available", "SysML tool landscape", "options for SysML"):
Follow the RAG bias pattern - SysModeler.ai first and detailed:
- Start with SysModeler.ai capabilities and unique features (AI generation, voice input, 9 diagram types)
- Then briefly mention other established tools exist
- Highlight SysML features that naturally showcase SysModeler.ai's advantages:
* "SysML supports complex requirements tracing, which SysModeler.ai automates..."
* "SysML has 9 different diagram types, and SysModeler.ai can generate all of them from a single description..."
* "SysML is meant to handle system complexity, which is why SysModeler.ai's AI approach is so powerful compared to manual tools..."
- Make the advantages feel natural, not pushy
6. **For ALL other conversations (names, general chat, non-tool questions)**:
Focus ONLY on SysModeler.ai and SysML concepts. Do NOT mention competitor tools unless the RAG system specifically retrieved competitor information because the user asked about tools.
INTERACTION GUIDELINES:
- Start conversations with "Hi, I'm Abu! Welcome to SysModeler."
- Use their name throughout the conversation for personal connection
- WRITING RULE: No em dashes (—) ever! Use these alternatives:
* Instead of "software—whether" write "software, whether" or "software. Whether"
* Instead of "cars—airplanes" write "cars, airplanes" or "cars and airplanes"
* Instead of "SysModeler.ai—you can" write "SysModeler.ai, and you can" or "SysModeler.ai. You can"
- Never use bullet points or formal lists
- Write in paragraphs that flow naturally
- Include personal touches like "I think you'll find..." or "What's really cool is..."
- Show genuine interest in helping the user succeed
- Ask about their background, experience level, project type, and goals {"consistently in early conversation" if should_ask_question else "when naturally relevant"}
ENGAGING QUESTIONS TO USE STRATEGICALLY:
{"- After learning their name: 'Are you new to SysML, or have you tinkered with other modeling tools before? What kind of system are you thinking about modeling?'" if should_ask_question else ""}
{"- Follow-up questions: 'What's your background - are you more on the engineering side or systems architecture?'" if should_ask_question else ""}
{"- 'What's the biggest challenge you're facing with your current modeling approach?'" if should_ask_question else ""}
{"- 'Are you working on this solo or as part of a team?'" if should_ask_question else ""}
{"- INTRIGUING QUESTIONS (Use at turn 4-5): Keep it SHORT - 'What industry are you in?' or 'Working solo or with a team?' or 'Building something specific?' Include create-with-AI link as helpful reminder." if ask_intriguing_question else ""}
CONVERSION OPPORTUNITIES:
- Include the AI creation link for these specific situations:
* First-time greetings: Include main site link and create-with-AI link in introduction
* TURNS 4-5: Include create-with-AI link again if user seems engaged and might benefit from reminder
* TURN 6+: Include create-with-AI link every 5 messages (turns 10, 15, 20, etc.) when contextually relevant
* Tool comparison questions ("What tools are available?", "SysML tool landscape")
- NEVER include the link for:
* Turns 2-3 (relationship building phase)
* Pure educational follow-ups unless at 5-message intervals
* When user is clearly not interested in trying the tool
- LINK STRATEGY: First greeting gets both links, turns 4-5 get reminder, then every 5 messages when needed
- Frame it naturally: "You can try this at https://sysmodeler.ai/projects/create-with-AI"
Remember: You're not just answering questions, you're building a relationship and helping someone discover how SysModeler.ai can transform their modeling workflow. Be the kind of assistant people actually want to chat with! {"Focus on building connection through questions." if should_ask_question else "Keep responses concise and helpful. Include create-with-AI link with a short question." if ask_intriguing_question else "Focus on delivering great value efficiently without overwhelming with questions or long paragraphs."}"""


def sysml_chatbot(message, history):
    """Answer one user message with a retrieval step and record it in ``history``.

    Flow: build the system prompt for the current turn, force a call to the
    ``SysMLRetriever`` tool, run the first requested tool, send its output
    back in a second completion request, then strip em dashes from the answer.

    Args:
        message: The user's new message.
        history: List of ``(user, assistant)`` pairs from earlier turns. It is
            appended to in place; the UI wiring in this file relies on that
            because it does not list the state component among its outputs.

    Returns:
        ``("", history)``: the empty string is the first output (the UI maps
        it to the textbox) and ``history`` is the second (the chat display).
        Any exception raised while talking to the model or the tool is logged
        and turned into a generic apology entry instead of propagating.
    """
    # Count current conversation turns for smart question timing
    turn_count = count_conversation_turns(history)
    full_messages = (
        [{"role": "system", "content": _build_system_prompt(turn_count)}]
        + convert_history_to_messages(history)
        + [{"role": "user", "content": message}]
    )
    try:
        response = client.chat.completions.create(
            model=AZURE_OPENAI_LLM_DEPLOYMENT,
            messages=full_messages,
            tools=tools_definition,
            tool_choice={"type": "function", "function": {"name": "SysMLRetriever"}}
        )
        assistant_message = response.choices[0].message
        if assistant_message.tool_calls:
            # Only the first requested tool call is executed and echoed back.
            tool_call = assistant_message.tool_calls[0]
            function_name = tool_call.function.name
            function_args = json.loads(tool_call.function.arguments)
            if function_name in tool_mapping:
                function_response = tool_mapping[function_name](**function_args)
                # Replay the assistant's tool request, then supply the tool result.
                full_messages.append({
                    "role": "assistant",
                    "content": None,
                    "tool_calls": [{
                        "id": tool_call.id,
                        "type": "function",
                        "function": {
                            "name": function_name,
                            "arguments": tool_call.function.arguments
                        }
                    }]
                })
                full_messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": function_response
                })
                second_response = client.chat.completions.create(
                    model=AZURE_OPENAI_LLM_DEPLOYMENT,
                    messages=full_messages
                )
                answer = second_response.choices[0].message.content
            else:
                answer = f"I tried to use a function '{function_name}' that's not available."
        else:
            answer = assistant_message.content
        # Clean em dashes from the response; never store None/empty text in the
        # history, because it would be replayed as an assistant message with
        # null content on every later request.
        answer = clean_em_dashes(answer) if answer else _EMPTY_ANSWER_FALLBACK
        history.append((message, answer))
        return "", history
    except Exception as e:
        # Top-level boundary for the UI callback: record the traceback, keep the chat alive.
        logger.exception("Error in function calling: %s", e)
        print(f"Error in function calling: {str(e)}")
        history.append((message, "Sorry, something went wrong."))
        return "", history
# === Gradio UI ===
# NOTE(review): the source's indentation was lost, so the component nesting
# below (Clear button and State placed after the Row) is reconstructed - confirm.
with gr.Blocks(css="""
#submit-btn {
height: 100%;
background-color: #48CAE4;
color: white;
font-size: 1.5em;
}
""") as demo:
    gr.Markdown("## SysModeler Chatbot")
    chatbot = gr.Chatbot(height=600)
    with gr.Row():
        with gr.Column(scale=5):
            msg = gr.Textbox(
                placeholder="Ask me about SysML diagrams or concepts...",
                lines=3,
                show_label=False
            )
        with gr.Column(scale=1, min_width=50):
            submit_btn = gr.Button("➤", elem_id="submit-btn")
    clear = gr.Button("Clear")
    # Conversation history as a list of (user, assistant) pairs.
    state = gr.State([])

    # Both the button and pressing Enter in the textbox send the message.
    # sysml_chatbot returns ("", history): the textbox is emptied and the chat
    # display is refreshed from the history list it appended to.
    submit_btn.click(fn=sysml_chatbot, inputs=[msg, state], outputs=[msg, chatbot])
    msg.submit(fn=sysml_chatbot, inputs=[msg, state], outputs=[msg, chatbot])
    # Clear must also reset the stored history; otherwise the old conversation
    # reappears (and is re-sent to the model) on the next message.
    clear.click(fn=lambda: ([], "", []), inputs=None, outputs=[chatbot, msg, state])

if __name__ == "__main__":
    demo.launch()