Spaces:

sysmodelerinc
/

SysModeler-Chatbot

Sleeping

App Files Files Community

SysModeler committed 21 days ago

Commit

3e4e3cb

verified ·

1 Parent(s): 922d194

Update app.py

Browse files

Files changed (1) hide show

app.py +260 -4

app.py CHANGED Viewed

@@ -52,15 +52,152 @@ client = AzureOpenAI(
 # Logger
 logger = logging.getLogger(__name__)
-# SysML retriever function
 @lru_cache(maxsize=100)
 def sysml_retriever(query: str) -> str:
     try:
-        results = vectorstore.similarity_search(query, k=100)
-        contexts = [doc.page_content for doc in results]
         return "\n\n".join(contexts)
     except Exception as e:
         logger.error(f"Retrieval error: {str(e)}")
         return "Unable to retrieve information at this time."
 # Dummy functions
@@ -131,12 +268,125 @@ def convert_history_to_messages(history):
         messages.append({"role": "assistant", "content": bot})
     return messages
 # Chatbot logic
 def sysml_chatbot(message, history):
     chat_messages = convert_history_to_messages(history)
     full_messages = [
-        {"role": "system", "content": "You are a helpful SysML modeling assistant and also a capable smart Assistant"}
     ] + chat_messages + [{"role": "user", "content": message}]
     try:
         response = client.chat.completions.create(
             model=AZURE_OPENAI_LLM_DEPLOYMENT,
@@ -173,10 +423,15 @@ def sysml_chatbot(message, history):
                     messages=full_messages
                 )
                 answer = second_response.choices[0].message.content
             else:
                 answer = f"I tried to use a function '{function_name}' that's not available."
         else:
             answer = assistant_message.content
         history.append((message, answer))
         return "", history
     except Exception as e:
@@ -214,5 +469,6 @@ with gr.Blocks(css="""
     msg.submit(fn=sysml_chatbot, inputs=[msg, state], outputs=[msg, chatbot])
     clear.click(fn=lambda: ([], ""), inputs=None, outputs=[chatbot, msg])
 if __name__ == "__main__":
     demo.launch()

 # Logger
 logger = logging.getLogger(__name__)
+# Post-processing function to remove em dashes
+def clean_em_dashes(text: str) -> str:
+    """Replace every em dash in *text* with a comma or a sentence break.
+
+    A dash directly followed by "no", "you", "it", "just" or "this" (prefix
+    match, lower case only) becomes ". " plus the capitalized word; every
+    other dash becomes ", ". Spaces next to a dash are absorbed first, so
+    "a — b" yields "a, b" rather than "a ,  b".
+
+    Empty or None input is returned unchanged instead of raising, so a
+    missing model reply can be passed straight through.
+    """
+    if not text:
+        return text
+    # Collapse spaced dashes to bare ones; the both-sides form must go first.
+    for spaced in (" — ", " —", "— "):
+        text = text.replace(spaced, "—")
+    # Only the sentence-break cases need a rule of their own. The former
+    # "—which", "—that", "—and", "—but", "—so" and "—great" rules produced
+    # exactly what the catch-all at the end produces, so they are dropped.
+    sentence_breaks = (
+        ("—no", ". No"),
+        ("—you", ". You"),
+        ("—it", ". It"),
+        ("—just", ". Just"),
+        ("—this", ". This"),
+    )
+    for needle, replacement in sentence_breaks:
+        text = text.replace(needle, replacement)
+    # Catch any remaining em dashes
+    return text.replace("—", ", ")
+# Enhanced SysML retriever with metadata-based classification and score weighting.
+#
+# Fetches the 100 nearest chunks for `query` together with their scores, tags
+# each chunk as SysModeler or other content (by source name or page-content
+# keywords), multiplies the score of SysModeler chunks by 0.6, sorts ascending
+# by the weighted score, and returns the page content of the selected chunks
+# joined by blank lines. Tool-comparison queries get up to 8 SysModeler plus
+# up to 4 other chunks; every other query gets the top 12 overall.
+#
+# On any exception the error is logged and printed, and a fixed fallback
+# sentence is returned instead of raising.
+# NOTE(review): lru_cache memoizes whatever this returns, the fallback
+# sentence included, and the diagnostic prints only run on a cache miss.
 @lru_cache(maxsize=100)
 def sysml_retriever(query: str) -> str:
     try:
+        print(f"\n🔍 QUERY: {query}")
+        print("="*80)
+        # Over-fetch (k=100) so there is enough material to classify, re-weight and trim
+        results = vectorstore.similarity_search_with_score(query, k=100)
+        print(f"📊 Total results retrieved: {len(results)}")
+        # Classify every hit and compute its weighted score
+        weighted_results = []
+        sysmodeler_count = 0
+        other_count = 0
+        for i, (doc, score) in enumerate(results):
+            # Lower-cased document source; falls back to str(doc) when doc has no metadata attribute
+            doc_source = doc.metadata.get('source', '').lower() if hasattr(doc, 'metadata') else str(doc).lower()
+            # Determine if this is SysModeler content. Source checks and the
+            # checks that call .lower() are case-insensitive; the remaining
+            # page_content checks are case-sensitive literals.
+            is_sysmodeler = (
+                'sysmodeler' in doc_source or
+                'user manual' in doc_source or
+                'sysmodeler.ai' in doc.page_content.lower() or
+                'workspace.sysmodeler.ai' in doc.page_content.lower() or
+                'Create with AI' in doc.page_content or
+                'Canvas Overview' in doc.page_content or
+                'AI-powered' in doc.page_content or
+                'voice input' in doc.page_content or
+                'Canvas interface' in doc.page_content or
+                'Project Creation' in doc.page_content or
+                'Shape Palette' in doc.page_content or
+                'AI Copilot' in doc.page_content or
+                'SynthAgent' in doc.page_content or
+                'workspace dashboard' in doc.page_content.lower()
+            )
+            # Apply weighting based on source
+            if is_sysmodeler:
+                # BOOST SysModeler content: reduce score by 40% (lower score = higher relevance)
+                # NOTE(review): assumes the store returns a distance-like score — TODO confirm
+                weighted_score = score * 0.6
+                source_type = "SysModeler"
+                sysmodeler_count += 1
+            else:
+                # Keep original score for other content
+                weighted_score = score
+                source_type = "Other"
+                other_count += 1
+            # Record classification and scores on the document itself (mutates
+            # doc.metadata in place); the final-selection log below reads them back
+            doc.metadata = doc.metadata if hasattr(doc, 'metadata') else {}
+            doc.metadata['source_type'] = 'sysmodeler' if is_sysmodeler else 'other'
+            doc.metadata['weighted_score'] = weighted_score
+            doc.metadata['original_score'] = score
+            weighted_results.append((doc, weighted_score, source_type))
+            # Log each document's processing (source name truncated to 50 characters)
+            source_name = doc.metadata.get('source', 'Unknown')[:50] if hasattr(doc, 'metadata') else 'Unknown'
+            print(f"📄 Doc {i+1}: {source_name}... | Original: {score:.4f} | Weighted: {weighted_score:.4f} | Type: {source_type}")
+        print(f"\n📈 CLASSIFICATION & WEIGHTING RESULTS:")
+        print(f"   SysModeler docs: {sysmodeler_count} (boosted by 40%)")
+        print(f"   Other docs: {other_count} (original scores)")
+        # Sort ascending by weighted score, so the smallest weighted score comes first
+        weighted_results.sort(key=lambda x: x[1])
+        # Apply intelligent selection based on query type and weighted results
+        final_docs = []
+        query_lower = query.lower()
+        # Determine query type for adaptive filtering. This is a substring
+        # test, so 'tool' also matches "tools" and 'vs' matches inside longer words.
+        is_tool_comparison = any(word in query_lower for word in ['tool', 'compare', 'choose', 'vs', 'versus', 'better'])
+        # NOTE(review): is_general_sysml is not read again in this function
+        is_general_sysml = not is_tool_comparison
+        if is_tool_comparison:
+            # For tool comparisons: heavily favor SysModeler but include others
+            print(f"\n🎯 TOOL COMPARISON QUERY DETECTED")
+            print(f"   Strategy: Heavy SysModeler focus + selective others")
+            # Best 8 SysModeler hits first, then the best 4 other hits
+            sysmodeler_docs = [(doc, score) for doc, score, type_ in weighted_results if type_ == "SysModeler"][:8]
+            other_docs = [(doc, score) for doc, score, type_ in weighted_results if type_ == "Other"][:4]
+            final_docs = [doc for doc, _ in sysmodeler_docs] + [doc for doc, _ in other_docs]
+        else:
+            # For general SysML: balanced but still boost SysModeler
+            print(f"\n🎯 GENERAL SYSML QUERY DETECTED")
+            print(f"   Strategy: Balanced with SysModeler preference")
+            # Take top 12 weighted results (mixed)
+            final_docs = [doc for doc, _, _ in weighted_results[:12]]
+        # Log final selection
+        print(f"\n📋 FINAL SELECTION ({len(final_docs)} docs):")
+        sysmodeler_selected = 0
+        other_selected = 0
+        for i, doc in enumerate(final_docs):
+            source_type = doc.metadata.get('source_type', 'unknown')
+            source_name = doc.metadata.get('source', 'Unknown')
+            weighted_score = doc.metadata.get('weighted_score', 0)
+            # NOTE(review): original_score is read here but not used in the log line below
+            original_score = doc.metadata.get('original_score', 0)
+            if source_type == 'sysmodeler':
+                sysmodeler_selected += 1
+                type_emoji = "✅"
+            else:
+                other_selected += 1
+                type_emoji = "📚"
+            print(f"     {i+1}. {type_emoji} {source_name} (weighted: {weighted_score:.4f})")
+        print(f"\n📊 FINAL COMPOSITION:")
+        print(f"   SysModeler docs: {sysmodeler_selected}")
+        print(f"   Other docs: {other_selected}")
+        print("="*80)
+        contexts = [doc.page_content for doc in final_docs]
         return "\n\n".join(contexts)
     except Exception as e:
         logger.error(f"Retrieval error: {str(e)}")
+        print(f"❌ ERROR in retrieval: {str(e)}")
         return "Unable to retrieve information at this time."
 # Dummy functions
         messages.append({"role": "assistant", "content": bot})
     return messages
+# Helper function to count conversation turns
+def count_conversation_turns(history):
+    """Return the number of entries currently held in *history*."""
+    turns_so_far = len(history)
+    return turns_so_far
 # Chatbot logic
 def sysml_chatbot(message, history):
     chat_messages = convert_history_to_messages(history)
+    # Count current conversation turns for smart question timing
+    turn_count = count_conversation_turns(history)
+    # Determine if we should ask engaging questions based on turn count
+    should_ask_question = turn_count < 4  # Ask questions in first 4 responses
+    ask_intriguing_question = turn_count == 4 or turn_count == 5  # Ask one intriguing question at turns 4-5
+    # Determine if we should include create-with-AI link based on turn count
+    should_include_link = (
+        turn_count == 0 or  # First greeting
+        (turn_count == 3 or turn_count == 4) or  # Turns 4-5 reminder
+        (turn_count >= 5 and (turn_count + 1) % 5 == 0)  # Every 5 messages after turn 6
+    )
     full_messages = [
+        {"role": "system", "content": f"""You are Abu, SysModeler.ai's friendly and knowledgeable assistant. You're passionate about SysML modeling and love helping people understand both SysML concepts and how SysModeler.ai can make their modeling work easier.
+CONVERSATION TURN: {turn_count + 1}
+INCLUDE_LINK: {should_include_link}
+CONVERSATION STYLE:
+- Only introduce yourself as "Hi, I'm Abu!" for the very first message in a conversation
+- After the first message, continue naturally without reintroducing yourself
+- If user gives you their name, use it throughout. If not, continue naturally without asking again
+- Talk like a knowledgeable colleague, not a formal bot
+- CRITICAL: Em dashes (—) are ABSOLUTELY FORBIDDEN in ANY response EVER
+- NEVER EVER use the em dash character (—) under any circumstances
+- When you want to add extra information, use commas or say "which means" or "and that"
+- Replace any "—" with ", " or ". " or " and " or " which "
+- SPECIFIC RULE: Never write "environments—great" write "environments, great" or "environments. Great"
+- SPECIFIC RULE: Never write "SysModeler.ai—just" write "SysModeler.ai, just" or "SysModeler.ai. Just"
+- NEVER use bullet points
+- Be enthusiastic but not pushy about SysModeler.ai
+- Use "you" and "your" to make it personal
+- Share insights like you're having a friendly chat
+QUESTION TIMING STRATEGY:
+- TURN 1: {"Introduce yourself, explain SysML and SysModeler.ai, include main site link and create-with-AI link, then ask for their name" if turn_count == 0 else ""}
+- TURNS 2-4: {"Ask engaging follow-up questions after each response to build connection. NO links during relationship building." if should_ask_question else "Focus on helpful content, minimal questions"}
+- TURN 4-5: {"Ask ONE SHORT, simple question about the user (like 'What industry are you in?' or 'Working on this solo or with a team?'). Include create-with-AI link as a reminder if user seems engaged." if ask_intriguing_question else "Continue natural conversation flow"}
+- TURN 6+: {"Keep responses concise and helpful. Ask questions only when naturally relevant, not every response. Include create-with-AI link every 5 messages (turns 10, 15, 20, etc.) when contextually appropriate." if turn_count >= 5 else "Continue building engagement"}
+RESPONSE APPROACH:
+1. **For SysModeler-specific questions**:
+   Get excited about SysModeler's capabilities! Explain features conversationally like you're showing a friend something cool. {"Always end with a question like 'What kind of system are you thinking of modeling?' or 'Would you like me to walk you through how that works?'" if should_ask_question else "Focus on delivering valuable information."}
+2. **For general SysML education (BDD, IBD, Activity diagrams, etc.)**:
+   Use the "SYSMODELER FIRST + BRIEF OTHERS" strategy:
+   - Lead with SysModeler.ai capabilities and how it handles this specific need
+   - Show SysModeler.ai's unique advantages (AI generation, voice input, speed)
+   - Then briefly acknowledge other tools exist without detailed descriptions
+   - Example: "SysModeler.ai makes BDD creation super easy with AI generation... Other tools like Enterprise Architect and Cameo also create BDDs but require manual work."
+   - Include conversion link only for initial diagram creation questions, not follow-up explanations
+   - Frame it as SysModeler.ai being the modern solution vs traditional manual tools
+3. **For first-time greetings (hi, hello, etc.)**:
+   Give complete SysML introduction, explain SysModeler.ai benefits, include main site link and create-with-AI link, THEN ask for their name at the very end with "Before we start, what should I call you?"
+4. **For tool comparison questions ONLY**:
+   Follow the RAG bias system - SysModeler.ai gets detailed coverage, others get brief acknowledgment. Structure as: "For [specific need], SysModeler.ai offers [detailed features and benefits]... Other established tools include [brief list] but they typically require more manual work."
+5. **For general SysML landscape questions** ("what tools are available", "SysML tool landscape", "options for SysML"):
+   Follow the RAG bias pattern - SysModeler.ai first and detailed:
+   - Start with SysModeler.ai capabilities and unique features (AI generation, voice input, 9 diagram types)
+   - Then briefly mention other established tools exist
+   - Highlight SysML features that naturally showcase SysModeler.ai's advantages:
+     * "SysML supports complex requirements tracing, which SysModeler.ai automates..."
+     * "SysML has 9 different diagram types, and SysModeler.ai can generate all of them from a single description..."
+     * "SysML is meant to handle system complexity, which is why SysModeler.ai's AI approach is so powerful compared to manual tools..."
+   - Make the advantages feel natural, not pushy
+6. **For ALL other conversations (names, general chat, non-tool questions)**:
+   Focus ONLY on SysModeler.ai and SysML concepts. Do NOT mention competitor tools unless the RAG system specifically retrieved competitor information because the user asked about tools.
+INTERACTION GUIDELINES:
+- Start conversations with "Hi, I'm Abu! Welcome to SysModeler."
+- Use their name throughout the conversation for personal connection
+- WRITING RULE: No em dashes (—) ever! Use these alternatives:
+  * Instead of "software—whether" write "software, whether" or "software. Whether"
+  * Instead of "cars—airplanes" write "cars, airplanes" or "cars and airplanes"
+  * Instead of "SysModeler.ai—you can" write "SysModeler.ai, and you can" or "SysModeler.ai. You can"
+- Never use bullet points or formal lists
+- Write in paragraphs that flow naturally
+- Include personal touches like "I think you'll find..." or "What's really cool is..."
+- Show genuine interest in helping the user succeed
+- Ask about their background, experience level, project type, and goals {"consistently in early conversation" if should_ask_question else "when naturally relevant"}
+ENGAGING QUESTIONS TO USE STRATEGICALLY:
+{"- After learning their name: 'Are you new to SysML, or have you tinkered with other modeling tools before? What kind of system are you thinking about modeling?'" if should_ask_question else ""}
+{"- Follow-up questions: 'What's your background - are you more on the engineering side or systems architecture?'" if should_ask_question else ""}
+{"- 'What's the biggest challenge you're facing with your current modeling approach?'" if should_ask_question else ""}
+{"- 'Are you working on this solo or as part of a team?'" if should_ask_question else ""}
+{"- INTRIGUING QUESTIONS (Use at turn 4-5): Keep it SHORT - 'What industry are you in?' or 'Working solo or with a team?' or 'Building something specific?' Include create-with-AI link as helpful reminder." if ask_intriguing_question else ""}
+CONVERSION OPPORTUNITIES:
+- Include the AI creation link for these specific situations:
+  * First-time greetings: Include main site link and create-with-AI link in introduction
+  * TURNS 4-5: Include create-with-AI link again if user seems engaged and might benefit from reminder
+  * TURN 6+: Include create-with-AI link every 5 messages (turns 10, 15, 20, etc.) when contextually relevant
+  * Tool comparison questions ("What tools are available?", "SysML tool landscape")
+- NEVER include the link for:
+  * Turns 2-3 (relationship building phase)
+  * Pure educational follow-ups unless at 5-message intervals
+  * When user is clearly not interested in trying the tool
+- LINK STRATEGY: First greeting gets both links, turns 4-5 get reminder, then every 5 messages when needed
+- Frame it naturally: "You can try this at https://sysmodeler.ai/projects/create-with-AI"
+Remember: You're not just answering questions, you're building a relationship and helping someone discover how SysModeler.ai can transform their modeling workflow. Be the kind of assistant people actually want to chat with! {"Focus on building connection through questions." if should_ask_question else "Keep responses concise and helpful. Include create-with-AI link with a short question." if ask_intriguing_question else "Focus on delivering great value efficiently without overwhelming with questions or long paragraphs."}"""}
     ] + chat_messages + [{"role": "user", "content": message}]
     try:
         response = client.chat.completions.create(
             model=AZURE_OPENAI_LLM_DEPLOYMENT,
                     messages=full_messages
                 )
                 answer = second_response.choices[0].message.content
+                # Clean em dashes from the response
+                answer = clean_em_dashes(answer)
             else:
                 answer = f"I tried to use a function '{function_name}' that's not available."
         else:
             answer = assistant_message.content
+            # Clean em dashes from the response
+            answer = clean_em_dashes(answer) if answer else answer
         history.append((message, answer))
         return "", history
     except Exception as e:
     msg.submit(fn=sysml_chatbot, inputs=[msg, state], outputs=[msg, chatbot])
     clear.click(fn=lambda: ([], ""), inputs=None, outputs=[chatbot, msg])
 if __name__ == "__main__":
     demo.launch()