Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -341,36 +341,72 @@ def build_ascii_diagram(entities, relationships):
|
|
341 |
return "No entities to visualize"
|
342 |
|
343 |
diagram = "KNOWLEDGE GRAPH DIAGRAM:\n"
|
344 |
-
diagram += "=" *
|
345 |
|
346 |
# Show entities by type
|
347 |
entity_types = {}
|
348 |
-
for entity in entities
|
349 |
etype = entity.get("type", "UNKNOWN")
|
350 |
if etype not in entity_types:
|
351 |
entity_types[etype] = []
|
352 |
entity_types[etype].append(entity.get("name", "Unknown"))
|
353 |
|
354 |
for etype, names in entity_types.items():
|
355 |
-
diagram += f"
|
356 |
for name in names:
|
357 |
-
diagram += f"
|
358 |
diagram += "\n"
|
359 |
|
360 |
# Show relationships
|
361 |
if relationships:
|
362 |
-
diagram += "
|
363 |
-
for rel in relationships
|
364 |
source = rel.get("source", "?")
|
365 |
target = rel.get("target", "?")
|
366 |
relation = rel.get("relation", "related")
|
367 |
-
diagram += f"
|
368 |
-
|
369 |
-
if len(relationships) > 8:
|
370 |
-
diagram += f" ... and {len(relationships) - 8} more relationships\n"
|
371 |
|
372 |
return diagram
|
373 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
374 |
def build_kg(url_or_text):
|
375 |
"""Main function to build knowledge graph from URL or text.
|
376 |
|
@@ -382,62 +418,67 @@ def build_kg(url_or_text):
|
|
382 |
"""
|
383 |
try:
|
384 |
if not url_or_text or len(url_or_text.strip()) == 0:
|
385 |
-
return json.dumps({"error": "Please provide some text or a URL to analyze."})
|
386 |
|
387 |
# Step 1: Fetch content
|
388 |
content = fetch_content(url_or_text)
|
389 |
|
390 |
if content.startswith("Error"):
|
391 |
-
return json.dumps({"error": content})
|
392 |
|
393 |
# Step 2: Extract entities
|
394 |
entities_data = extract_entities(content)
|
395 |
|
396 |
-
# Step 3: Create simple ASCII diagram
|
397 |
entities = entities_data.get("entities", [])
|
398 |
relationships = entities_data.get("relationships", [])
|
399 |
-
ascii_diagram = build_ascii_diagram(entities, relationships)
|
400 |
|
401 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
402 |
num_entities = len(entities)
|
403 |
num_relationships = len(relationships)
|
404 |
|
405 |
-
summary = f"
|
406 |
-
|
407 |
-
Statistics:
|
408 |
-
- Entities: {num_entities}
|
409 |
-
- Relationships: {num_relationships}
|
410 |
-
- Content length: {len(content)} characters
|
411 |
-
|
412 |
-
Top Entities:"""
|
413 |
|
414 |
-
for entity in entities[:
|
415 |
name = entity.get('name', 'Unknown')
|
416 |
entity_type = entity.get('type', 'UNKNOWN')
|
417 |
summary += f"\n• {name} ({entity_type})"
|
418 |
|
419 |
-
if len(entities) >
|
420 |
-
summary += f"\n... and {len(entities) -
|
421 |
|
422 |
-
# Create MCP-friendly response
|
423 |
response = {
|
424 |
"success": True,
|
425 |
-
"entities": entities,
|
426 |
-
"relationships": relationships,
|
427 |
"diagram": ascii_diagram,
|
428 |
"summary": summary,
|
429 |
"stats": {
|
430 |
"entity_count": num_entities,
|
431 |
"relationship_count": num_relationships,
|
432 |
-
"content_length": len(content)
|
433 |
}
|
434 |
}
|
435 |
|
436 |
-
|
|
|
|
|
|
|
|
|
437 |
|
438 |
except Exception as e:
|
439 |
error_msg = f"Analysis failed: {str(e)}"
|
440 |
-
return json.dumps({"success": False, "error": error_msg})
|
441 |
|
442 |
# Create Gradio interface with error handling
|
443 |
try:
|
@@ -484,20 +525,47 @@ except Exception as e:
|
|
484 |
# Launch the demo
|
485 |
if __name__ == "__main__":
|
486 |
try:
|
487 |
-
|
488 |
-
|
489 |
-
|
490 |
-
|
491 |
-
|
492 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
493 |
except Exception as e:
|
494 |
-
print(f"Launch failed: {e}")
|
495 |
# Try without MCP server as fallback
|
496 |
try:
|
497 |
demo.launch(
|
498 |
mcp_server=False,
|
499 |
share=False,
|
500 |
-
show_error=True
|
|
|
501 |
)
|
502 |
except Exception as e2:
|
503 |
print(f"Complete failure: {e2}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
341 |
return "No entities to visualize"
|
342 |
|
343 |
diagram = "KNOWLEDGE GRAPH DIAGRAM:\n"
|
344 |
+
diagram += "=" * 30 + "\n\n" # Reduced line length
|
345 |
|
346 |
# Show entities by type
|
347 |
entity_types = {}
|
348 |
+
for entity in entities: # Already limited by caller
|
349 |
etype = entity.get("type", "UNKNOWN")
|
350 |
if etype not in entity_types:
|
351 |
entity_types[etype] = []
|
352 |
entity_types[etype].append(entity.get("name", "Unknown"))
|
353 |
|
354 |
for etype, names in entity_types.items():
|
355 |
+
diagram += f"{etype}:\n" # Removed emoji for MCP compatibility
|
356 |
for name in names:
|
357 |
+
diagram += f" - {name}\n"
|
358 |
diagram += "\n"
|
359 |
|
360 |
# Show relationships
|
361 |
if relationships:
|
362 |
+
diagram += "RELATIONSHIPS:\n" # Removed emoji for MCP compatibility
|
363 |
+
for rel in relationships: # Already limited by caller
|
364 |
source = rel.get("source", "?")
|
365 |
target = rel.get("target", "?")
|
366 |
relation = rel.get("relation", "related")
|
367 |
+
diagram += f" {source} -> {target} ({relation})\n"
|
|
|
|
|
|
|
368 |
|
369 |
return diagram
|
370 |
|
371 |
+
def validate_mcp_response(response_data, max_bytes=100000):
    """Sanitize a response payload and keep it within the MCP size limit.

    Every string value in the (possibly nested) payload is stripped of all
    characters other than printable ASCII, newline, carriage return and
    tab. The result is then serialized as compact JSON, both to prove it is
    serializable and to measure it. A payload larger than ``max_bytes`` has
    ``entities`` cut to 5 items, ``relationships`` cut to 3 items and
    ``diagram`` replaced by a short placeholder. If it is still too large
    after that reduction, an error payload is returned instead, so the
    limit is actually enforced.

    Args:
        response_data: JSON-compatible structure, normally the response
            dict. It is not modified; a sanitized copy is built.
        max_bytes: Maximum length of the compact JSON encoding. Because the
            encoding uses ``ensure_ascii=True``, characters equal bytes.

    Returns:
        The sanitized (and possibly reduced) copy of ``response_data``. On
        any failure, a dict with ``success`` set to False, an ``error``
        message and empty/placeholder ``entities``, ``relationships``,
        ``diagram`` and ``summary`` fields. This function never raises.
    """

    def sanitize(obj):
        # Walk containers recursively; only string *values* are rewritten.
        if isinstance(obj, dict):
            return {key: sanitize(value) for key, value in obj.items()}
        if isinstance(obj, (list, tuple)):
            # Tuples serialize as JSON arrays, so they are sanitized too.
            return [sanitize(item) for item in obj]
        if isinstance(obj, str):
            # Remove non-ASCII characters and control characters.
            return re.sub(r'[^\x20-\x7E\n\r\t]', '', obj)
        return obj

    def encoded_size(obj):
        # Same encoding settings the caller uses for the final response.
        return len(json.dumps(obj, ensure_ascii=True, separators=(',', ':')))

    try:
        sanitized = sanitize(response_data)

        if encoded_size(sanitized) > max_bytes:
            if not isinstance(sanitized, dict):
                raise ValueError("oversized response is not a JSON object")

            # Drastically reduce content.
            sanitized["entities"] = sanitized.get("entities", [])[:5]
            sanitized["relationships"] = sanitized.get("relationships", [])[:3]
            sanitized["diagram"] = "Knowledge graph generated (content reduced for MCP)"

            # Re-measure: other fields (e.g. a long summary) or a few very
            # large records can keep the payload above the limit.
            if encoded_size(sanitized) > max_bytes:
                raise ValueError(
                    f"response exceeds {max_bytes} bytes even after reduction"
                )

        return sanitized

    except Exception as e:
        # Deliberate catch-all: the caller must always receive a
        # well-formed, serializable payload.
        return {
            "success": False,
            "error": f"Response validation failed: {str(e)}",
            "entities": [],
            "relationships": [],
            "diagram": "Error generating diagram",
            "summary": "Analysis failed during response validation"
        }
|
409 |
+
|
410 |
def build_kg(url_or_text):
    """Build a knowledge graph from a URL or text and return it as JSON.

    The input is passed to ``fetch_content``; a result starting with
    ``"Error"`` is treated as a failure message. Otherwise the content is
    passed to ``extract_entities`` and a compact, size-limited response is
    assembled and run through ``validate_mcp_response``.

    Args:
        url_or_text: The URL or text to analyze. Empty or whitespace-only
            input is rejected.

    Returns:
        A JSON string. On success it contains ``success`` (true), up to 15
        ``entities``, up to 10 ``relationships``, a ``diagram`` built from
        the first 8 entities and 6 relationships, a ``summary`` and
        ``stats``. On every failure path it contains ``success`` (false)
        and an ``error`` message. Exceptions are caught and reported in the
        JSON rather than raised.
    """
    try:
        if not url_or_text or not url_or_text.strip():
            return json.dumps(
                {"success": False, "error": "Please provide some text or a URL to analyze."},
                ensure_ascii=True,
            )

        # Step 1: Fetch content
        content = fetch_content(url_or_text)

        if content.startswith("Error"):
            return json.dumps({"success": False, "error": content}, ensure_ascii=True)

        # Step 2: Extract entities
        entities_data = extract_entities(content)
        entities = entities_data.get("entities", [])
        relationships = entities_data.get("relationships", [])

        # Step 3: Create simple ASCII diagram from a limited subset so the
        # diagram stays small for MCP clients.
        ascii_diagram = build_ascii_diagram(entities[:8], relationships[:6])

        # Truncate diagram if too long
        if len(ascii_diagram) > 1000:
            ascii_diagram = ascii_diagram[:950] + "\n... (truncated for MCP compatibility)"

        # Step 4: Create summary (concise for MCP)
        num_entities = len(entities)
        num_relationships = len(relationships)

        summary_lines = [
            "Knowledge Graph Analysis Complete!\n\n"
            f"Stats: {num_entities} entities, {num_relationships} relationships\n\n"
            "Top Entities:"
        ]
        for entity in entities[:3]:  # Show only first 3 for MCP
            name = entity.get('name', 'Unknown')
            entity_type = entity.get('type', 'UNKNOWN')
            # ASCII bullet: validate_mcp_response strips non-ASCII
            # characters, which would otherwise leave a bare leading space.
            summary_lines.append(f"- {name} ({entity_type})")

        if num_entities > 3:
            summary_lines.append(f"... and {num_entities - 3} more")

        summary = "\n".join(summary_lines)

        # Create MCP-friendly response with size limits
        response = {
            "success": True,
            "entities": entities[:15],  # Limit entities for MCP
            "relationships": relationships[:10],  # Limit relationships for MCP
            "diagram": ascii_diagram,
            "summary": summary,
            "stats": {
                # Counts describe the full extraction, not the limited lists.
                "entity_count": num_entities,
                "relationship_count": num_relationships,
                "content_length": min(len(content), 5000)  # Cap reported length
            }
        }

        # Validate and sanitize response for MCP compatibility
        validated_response = validate_mcp_response(response)

        # Return compact JSON
        return json.dumps(validated_response, ensure_ascii=True, separators=(',', ':'))

    except Exception as e:
        # Boundary handler: callers always get JSON, never a traceback.
        error_msg = f"Analysis failed: {str(e)}"
        return json.dumps({"success": False, "error": error_msg}, ensure_ascii=True, separators=(',', ':'))
|
482 |
|
483 |
# Create Gradio interface with error handling
|
484 |
try:
|
|
|
525 |
# Launch the demo
|
526 |
if __name__ == "__main__":
    # Settings shared by every MCP-enabled launch.
    launch_kwargs = {
        "mcp_server": True,
        "share": False,
        "show_error": True,
        "quiet": False,
    }

    # A SPACE_ID environment variable is taken to mean HuggingFace Spaces
    # (or a similar hosted environment).
    if os.environ.get("SPACE_ID") is not None:
        launch_kwargs.update(
            server_name="0.0.0.0",  # Allow external connections
            server_port=7860,  # Standard HF port
        )

    try:
        demo.launch(**launch_kwargs)
    except Exception as e:
        print(f"MCP Launch failed: {e}")
        # Try without MCP server as fallback
        try:
            demo.launch(
                mcp_server=False,
                share=False,
                show_error=True,
                quiet=True
            )
        except Exception as e2:
            print(f"Complete failure: {e2}")

            # Capture the message now: Python unbinds ``e2`` when this
            # except clause ends, so reading it lazily inside the handler
            # could raise NameError on a later request.
            startup_error = str(e2)

            # Create minimal working demo
            def error_demo(text):
                """Return the startup failure as JSON for any submitted text."""
                return json.dumps({"error": f"Server startup failed: {startup_error}"}, ensure_ascii=True)

            minimal_demo = gr.Interface(
                fn=error_demo,
                inputs="text",
                outputs="text",
                title="KG Builder - Error Mode"
            )
            minimal_demo.launch(share=False, quiet=True)
|