Update app.py
app.py CHANGED
@@ -4,6 +4,8 @@ import json
 import requests
 from bs4 import BeautifulSoup
 import networkx as nx
+import matplotlib
+matplotlib.use('Agg')  # Use non-interactive backend
 import matplotlib.pyplot as plt
 import numpy as np
 import io
@@ -17,8 +19,10 @@ import warnings
 plt.rcParams['font.family'] = ['DejaVu Sans']
 plt.rcParams['font.size'] = 10
 plt.rcParams['font.weight'] = 'normal'
+plt.rcParams['figure.max_open_warning'] = 0  # Disable figure warnings
 warnings.filterwarnings('ignore', category=UserWarning)
 warnings.filterwarnings('ignore', message='.*Font family.*not found.*')
+warnings.filterwarnings('ignore', message='.*Matplotlib.*')
 
 def clean_text_for_display(text):
     """Clean text to remove characters that might cause font issues."""
@@ -331,69 +335,96 @@ def build_knowledge_graph(entities_data):
 
     return pil_image
 
-def
+def build_kg(url_or_text):
     """Main function to build knowledge graph from URL or text.
 
     Args:
         url_or_text: URL to analyze or direct text input
 
     Returns:
-
+        String: JSON containing entities, relationships, and summary
     """
     try:
         if not url_or_text or len(url_or_text.strip()) == 0:
-            return
+            return json.dumps({"error": "Please provide some text or a URL to analyze."})
 
         # Step 1: Fetch content
         content = fetch_content(url_or_text)
 
         if content.startswith("Error"):
-            return json.dumps({"error": content})
+            return json.dumps({"error": content})
 
         # Step 2: Extract entities
         entities_data = extract_entities(content)
 
-        # Step 3:
-        graph_image = build_knowledge_graph(entities_data)
-
-        # Step 4: Create summary
+        # Step 3: Create summary (no image for MCP compatibility)
         num_entities = len(entities_data.get("entities", []))
         num_relationships = len(entities_data.get("relationships", []))
 
-        summary = f"""
+        summary = f"""Knowledge Graph Analysis Complete!
 
-
+Statistics:
 - Entities found: {num_entities}
 - Relationships found: {num_relationships}
 - Content length: {len(content)} characters
 
-
+Extracted Entities:"""
 
         for entity in entities_data.get("entities", [])[:8]:  # Show first 8
            name = entity.get('name', 'Unknown')
            entity_type = entity.get('type', 'UNKNOWN')
            desc = entity.get('description', 'No description')
-            summary += f"\n•
+            summary += f"\n• {name} ({entity_type}): {desc}"
 
         if len(entities_data.get("entities", [])) > 8:
-            summary += f"\n
-
-        #
-
-
-
-
+            summary += f"\n... and {len(entities_data.get('entities', [])) - 8} more entities"
+
+        # Create complete response
+        response = {
+            "success": True,
+            "entities": entities_data.get("entities", []),
+            "relationships": entities_data.get("relationships", []),
+            "summary": summary,
+            "statistics": {
+                "entity_count": num_entities,
+                "relationship_count": num_relationships,
+                "content_length": len(content)
+            }
+        }
 
-        return
+        return json.dumps(response, indent=2, ensure_ascii=True)
 
     except Exception as e:
         error_msg = f"Analysis failed: {str(e)}"
-
+        print(f"Function error: {error_msg}")
+        return json.dumps({"success": False, "error": error_msg})
 
 # Create Gradio interface with error handling
 try:
+    def parse_kg_output(json_result):
+        """Parse the JSON result and return separate components for UI"""
+        try:
+            data = json.loads(json_result) if isinstance(json_result, str) else json_result
+            if data.get("success"):
+                entities_json = {
+                    "entities": data.get("entities", []),
+                    "relationships": data.get("relationships", [])
+                }
+                summary = data.get("summary", "No summary available")
+                return json.dumps(entities_json, indent=2), summary
+            else:
+                error_msg = data.get("error", "Unknown error")
+                return json.dumps({"error": error_msg}), f"Error: {error_msg}"
+        except Exception as e:
+            return json.dumps({"error": str(e)}), f"Parse error: {str(e)}"
+
+    def kg_interface(url_or_text):
+        """Interface wrapper for the knowledge graph builder"""
+        result = build_kg(url_or_text)
+        return parse_kg_output(result)
+
     demo = gr.Interface(
-        fn=
+        fn=kg_interface,
         inputs=[
             gr.Textbox(
                 label="URL or Text Input",
@@ -404,18 +435,16 @@ try:
         ],
         outputs=[
             gr.JSON(label="Extracted Entities & Relationships"),
-            gr.Image(label="Knowledge Graph Visualization", type="pil"),
             gr.Markdown(label="Analysis Summary")
         ],
-        title="🧠
+        title="🧠 KG Builder",
         description="""
-        **Transform any text or webpage into
+        **Transform any text or webpage into knowledge graphs!**
 
         This tool:
         1. 📖 Extracts content from URLs or analyzes your text
        2. 🤖 Uses AI to identify entities and relationships
-        3.
-        4. 📊 Provides detailed analysis summaries
+        3. 📊 Provides detailed analysis summaries
 
         **Try with:** news articles, Wikipedia pages, or any text content
         """,
@@ -430,18 +459,30 @@ except Exception as e:
     def simple_demo(text):
         return json.dumps({"error": f"Startup failed: {str(e)}"}), None, "Application failed to start properly."
 
-
-
-
-
-
-
-
-
-
-
-
-
+    try:
+        demo = gr.Interface(
+            fn=simple_demo,
+            inputs=[gr.Textbox(label="Input", placeholder="Enter text...")],
+            outputs=[
+                gr.JSON(label="Error Output"),
+                gr.Image(label="No Image", type="filepath"),
+                gr.Markdown(label="Error Message")
+            ],
+            title="⚠️ KG Builder - Startup Error",
+            allow_flagging="never",
+            cache_examples=False
+        )
+    except Exception as e2:
+        # Ultimate fallback - minimal interface
+        def minimal_demo(text):
+            return {"error": str(e2)}
+
+        demo = gr.Interface(
+            fn=minimal_demo,
+            inputs="text",
+            outputs="json",
+            title="KG Builder Error"
+        )
 
 # Launch the demo
 if __name__ == "__main__":