VirtualOasis committed on
Commit 804b6dc · verified · 1 Parent(s): b914d47

Update app.py

Files changed (1):
  1. app.py +157 -551
app.py CHANGED
@@ -1,587 +1,193 @@
- import gradio as gr
  import os
  import json
  import requests
  from bs4 import BeautifulSoup
- import networkx as nx
- import matplotlib
- matplotlib.use('Agg')  # Use non-interactive backend
- import matplotlib.pyplot as plt
- import numpy as np
- import io
- import base64
- from huggingface_hub import InferenceClient
- import re
  from urllib.parse import urlparse
- import warnings
-
- # Configure matplotlib for better font handling
- plt.rcParams['font.family'] = ['DejaVu Sans']
- plt.rcParams['font.size'] = 10
- plt.rcParams['font.weight'] = 'normal'
- plt.rcParams['figure.max_open_warning'] = 0  # Disable figure warnings
- warnings.filterwarnings('ignore', category=UserWarning)
- warnings.filterwarnings('ignore', message='.*Font family.*not found.*')
- warnings.filterwarnings('ignore', message='.*Matplotlib.*')
-
- def clean_text_for_display(text):
-     """Clean text to remove characters that might cause font issues."""
-     if not isinstance(text, str):
-         return str(text)
-
-     # Remove or replace problematic characters
-     text = re.sub(r'[^\x00-\x7F]+', '', text)  # Remove non-ASCII characters
-     text = re.sub(r'\s+', ' ', text).strip()  # Normalize whitespace
-     return text[:50] if len(text) > 50 else text  # Limit length for display
 
- def fetch_content(url_or_text):
-     """Fetch content from URL or return text directly.
-
-     Args:
-         url_or_text: Either a URL to fetch content from, or direct text input
-
-     Returns:
-         Extracted text content
-     """
-     try:
-         # Check if input looks like a URL
-         parsed = urlparse(url_or_text)
-         if parsed.scheme in ['http', 'https']:
-             try:
-                 headers = {
-                     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
-                 }
-                 response = requests.get(url_or_text, headers=headers, timeout=10)
-                 response.raise_for_status()
-
-                 # Parse HTML and extract text
-                 soup = BeautifulSoup(response.content, 'html.parser')
-
-                 # Remove script and style elements
-                 for script in soup(["script", "style"]):
-                     script.decompose()
-
-                 # Get text and clean it up
-                 text = soup.get_text()
-                 lines = (line.strip() for line in text.splitlines())
-                 chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
-                 text = ' '.join(chunk for chunk in chunks if chunk)
-
-                 return text[:5000]  # Limit to first 5000 characters
-             except Exception as e:
-                 return f"Error fetching URL: {str(e)}"
-         else:
-             # It's direct text input
-             return url_or_text
-     except Exception as e:
-         return f"Error processing input: {str(e)}"
 
- def simple_entity_extraction(text):
-     """Fallback entity extraction when AI is not available."""
      try:
-         words = text.split()
-         entities = []
-
-         # Simple heuristic: words that are capitalized and longer than 2 characters
-         seen = set()
-         for word in words[:30]:  # Limit to first 30 words
-             clean_word = re.sub(r'[^\w]', '', word)
-             if (clean_word.istitle() and len(clean_word) > 2 and
-                 clean_word.lower() not in seen and
-                 clean_word not in ['The', 'This', 'That', 'When', 'Where', 'How']):
-                 entities.append({
-                     "name": clean_text_for_display(clean_word),
-                     "type": "CONCEPT",
-                     "description": "Auto-detected entity"
-                 })
-                 seen.add(clean_word.lower())
-
-         # Create some basic relationships
-         relationships = []
-         if len(entities) > 1:
-             for i in range(min(len(entities) - 1, 5)):  # Max 5 relationships
-                 relationships.append({
-                     "source": entities[i]["name"],
-                     "target": entities[i + 1]["name"],
-                     "relation": "related_to",
-                     "description": "Sequential relationship"
-                 })
-
-         return {"entities": entities[:10], "relationships": relationships}
-     except Exception as e:
-         return {
-             "entities": [{"name": "Error", "type": "ERROR", "description": str(e)}],
-             "relationships": []
          }
-
- def extract_entities(text):
-     """Extract entities and relationships using Mistral AI with fallback.
-
-     Args:
-         text: Input text to analyze
 
-     Returns:
-         Dictionary containing entities and relationships
-     """
-     try:
-         # Check if HF_TOKEN is available
-         hf_token = os.environ.get("HF_TOKEN")
-         if not hf_token:
-             print("No HF_TOKEN found, using simple extraction")
-             return simple_entity_extraction(text)
 
-         client = InferenceClient(
-             provider="together",
-             api_key=hf_token,
-         )
 
-         prompt = f"""
-         Analyze the following text and extract:
-         1. Named entities (people, organizations, locations, concepts)
-         2. Relationships between these entities
 
-         Return ONLY a valid JSON object with this structure:
-         {{
-             "entities": [
-                 {{"name": "entity_name", "type": "PERSON", "description": "brief description"}}
-             ],
-             "relationships": [
-                 {{"source": "entity1", "target": "entity2", "relation": "relationship_type", "description": "brief description"}}
-             ]
-         }}
 
-         Text to analyze: {text[:1500]}
-         """
-
-         completion = client.chat.completions.create(
-             model="mistralai/Mistral-Small-24B-Instruct-2501",
-             messages=[{"role": "user", "content": prompt}],
-             max_tokens=1000,
-             temperature=0.1,
-         )
-
-         response_text = completion.choices[0].message.content
-
-         # Clean and extract JSON
-         json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
-         if json_match:
-             json_str = json_match.group()
-             # Clean the JSON string
-             json_str = re.sub(r'[\x00-\x1f\x7f-\x9f]', '', json_str)  # Remove control characters
-
-             parsed_data = json.loads(json_str)
-
-             # Clean entity names for display
-             if "entities" in parsed_data:
-                 for entity in parsed_data["entities"]:
-                     if "name" in entity:
-                         entity["name"] = clean_text_for_display(entity["name"])
-
-             return parsed_data
-         else:
-             print("No valid JSON found in AI response, using fallback")
-             return simple_entity_extraction(text)
-
      except Exception as e:
-         print(f"AI extraction failed: {e}, using fallback")
-         return simple_entity_extraction(text)
 
- def build_knowledge_graph(entities_data):
-     """Build and visualize knowledge graph.
 
-     Args:
-         entities_data: Dictionary containing entities and relationships
-
-     Returns:
-         PIL Image object of the knowledge graph
-     """
-     try:
-         # Create networkx graph
-         G = nx.Graph()
-
-         # Add nodes (entities)
-         entities = entities_data.get("entities", [])
-         for entity in entities[:15]:  # Limit to 15 entities for better visualization
-             clean_name = clean_text_for_display(entity.get("name", "Unknown"))
-             if clean_name and len(clean_name.strip()) > 0:
-                 G.add_node(clean_name,
-                            type=entity.get("type", "UNKNOWN"),
-                            description=entity.get("description", ""))
-
-         # Add edges (relationships)
-         relationships = entities_data.get("relationships", [])
-         for rel in relationships:
-             source = clean_text_for_display(rel.get("source", ""))
-             target = clean_text_for_display(rel.get("target", ""))
-             if source in G.nodes and target in G.nodes:
-                 G.add_edge(source, target,
-                            relation=rel.get("relation", "related"),
-                            description=rel.get("description", ""))
-
-         # If no relationships found, create some connections between entities
-         if len(relationships) == 0 and len(list(G.nodes())) > 1:
-             node_list = list(G.nodes())
-             for i in range(min(len(node_list) - 1, 5)):
-                 G.add_edge(node_list[i], node_list[i + 1], relation="related")
-
-         # Create visualization
-         fig, ax = plt.subplots(figsize=(10, 8))
-
-         # Skip if no nodes
-         if len(G.nodes()) == 0:
-             ax.text(0.5, 0.5, "No entities found to visualize",
-                     ha='center', va='center', fontsize=14, transform=ax.transAxes)
-             ax.set_title("Knowledge Graph")
-             ax.axis('off')
-         else:
-             # Position nodes using spring layout
-             pos = nx.spring_layout(G, k=1, iterations=50)
-
-             # Color nodes by type
-             node_colors = []
-             type_colors = {
-                 "PERSON": "#FF6B6B",
-                 "ORG": "#4ECDC4",
-                 "LOCATION": "#45B7D1",
-                 "CONCEPT": "#96CEB4",
-                 "ERROR": "#FF0000",
-                 "UNKNOWN": "#DDA0DD"
-             }
-
-             for node in G.nodes():
-                 node_type = G.nodes[node].get('type', 'UNKNOWN')
-                 node_colors.append(type_colors.get(node_type, "#DDA0DD"))
-
-             # Draw the graph
-             nx.draw(G, pos,
-                     node_color=node_colors,
-                     node_size=800,
-                     font_size=8,
-                     font_weight='bold',
-                     with_labels=True,
-                     edge_color='gray',
-                     width=1.5,
-                     alpha=0.8,
-                     ax=ax)
-
-             # Add title
-             ax.set_title("Knowledge Graph", size=14, weight='bold')
-
-         # Convert to PIL Image
-         fig.canvas.draw()
-
-         # Handle different matplotlib versions
-         try:
-             # Try newer method first
-             img_array = np.frombuffer(fig.canvas.buffer_rgba(), dtype=np.uint8)
-             img_array = img_array.reshape(fig.canvas.get_width_height()[::-1] + (4,))
-             # Convert RGBA to RGB
-             img_array = img_array[:, :, :3]
-         except AttributeError:
-             try:
-                 # Fallback to older method
-                 img_array = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
-                 img_array = img_array.reshape(fig.canvas.get_width_height()[::-1] + (3,))
-             except AttributeError:
-                 # Final fallback - save to buffer
-                 buf = io.BytesIO()
-                 fig.savefig(buf, format='png', bbox_inches='tight')
-                 buf.seek(0)
-                 from PIL import Image
-                 pil_image = Image.open(buf).convert('RGB')
-                 plt.close(fig)
-                 return pil_image
-
-         from PIL import Image
-         pil_image = Image.fromarray(img_array)
-         plt.close(fig)
-
-         return pil_image
-
-     except Exception as e:
-         # Create simple error image
-         fig, ax = plt.subplots(figsize=(8, 6))
-         ax.text(0.5, 0.5, f"Error creating graph",
-                 ha='center', va='center', fontsize=12, transform=ax.transAxes)
-         ax.set_title("Knowledge Graph Error")
-         ax.axis('off')
-
-         # Handle different matplotlib versions for error image
-         try:
-             # Try newer method first
-             fig.canvas.draw()
-             img_array = np.frombuffer(fig.canvas.buffer_rgba(), dtype=np.uint8)
-             img_array = img_array.reshape(fig.canvas.get_width_height()[::-1] + (4,))
-             img_array = img_array[:, :, :3]  # Convert RGBA to RGB
-         except AttributeError:
-             try:
-                 # Fallback to older method
-                 fig.canvas.draw()
-                 img_array = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
-                 img_array = img_array.reshape(fig.canvas.get_width_height()[::-1] + (3,))
-             except AttributeError:
-                 # Final fallback - save to buffer
-                 buf = io.BytesIO()
-                 fig.savefig(buf, format='png', bbox_inches='tight')
-                 buf.seek(0)
-                 from PIL import Image
-                 pil_image = Image.open(buf).convert('RGB')
-                 plt.close(fig)
-                 return pil_image
-
-         from PIL import Image
-         pil_image = Image.fromarray(img_array)
-         plt.close(fig)
-
-         return pil_image
-
- def build_ascii_diagram(entities, relationships):
-     """Create simple ASCII diagram of knowledge graph"""
-     if not entities:
-         return "No entities to visualize"
-
-     diagram = "KNOWLEDGE GRAPH DIAGRAM:\n"
-     diagram += "=" * 30 + "\n\n"  # Reduced line length
-
-     # Show entities by type
-     entity_types = {}
-     for entity in entities:  # Already limited by caller
-         etype = entity.get("type", "UNKNOWN")
-         if etype not in entity_types:
-             entity_types[etype] = []
-         entity_types[etype].append(entity.get("name", "Unknown"))
 
-     for etype, names in entity_types.items():
-         diagram += f"{etype}:\n"  # Removed emoji for MCP compatibility
-         for name in names:
-             diagram += f"  - {name}\n"
-         diagram += "\n"
 
-     # Show relationships
-     if relationships:
-         diagram += "RELATIONSHIPS:\n"  # Removed emoji for MCP compatibility
-         for rel in relationships:  # Already limited by caller
-             source = rel.get("source", "?")
-             target = rel.get("target", "?")
-             relation = rel.get("relation", "related")
-             diagram += f"  {source} -> {target} ({relation})\n"
 
-     return diagram
-
- def validate_mcp_response(response_data):
-     """Validate and sanitize response for MCP compatibility"""
      try:
-         # Ensure all string values are ASCII-safe
-         def sanitize_strings(obj):
-             if isinstance(obj, dict):
-                 return {k: sanitize_strings(v) for k, v in obj.items()}
-             elif isinstance(obj, list):
-                 return [sanitize_strings(item) for item in obj]
-             elif isinstance(obj, str):
-                 # Remove non-ASCII characters and control characters
-                 return re.sub(r'[^\x20-\x7E\n\r\t]', '', obj)
-             else:
-                 return obj
 
-         sanitized = sanitize_strings(response_data)
 
-         # Test JSON serialization
-         test_json = json.dumps(sanitized, ensure_ascii=True, separators=(',', ':'))
 
-         # Size check
-         if len(test_json) > 100000:  # 100KB hard limit
-             # Drastically reduce content
-             sanitized["entities"] = sanitized.get("entities", [])[:5]
-             sanitized["relationships"] = sanitized.get("relationships", [])[:3]
-             sanitized["diagram"] = "Knowledge graph generated (content reduced for MCP)"
-
-         return sanitized
 
      except Exception as e:
          return {
-             "success": False,
-             "error": f"Response validation failed: {str(e)}",
              "entities": [],
              "relationships": [],
-             "diagram": "Error generating diagram",
-             "summary": "Analysis failed during response validation"
          }
 
- def build_kg(url_or_text):
-     """Main function to build knowledge graph from URL or text.
 
-     Args:
-         url_or_text: URL to analyze or direct text input
-
-     Returns:
-         String: Simple JSON response optimized for MCP streaming
-     """
-     try:
-         # Quick validation
-         if not url_or_text or len(url_or_text.strip()) == 0:
-             return '{"error":"Please provide text or URL to analyze"}'
-
-         # Limit input size immediately to prevent timeouts
-         input_text = url_or_text[:2000] if len(url_or_text) > 2000 else url_or_text
-
-         # Step 1: Fetch content (with timeout protection)
-         try:
-             content = fetch_content(input_text)
-             if content.startswith("Error"):
-                 return f'{{"error":"{content}"}}'
-         except Exception:
-             content = input_text  # Use input directly if fetch fails
-
-         # Limit content size for fast processing
-         content = content[:1500] if len(content) > 1500 else content
-
-         # Step 2: Quick entity extraction (simplified for speed)
-         try:
-             entities_data = simple_entity_extraction(content)  # Always use simple extraction for MCP
-         except Exception:
-             entities_data = {"entities": [], "relationships": []}
-
-         # Step 3: Minimal response
-         entities = entities_data.get("entities", [])[:5]  # Max 5 entities
-         relationships = entities_data.get("relationships", [])[:3]  # Max 3 relationships
-
-         # Create minimal ASCII summary
-         diagram_parts = []
-         if entities:
-             diagram_parts.append("ENTITIES:")
-             for entity in entities:
-                 name = str(entity.get("name", "Unknown"))[:20]  # Truncate names
-                 diagram_parts.append(f"  - {name}")
-
-         if relationships:
-             diagram_parts.append("RELATIONSHIPS:")
-             for rel in relationships:
-                 source = str(rel.get("source", ""))[:15]
-                 target = str(rel.get("target", ""))[:15]
-                 diagram_parts.append(f"  {source} -> {target}")
-
-         diagram = "\n".join(diagram_parts) if diagram_parts else "No entities found"
-
-         # Ultra-minimal response
-         response = {
-             "success": True,
-             "entity_count": len(entities),
-             "relationship_count": len(relationships),
-             "entities": [{"name": e.get("name", "")[:20], "type": e.get("type", "UNKNOWN")} for e in entities],
-             "relationships": [{"source": r.get("source", "")[:15], "target": r.get("target", "")[:15]} for r in relationships],
-             "diagram": diagram[:500]  # Strict limit
          }
-
-         # Return ultra-compact JSON
-         return json.dumps(response, separators=(',', ':'))[:2000]  # Hard size limit
-
-     except Exception as e:
-         # Ultra-simple error response
-         error_msg = str(e)[:100]  # Truncate error message
-         return f'{{"success":false,"error":"{error_msg}"}}'
-
- # Wrapper function with timeout protection for MCP
- def mcp_safe_build_kg(url_or_text):
-     """MCP-safe wrapper with timeout protection"""
-     try:
-         import signal
-         import functools
-
-         def timeout_handler(signum, frame):
-             raise TimeoutError("Function timed out")
-
-         # Set timeout for 10 seconds
-         signal.signal(signal.SIGALRM, timeout_handler)
-         signal.alarm(10)
-
-         try:
-             result = build_kg(url_or_text)
-             signal.alarm(0)  # Cancel timeout
-             return result
-         except TimeoutError:
-             return '{"success":false,"error":"Request timed out"}'
-         except Exception as e:
-             signal.alarm(0)  # Cancel timeout
-             return f'{{"success":false,"error":"Function error: {str(e)[:50]}"}}'
-
-     except Exception:
-         # Fallback if signal not available (Windows, etc.)
-         try:
-             return build_kg(url_or_text)
-         except Exception as e:
-             return f'{{"success":false,"error":"Fallback error: {str(e)[:50]}"}}'
-
- # Create Gradio interface with error handling
- try:
-     demo = gr.Interface(
-         fn=mcp_safe_build_kg,  # Use the timeout-protected version
-         inputs=gr.Textbox(
-             label="Input Text or URL",
-             placeholder="Enter text to analyze or paste a URL...",
-             max_lines=5
-         ),
-         outputs=gr.Textbox(
-             label="Knowledge Graph JSON",
-             show_copy_button=True
-         ),
-         title="KG Builder - MCP Edition",
-         description="Lightweight knowledge graph builder optimized for MCP servers.",
-         allow_flagging="never",
-         cache_examples=False
-     )
 
- except Exception as e:
-     print(f"Failed to create Gradio interface: {e}")
-     # Create minimal fallback
-     def error_demo(text):
-         return f'{{"error":"Interface creation failed: {str(e)[:100]}"}}'
-
-     demo = gr.Interface(
-         fn=error_demo,
-         inputs="text",
-         outputs="text",
-         title="KG Builder - Error Mode",
-         allow_flagging="never"
-     )
 
- # Launch the demo
- if __name__ == "__main__":
-     print("Starting KG Builder MCP Server...")
 
-     try:
-         demo.launch(
-             mcp_server=True,
-             share=False,
-             show_error=False,  # Reduce error verbosity for MCP
-             quiet=True,  # Reduce logging to prevent SSE issues
-             server_name="0.0.0.0",
-             server_port=7860,
-             max_threads=1,  # Limit concurrency to prevent resource issues
-             show_api=False  # Disable API docs to reduce overhead
-         )
-     except Exception as e:
-         print(f"MCP server launch failed: {e}")
-         print("Trying fallback mode...")
-         try:
-             # Fallback without MCP
-             demo.launch(
-                 mcp_server=False,
-                 share=False,
-                 quiet=True,
-                 show_error=False
-             )
-         except Exception as e2:
-             print(f"All launch attempts failed: {e2}")
-             print("Creating emergency fallback...")
-
-             # Create absolute minimal demo
-             def emergency_demo(text):
-                 return '{"error":"Server in emergency mode"}'
-
-             emergency = gr.Interface(
-                 fn=emergency_demo,
-                 inputs="text",
-                 outputs="text",
-                 title="KG Builder Emergency Mode"
-             )
-             emergency.launch(quiet=True, share=False)
 
  import os
  import json
  import requests
  from bs4 import BeautifulSoup
  from urllib.parse import urlparse
+ import gradio as gr
+ from huggingface_hub import InferenceClient
 
+ # Initialize Mistral client
+ client = InferenceClient(
+     provider="together",
+     api_key=os.environ.get("HF_TOKEN", ""),
+ )
 
+ def extract_text_from_url(url):
+     """Extract text content from a web URL."""
      try:
+         headers = {
+             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
          }
+         response = requests.get(url, headers=headers, timeout=10)
+         response.raise_for_status()
 
+         soup = BeautifulSoup(response.content, 'html.parser')
 
+         # Remove script and style elements
+         for script in soup(["script", "style"]):
+             script.decompose()
 
+         # Get text content
+         text = soup.get_text()
 
+         # Clean up text
+         lines = (line.strip() for line in text.splitlines())
+         chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
+         text = ' '.join(chunk for chunk in chunks if chunk)
 
+         return text[:5000]  # Limit to first 5000 characters
      except Exception as e:
+         return f"Error fetching URL: {str(e)}"
 
+ def extract_entities_and_relationships(text):
+     """Use Mistral to extract entities and relationships from text."""
 
+     entity_prompt = f"""
+     Analyze the following text and extract key entities and their relationships.
+     Return the result as a JSON object with this exact structure:
+     {{
+         "entities": [
+             {{"name": "entity_name", "type": "PERSON|ORGANIZATION|LOCATION|CONCEPT|EVENT|OTHER", "description": "brief description"}}
+         ],
+         "relationships": [
+             {{"source": "entity1", "target": "entity2", "relationship": "relationship_type", "description": "brief description"}}
+         ]
+     }}
 
+     Text to analyze:
+     {text[:3000]}
 
+     Please provide only the JSON response without any additional text or formatting.
+     """
 
      try:
+         completion = client.chat.completions.create(
+             model="mistralai/Mistral-Small-24B-Instruct-2501",
+             messages=[
+                 {
+                     "role": "user",
+                     "content": entity_prompt
+                 }
+             ],
+             max_tokens=2000,
+             temperature=0.3
+         )
 
+         response_text = completion.choices[0].message.content.strip()
 
+         # Try to parse JSON from the response
+         # Sometimes the model might return JSON wrapped in markdown code blocks
+         if response_text.startswith('```'):
+             response_text = response_text.split('```')[1]
+             if response_text.startswith('json'):
+                 response_text = response_text[4:]
 
+         result = json.loads(response_text)
+         return result
 
+     except json.JSONDecodeError as e:
+         # If JSON parsing fails, return a structured error
+         return {
+             "entities": [],
+             "relationships": [],
+             "error": f"Failed to parse LLM response as JSON: {str(e)}",
+             "raw_response": response_text
+         }
      except Exception as e:
          return {
              "entities": [],
              "relationships": [],
+             "error": f"Error calling Mistral API: {str(e)}"
          }
 
+ def build_knowledge_graph(input_text):
+     """Main function to build knowledge graph from text or URL."""
 
+     if not input_text.strip():
+         return json.dumps({
+             "error": "Please provide text or a valid URL",
+             "knowledge_graph": None
+         }, indent=2)
+
+     # Check if input is a URL
+     parsed = urlparse(input_text.strip())
+     is_url = parsed.scheme in ('http', 'https') and parsed.netloc
+
+     if is_url:
+         # Extract text from URL
+         extracted_text = extract_text_from_url(input_text.strip())
+         if extracted_text.startswith("Error fetching URL"):
+             return json.dumps({
+                 "error": extracted_text,
+                 "knowledge_graph": None
+             }, indent=2)
+         source_type = "url"
+         source = input_text.strip()
+         content = extracted_text
+     else:
+         # Use provided text directly
+         source_type = "text"
+         source = "direct_input"
+         content = input_text.strip()
+
+     # Extract entities and relationships using Mistral
+     kg_data = extract_entities_and_relationships(content)
+
+     # Build the final knowledge graph structure
+     knowledge_graph = {
+         "source": {
+             "type": source_type,
+             "value": source,
+             "content_preview": content[:200] + "..." if len(content) > 200 else content
+         },
+         "knowledge_graph": {
+             "entities": kg_data.get("entities", []),
+             "relationships": kg_data.get("relationships", []),
+             "entity_count": len(kg_data.get("entities", [])),
+             "relationship_count": len(kg_data.get("relationships", []))
+         },
+         "metadata": {
+             "model": "mistralai/Mistral-Small-24B-Instruct-2501",
+             "content_length": len(content)
          }
+     }
 
+     # Add any errors from the extraction process
+     if "error" in kg_data:
+         knowledge_graph["extraction_error"] = kg_data["error"]
+     if "raw_response" in kg_data:
+         knowledge_graph["raw_llm_response"] = kg_data["raw_response"]
+
+     return json.dumps(knowledge_graph, indent=2, ensure_ascii=False)
 
+ # Create Gradio interface
+ demo = gr.Interface(
+     fn=build_knowledge_graph,
+     inputs=gr.Textbox(
+         label="Text or URL Input",
+         placeholder="Enter text to analyze or a web URL (e.g., https://example.com)",
+         lines=5,
+         max_lines=10
+     ),
+     outputs=gr.JSON(label="Knowledge Graph"),
+     title="🧠 Knowledge Graph Builder",
+     description="""
+     **Build Knowledge Graphs with AI**
 
+     This tool uses Mistral AI to extract entities and relationships from text or web content:
+
+     • **Text Input**: Paste any text to analyze
+     • **URL Input**: Provide a web URL to extract and analyze content
+     • **Output**: Structured JSON knowledge graph for LLM agents
+
+     The output includes entities (people, organizations, locations, concepts) and their relationships, formatted for easy consumption by AI agents.
+     """,
+     examples=[
+         ["Apple Inc. was founded by Steve Jobs, Steve Wozniak, and Ronald Wayne in 1976. The company is headquartered in Cupertino, California."],
+         ["https://en.wikipedia.org/wiki/Artificial_intelligence"],
+     ],
+     cache_examples=False,
+     theme=gr.themes.Soft()
+ )
+
+ demo.launch(mcp_server=True)
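
Once this version is deployed, the same build_knowledge_graph function is reachable through Gradio's client API as well as over MCP. A minimal client-side sketch follows; note that the Space id "VirtualOasis/kg-builder" is hypothetical (the actual Space name is not shown in this commit), and api_name="/predict" is simply Gradio's default endpoint name for a single-function gr.Interface:

# Hypothetical usage sketch, not part of the commit.
# Assumptions: the Space id below is invented for illustration, and
# "/predict" is the default api_name Gradio assigns to the Interface fn.
from gradio_client import Client

client = Client("VirtualOasis/kg-builder")  # hypothetical Space id
result = client.predict(
    "Apple Inc. was founded by Steve Jobs, Steve Wozniak, and Ronald Wayne in 1976.",
    api_name="/predict",
)
print(result)  # payload with "source", "knowledge_graph", and "metadata" keys

MCP clients would instead connect to the streaming endpoint that Gradio exposes when launched with mcp_server=True (typically <space-url>/gradio_api/mcp/sse in recent Gradio releases).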