VirtualOasis committed on
Commit
7c56cb5
·
verified ·
1 Parent(s): 804b6dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -60
app.py CHANGED
@@ -42,6 +42,13 @@ def extract_text_from_url(url):
42
  def extract_entities_and_relationships(text):
43
  """Use Mistral to extract entities and relationships from text."""
44
 
 
 
 
 
 
 
 
45
  entity_prompt = f"""
46
  Analyze the following text and extract key entities and their relationships.
47
  Return the result as a JSON object with this exact structure:
@@ -70,19 +77,44 @@ def extract_entities_and_relationships(text):
70
  }
71
  ],
72
  max_tokens=2000,
73
- temperature=0.3
 
74
  )
75
 
 
 
 
 
 
 
 
76
  response_text = completion.choices[0].message.content.strip()
77
 
78
  # Try to parse JSON from the response
79
  # Sometimes the model might return JSON wrapped in markdown code blocks
80
  if response_text.startswith('```'):
81
- response_text = response_text.split('```')[1]
82
- if response_text.startswith('json'):
83
- response_text = response_text[4:]
 
 
 
 
 
 
 
84
 
85
  result = json.loads(response_text)
 
 
 
 
 
 
 
 
 
 
86
  return result
87
 
88
  except json.JSONDecodeError as e:
@@ -91,7 +123,7 @@ def extract_entities_and_relationships(text):
91
  "entities": [],
92
  "relationships": [],
93
  "error": f"Failed to parse LLM response as JSON: {str(e)}",
94
- "raw_response": response_text
95
  }
96
  except Exception as e:
97
  return {
@@ -103,66 +135,79 @@ def extract_entities_and_relationships(text):
103
  def build_knowledge_graph(input_text):
104
  """Main function to build knowledge graph from text or URL."""
105
 
106
- if not input_text.strip():
107
- return json.dumps({
108
- "error": "Please provide text or a valid URL",
109
- "knowledge_graph": None
110
- }, indent=2)
111
-
112
- # Check if input is a URL
113
- parsed = urlparse(input_text.strip())
114
- is_url = parsed.scheme in ('http', 'https') and parsed.netloc
115
-
116
- if is_url:
117
- # Extract text from URL
118
- extracted_text = extract_text_from_url(input_text.strip())
119
- if extracted_text.startswith("Error fetching URL"):
120
- return json.dumps({
121
- "error": extracted_text,
122
  "knowledge_graph": None
123
- }, indent=2)
124
- source_type = "url"
125
- source = input_text.strip()
126
- content = extracted_text
127
- else:
128
- # Use provided text directly
129
- source_type = "text"
130
- source = "direct_input"
131
- content = input_text.strip()
132
-
133
- # Extract entities and relationships using Mistral
134
- kg_data = extract_entities_and_relationships(content)
135
-
136
- # Build the final knowledge graph structure
137
- knowledge_graph = {
138
- "source": {
139
- "type": source_type,
140
- "value": source,
141
- "content_preview": content[:200] + "..." if len(content) > 200 else content
142
- },
143
- "knowledge_graph": {
144
- "entities": kg_data.get("entities", []),
145
- "relationships": kg_data.get("relationships", []),
146
- "entity_count": len(kg_data.get("entities", [])),
147
- "relationship_count": len(kg_data.get("relationships", []))
148
- },
149
- "metadata": {
150
- "model": "mistralai/Mistral-Small-24B-Instruct-2501",
151
- "content_length": len(content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  }
153
- }
154
-
155
- # Add any errors from the extraction process
156
- if "error" in kg_data:
157
- knowledge_graph["extraction_error"] = kg_data["error"]
158
- if "raw_response" in kg_data:
159
- knowledge_graph["raw_llm_response"] = kg_data["raw_response"]
160
-
161
- return json.dumps(knowledge_graph, indent=2, ensure_ascii=False)
 
 
 
 
 
 
 
 
 
 
 
162
 
163
  # Create Gradio interface
164
  demo = gr.Interface(
165
- fn=build_knowledge_graph,
166
  inputs=gr.Textbox(
167
  label="Text or URL Input",
168
  placeholder="Enter text to analyze or a web URL (e.g., https://example.com)",
@@ -190,4 +235,17 @@ demo = gr.Interface(
190
  theme=gr.themes.Soft()
191
  )
192
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  demo.launch(mcp_server=True)
 
42
  def extract_entities_and_relationships(text):
43
  """Use Mistral to extract entities and relationships from text."""
44
 
45
+ if not client.api_key:
46
+ return {
47
+ "entities": [],
48
+ "relationships": [],
49
+ "error": "HF_TOKEN environment variable not set"
50
+ }
51
+
52
  entity_prompt = f"""
53
  Analyze the following text and extract key entities and their relationships.
54
  Return the result as a JSON object with this exact structure:
 
77
  }
78
  ],
79
  max_tokens=2000,
80
+ temperature=0.3,
81
+ timeout=30
82
  )
83
 
84
+ if not completion.choices or not completion.choices[0].message:
85
+ return {
86
+ "entities": [],
87
+ "relationships": [],
88
+ "error": "Empty response from Mistral API"
89
+ }
90
+
91
  response_text = completion.choices[0].message.content.strip()
92
 
93
  # Try to parse JSON from the response
94
  # Sometimes the model might return JSON wrapped in markdown code blocks
95
  if response_text.startswith('```'):
96
+ lines = response_text.split('\n')
97
+ start_idx = 1
98
+ if lines[0].strip() == '```json':
99
+ start_idx = 1
100
+ end_idx = len(lines) - 1
101
+ for i in range(len(lines)-1, 0, -1):
102
+ if lines[i].strip() == '```':
103
+ end_idx = i
104
+ break
105
+ response_text = '\n'.join(lines[start_idx:end_idx])
106
 
107
  result = json.loads(response_text)
108
+
109
+ # Validate the structure
110
+ if not isinstance(result, dict):
111
+ raise ValueError("Response is not a JSON object")
112
+
113
+ if "entities" not in result:
114
+ result["entities"] = []
115
+ if "relationships" not in result:
116
+ result["relationships"] = []
117
+
118
  return result
119
 
120
  except json.JSONDecodeError as e:
 
123
  "entities": [],
124
  "relationships": [],
125
  "error": f"Failed to parse LLM response as JSON: {str(e)}",
126
+ "raw_response": response_text if 'response_text' in locals() else "No response"
127
  }
128
  except Exception as e:
129
  return {
 
135
  def build_knowledge_graph(input_text):
136
  """Main function to build knowledge graph from text or URL."""
137
 
138
+ try:
139
+ if not input_text or not input_text.strip():
140
+ return {
141
+ "error": "Please provide text or a valid URL",
 
 
 
 
 
 
 
 
 
 
 
 
142
  "knowledge_graph": None
143
+ }
144
+
145
+ # Check if input is a URL
146
+ parsed = urlparse(input_text.strip())
147
+ is_url = parsed.scheme in ('http', 'https') and parsed.netloc
148
+
149
+ if is_url:
150
+ # Extract text from URL
151
+ extracted_text = extract_text_from_url(input_text.strip())
152
+ if extracted_text.startswith("Error fetching URL"):
153
+ return {
154
+ "error": extracted_text,
155
+ "knowledge_graph": None
156
+ }
157
+ source_type = "url"
158
+ source = input_text.strip()
159
+ content = extracted_text
160
+ else:
161
+ # Use provided text directly
162
+ source_type = "text"
163
+ source = "direct_input"
164
+ content = input_text.strip()
165
+
166
+ # Extract entities and relationships using Mistral
167
+ kg_data = extract_entities_and_relationships(content)
168
+
169
+ # Build the final knowledge graph structure
170
+ knowledge_graph = {
171
+ "source": {
172
+ "type": source_type,
173
+ "value": source,
174
+ "content_preview": content[:200] + "..." if len(content) > 200 else content
175
+ },
176
+ "knowledge_graph": {
177
+ "entities": kg_data.get("entities", []),
178
+ "relationships": kg_data.get("relationships", []),
179
+ "entity_count": len(kg_data.get("entities", [])),
180
+ "relationship_count": len(kg_data.get("relationships", []))
181
+ },
182
+ "metadata": {
183
+ "model": "mistralai/Mistral-Small-24B-Instruct-2501",
184
+ "content_length": len(content)
185
+ }
186
  }
187
+
188
+ # Add any errors from the extraction process
189
+ if "error" in kg_data:
190
+ knowledge_graph["extraction_error"] = kg_data["error"]
191
+ if "raw_response" in kg_data:
192
+ knowledge_graph["raw_llm_response"] = kg_data["raw_response"]
193
+
194
+ return knowledge_graph
195
+
196
+ except Exception as e:
197
+ return {
198
+ "error": f"Unexpected error: {str(e)}",
199
+ "knowledge_graph": None
200
+ }
201
+
202
+ # Create wrapper function for proper JSON formatting in UI
203
def build_knowledge_graph_ui(input_text):
    """Return the knowledge graph for *input_text* as pretty-printed JSON text.

    Delegates to ``build_knowledge_graph`` and serializes its result with a
    two-space indent, keeping non-ASCII characters unescaped.
    """
    graph = build_knowledge_graph(input_text)
    pretty = json.dumps(graph, ensure_ascii=False, indent=2)
    return pretty
207
 
208
  # Create Gradio interface
209
  demo = gr.Interface(
210
+ fn=build_knowledge_graph_ui,
211
  inputs=gr.Textbox(
212
  label="Text or URL Input",
213
  placeholder="Enter text to analyze or a web URL (e.g., https://example.com)",
 
235
  theme=gr.themes.Soft()
236
  )
237
 
238
+ # Register MCP tools
239
# NOTE(review): `demo.mcp.register_tool` does not appear in Gradio's documented
# MCP API -- confirm against the installed Gradio version. The lookup is guarded
# so that a missing attribute cannot abort the script before the app launches.
mcp_registry = getattr(demo, "mcp", None)
register_tool = getattr(mcp_registry, "register_tool", None)
if callable(register_tool):
    register_tool(
        "build_knowledge_graph",
        build_knowledge_graph,
        "Build knowledge graph from text or URL",
        {
            "input_text": {
                "type": "string",
                "description": "Text content or URL to analyze and extract knowledge graph from"
            }
        }
    )
else:
    # Make the skipped registration visible instead of failing silently.
    print("MCP tool registration skipped: demo.mcp.register_tool is not available")
250
+
251
  demo.launch(mcp_server=True)