jimfhahn committed on
Commit
7c7b0c4
·
verified ·
1 Parent(s): 62f2727

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +379 -7
app.py CHANGED
@@ -13,6 +13,7 @@ import sys
13
  import asyncio
14
  import logging
15
  import requests
 
16
  from typing import Any, Dict, List, Optional
17
  import threading
18
  import time
@@ -168,6 +169,58 @@ def validate_rdf_tool(rdf_content: str, template: str = "monograph") -> dict:
168
  "conforms": False
169
  }
170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  def get_ai_suggestions(validation_results: str, rdf_content: str, include_warnings: bool = False) -> str:
172
  """
173
  Generate AI-powered fix suggestions for invalid RDF/XML.
@@ -208,7 +261,309 @@ def get_ai_suggestions(validation_results: str, rdf_content: str, include_warnin
208
 
209
  severity_instruction = "Focus only on violations (errors) and ignore any warnings." if not include_warnings else "Address both violations and warnings."
210
 
211
- prompt = f"""You are an expert in RDF/XML and SHACL validation. Analyze the following validation results and provide clear, actionable suggestions for fixing the RDF issues.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
 
213
  {severity_instruction}
214
 
@@ -218,13 +573,30 @@ Validation Results:
218
  Original RDF (first 1000 chars):
219
  {rdf_content[:1000]}...
220
 
221
- Please provide:
222
- 1. A clear summary of what's wrong
223
- 2. Specific step-by-step instructions to fix each issue
224
- 3. Example corrections where applicable
225
- 4. Best practices to prevent similar issues
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
 
227
- Format your response in a helpful, structured way using markdown."""
228
 
229
  # Make API call using OpenAI client
230
  print(f"πŸ”„ Making API call to: {HF_ENDPOINT_URL}")
 
13
  import asyncio
14
  import logging
15
  import requests
16
+ import re
17
  from typing import Any, Dict, List, Optional
18
  import threading
19
  import time
 
169
  "conforms": False
170
  }
171
 
172
def filter_validation_results_by_class(validation_results: str, rdf_content: str) -> dict:
    """
    Group the lines of a validation report by the BIBFRAME class they concern.

    The report is read as a sequence of sections, each starting at a line that
    contains ``Constraint Violation``. A section is filed under the class named
    by the last recognised marker found in its own lines, or under ``'Other'``
    when none of its lines carries a marker. Lines that precede the first
    violation form a section of their own.

    Args:
        validation_results (str): Full validation report text.
        rdf_content (str): Original RDF content. Not used by the grouping;
            accepted so existing callers keep working.

    Returns:
        dict: Class name (``'Work'``, ``'Instance'``, ``'Title'``,
            ``'Contribution'`` or ``'Other'``) mapped to that class's report
            lines joined with newlines. Classes without lines are omitted, so
            an empty report yields an empty dict.
    """
    # Checked in order for every line; the first class with a matching marker
    # wins for that line.
    class_markers = (
        ('Work', ('bf:Work', '/work/')),
        ('Instance', ('bf:Instance', '/instance/')),
        ('Title', ('bf:Title',)),
        ('Contribution', ('bf:Contribution',)),
    )

    grouped = {name: [] for name, _ in class_markers}
    grouped['Other'] = []

    section = []
    section_class = 'Other'

    for line in validation_results.split('\n'):
        if 'Constraint Violation' in line:
            # File the finished section BEFORE looking at the new header, and
            # reset the class, so one violation can neither re-label its
            # predecessor nor leak its class into its successor.
            grouped[section_class].extend(section)
            section = []
            section_class = 'Other'

        for name, markers in class_markers:
            if any(marker in line for marker in markers):
                section_class = name
                break

        # Blank lines carry no information for the prompt; drop them.
        if line.strip():
            section.append(line)

    # The last section has no following header to trigger its flush.
    grouped[section_class].extend(section)

    return {name: '\n'.join(lines) for name, lines in grouped.items() if lines}
223
+
224
  def get_ai_suggestions(validation_results: str, rdf_content: str, include_warnings: bool = False) -> str:
225
  """
226
  Generate AI-powered fix suggestions for invalid RDF/XML.
 
261
 
262
  severity_instruction = "Focus only on violations (errors) and ignore any warnings." if not include_warnings else "Address both violations and warnings."
263
 
264
+ # Filter validation results by class to reduce token usage
265
+ class_results = filter_validation_results_by_class(validation_results, rdf_content)
266
+
267
+ # Determine primary class with most errors
268
+ primary_class = max(class_results.keys(), key=lambda k: len(class_results[k]))
269
+ focused_results = class_results[primary_class]
270
+
271
+ # Extract only relevant RDF section for the primary class
272
+ relevant_rdf = extract_relevant_rdf_section(rdf_content, primary_class)
273
+
274
+ prompt = f"""You are an expert in RDF/XML and SHACL validation. Analyze the validation errors for the {primary_class} class and provide CONCISE, ACTIONABLE fixes.
275
+
276
+ {severity_instruction}
277
+
278
+ Validation Errors for {primary_class}:
279
+ {focused_results[:1500]}
280
+
281
+ Relevant RDF Section:
282
+ {relevant_rdf[:800]}
283
+
284
+ Instructions:
285
+ 1. ONE sentence: What's wrong with this {primary_class}?
286
+ 2. List errors (max 3 words each)
287
+ 3. Show exact XML fixes
288
+
289
+ Format:
290
+ **Issue:** [One sentence about the {primary_class} problem]
291
+
292
+ **Errors:**
293
+ β€’ Error 1
294
+ β€’ Error 2
295
+
296
+ **Fix:**
297
+ ```xml
298
+ [Complete corrected {primary_class} section]
299
+ ```
300
+
301
+ Be ultra-concise. Show the fix, not explanations."""
302
+
303
+ # Make API call using OpenAI client
304
+ print(f"πŸ”„ Making focused API call for {primary_class} class")
305
+ print(f"πŸ”„ Sending {len(focused_results)} chars instead of {len(validation_results)} chars")
306
+
307
+ chat_completion = client.chat.completions.create(
308
+ model=HF_MODEL,
309
+ messages=[
310
+ {
311
+ "role": "user",
312
+ "content": prompt
313
+ }
314
+ ],
315
+ max_tokens=800, # Reduced since we're focused on one class
316
+ temperature=0.5, # Lower temperature for more focused responses
317
+ top_p=0.9
318
+ )
319
+
320
+ print("βœ… API call successful")
321
+ generated_text = chat_completion.choices[0].message.content
322
+
323
+ # Add note about other classes if present
324
+ other_classes = [k for k in class_results.keys() if k != primary_class]
325
+ class_note = f"\n\nπŸ“Œ **Note:** Focused on {primary_class} errors. " + \
326
+ (f"Also found issues in: {', '.join(other_classes)}" if other_classes else "")
327
+
328
+ return f"πŸ€– **AI-Powered Suggestions ({('Violations + Warnings' if include_warnings else 'Violations Only')}):**\n\n{generated_text}{class_note}"
329
+
330
+ except Exception as e:
331
+ logger.error(f"OpenAI/HF Inference Endpoint error: {str(e)}")
332
+ return f"""
333
+ ❌ **AI suggestions error**: {str(e)}
334
+
335
+ {generate_manual_suggestions(validation_results)}
336
+ """
337
+
338
def extract_relevant_rdf_section(rdf_content: str, class_name: str) -> str:
    """
    Return the first RDF/XML element of the given BIBFRAME class.

    Args:
        rdf_content (str): Full RDF/XML document.
        class_name (str): Class to extract: ``'Work'``, ``'Instance'``,
            ``'Title'`` or ``'Contribution'``.

    Returns:
        str: The first ``<bf:{class_name} ...>...</bf:{class_name}>`` span,
            prefixed with a comment listing up to three ``xmlns:`` declarations
            found in the first 500 characters of the document (when any are
            found). Falls back to the first 1000 characters of ``rdf_content``
            when the class is not one of the four above or no element matches.
    """
    import re  # function-scope import keeps this helper self-contained

    if class_name not in ('Work', 'Instance', 'Title', 'Contribution'):
        return rdf_content[:1000]  # Fallback to first 1000 chars

    tag = f'bf:{class_name}'
    # The lookahead requires the element name to END after the class name, so
    # that e.g. <bf:WorkTitle> is not mistaken for <bf:Work>.
    # NOTE(review): a regex cannot pair nested or self-closing elements of the
    # same name; the span ends at the first closing tag found.
    pattern = rf'<{tag}(?=[\s/>]).*?</{tag}\s*>'

    match = re.search(pattern, rdf_content, re.DOTALL)
    if match is None:
        return rdf_content[:1000]  # Fallback

    section = match.group(0)

    # Give the model the prefixes it needs to read the fragment on its own.
    namespaces = re.findall(r'xmlns:\w+="[^"]*"', rdf_content[:500])
    if namespaces:
        return f"<!-- Namespaces: {' '.join(namespaces[:3])} -->\n{section}"
    return section
374
+
375
def get_ai_correction(validation_results: str, rdf_content: str, template: str = 'monograph', max_attempts: Optional[int] = None, include_warnings: bool = False) -> str:
    """
    Generate AI-corrected RDF/XML, one class section at a time.

    The validation report is grouped by class. For every class that has errors,
    the matching RDF section and that class's errors are sent to the LLM, and
    the sections it returns are merged back into the original document.

    The merged document is NOT re-validated here. ``template`` and
    ``max_attempts`` are accepted for interface compatibility but do not
    influence the result of this implementation.

    Args:
        validation_results (str): The validation error messages.
        rdf_content (str): The original invalid RDF/XML content.
        template (str): The validation template name (currently unused).
        max_attempts (int | None): Maximum number of correction attempts
            (currently unused).
        include_warnings (bool): Whether to fix warnings in addition to
            violations.

    Returns:
        str: On success, an XML comment line followed by the merged RDF/XML.
            When the OpenAI client or the ``HF_API_KEY`` environment variable
            is unavailable, when no section could be corrected, or when an
            error occurs, the manual correction hints instead (prefixed with
            an XML comment naming the reason, except when the OpenAI client
            library itself is unavailable).
    """
    if not OPENAI_AVAILABLE:
        return generate_manual_correction_hints(validation_results, rdf_content)

    # Get API key dynamically at runtime
    if not os.getenv('HF_API_KEY', ''):
        hints = generate_manual_correction_hints(validation_results, rdf_content)
        return (
            "<!-- AI correction disabled: Set HF_API_KEY as a Secret in your Space settings -->\n"
            "\n"
            f"{hints}"
        )

    try:
        client = get_openai_client()
        if not client:
            hints = generate_manual_correction_hints(validation_results, rdf_content)
            return (
                "<!-- AI correction disabled: HF_API_KEY not configured -->\n"
                "\n"
                f"{hints}"
            )

        # Overall time budget: stop starting new sections once fewer than
        # `safety_margin` seconds of the budget remain.
        overall_timeout = 60
        safety_margin = 10
        per_section_timeout = 20
        started = time.monotonic()

        if include_warnings:
            severity_instruction = "Fix both violations and warnings."
        else:
            severity_instruction = "Fix only the violations (errors) and ignore any warnings."

        # Process each class separately to avoid overwhelming the LLM
        class_results = filter_validation_results_by_class(validation_results, rdf_content)
        corrected_sections = {}

        for class_name, class_errors in class_results.items():
            if not class_errors:
                continue

            if time.monotonic() - started > overall_timeout - safety_margin:
                print(f"⏰ Approaching timeout, skipping {class_name}")
                break

            print(f"🔄 Correcting {class_name} section")

            relevant_section = extract_relevant_rdf_section(rdf_content, class_name)

            # NOTE(review): the section is truncated to 800 chars here, yet the
            # answer later replaces the WHOLE original section; sections longer
            # than that may lose content. Confirm this trade-off is intended.
            prompt = f"""Fix this {class_name} RDF section based on these specific errors.

{severity_instruction}

Errors for {class_name}:
{class_errors[:800]}

Current {class_name} RDF:
{relevant_section[:800]}

Return ONLY the corrected {class_name} XML section. No explanations."""

            try:
                chat_completion = client.chat.completions.create(
                    model=HF_MODEL,
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=1000,
                    temperature=0.3,
                    timeout=per_section_timeout,  # Shorter timeout per section
                )
                answer = chat_completion.choices[0].message.content.strip()
                fixed_section = extract_rdf_from_response(answer)
            except Exception as e:
                # Best effort: one failing section must not abort the others.
                print(f"❌ Error correcting {class_name}: {e}")
                continue

            # An empty answer would erase the section when merged; ignore it.
            if fixed_section:
                corrected_sections[class_name] = fixed_section

        if corrected_sections:
            corrected_rdf = merge_corrected_sections(rdf_content, corrected_sections)
            # Only claim a correction when the merge actually changed the
            # document (e.g. classes without a mergeable element change nothing).
            if corrected_rdf != rdf_content:
                return (
                    "<!-- AI-generated correction (class-based processing) -->\n"
                    f"{corrected_rdf}"
                )

        hints = generate_manual_correction_hints(validation_results, rdf_content)
        return (
            "<!-- AI correction failed - timeout or errors -->\n"
            f"{hints}"
        )

    except Exception as e:
        logger.error("LLM API error: %s", e)
        hints = generate_manual_correction_hints(validation_results, rdf_content)
        return (
            f"<!-- Error generating AI correction: {e} -->\n"
            "\n"
            f"{hints}"
        )
493
+
494
def merge_corrected_sections(original_rdf: str, corrected_sections: dict) -> str:
    """
    Merge corrected class sections back into the original RDF.

    For each supported class (``'Work'``, ``'Instance'``, ``'Title'``,
    ``'Contribution'``) the first ``<bf:{class}>...</bf:{class}>`` element in
    the document is replaced by the corrected text. Entries for other class
    names, and empty or whitespace-only corrections, are ignored.

    Args:
        original_rdf (str): Original RDF content.
        corrected_sections (dict): Corrected section text keyed by class name.

    Returns:
        str: The RDF with the corrected sections substituted in. Identical to
            ``original_rdf`` when nothing could be merged.
    """
    import re  # function-scope import keeps this helper self-contained

    mergeable_classes = ('Work', 'Instance', 'Title', 'Contribution')
    result = original_rdf

    for class_name, corrected_section in corrected_sections.items():
        if class_name not in mergeable_classes:
            continue
        # Substituting an empty correction would silently delete the element.
        if not corrected_section or not corrected_section.strip():
            continue

        tag = f'bf:{class_name}'
        # The lookahead stops <bf:Work from matching longer names such as
        # <bf:WorkTitle>.
        pattern = rf'<{tag}(?=[\s/>]).*?</{tag}\s*>'

        # A callable replacement inserts the text verbatim. Passing the string
        # itself would make re.sub interpret backslashes in model output as
        # escapes / group references (raising re.error or corrupting the XML).
        result = re.sub(
            pattern,
            lambda _match, text=corrected_section: text,
            result,
            count=1,
            flags=re.DOTALL,
        )

    return result
523
+
524
+ # Sample RDF data for examples
525
+ # MCP Server Tools (can be used independently)
526
+ # Note: This section exists earlier in the file, we're removing the duplicates
527
+ """
528
+ Generate AI-powered fix suggestions for invalid RDF/XML.
529
+
530
+ This tool analyzes validation results and provides actionable suggestions
531
+ for fixing RDF/XML validation errors using AI or rule-based analysis.
532
+
533
+ Args:
534
+ validation_results (str): The validation error messages
535
+ rdf_content (str): The original RDF/XML content that failed validation
536
+ include_warnings (bool): Whether to include warnings in suggestions
537
+
538
+ Returns:
539
+ str: Detailed suggestions for fixing the RDF validation issues
540
+ """
541
+
542
+ if not OPENAI_AVAILABLE:
543
+ return generate_manual_suggestions(validation_results)
544
+
545
+ # Get API key dynamically at runtime
546
+ current_api_key = os.getenv('HF_API_KEY', '')
547
+ if not current_api_key:
548
+ return f"""
549
+ πŸ”‘ **AI suggestions disabled**: Please set your Hugging Face API key as a Secret in your Space settings.
550
+
551
+ {generate_manual_suggestions(validation_results)}
552
+ """
553
+
554
+ try:
555
+ # Use OpenAI client with your Hugging Face Inference Endpoint
556
+ client = get_openai_client()
557
+ if not client:
558
+ return f"""
559
+ πŸ”‘ **AI suggestions disabled**: HF_API_KEY not configured.
560
+
561
+ {generate_manual_suggestions(validation_results)}
562
+ """
563
+
564
+ severity_instruction = "Focus only on violations (errors) and ignore any warnings." if not include_warnings else "Address both violations and warnings."
565
+
566
+ prompt = f"""You are an expert in RDF/XML and SHACL validation. Analyze the validation errors and provide CONCISE, ACTIONABLE fix suggestions.
567
 
568
  {severity_instruction}
569
 
 
573
  Original RDF (first 1000 chars):
574
  {rdf_content[:1000]}...
575
 
576
+ Instructions:
577
+ 1. Start with a ONE-SENTENCE summary of the main issue
578
+ 2. List the specific errors in bullet points (max 5 words per error)
579
+ 3. Provide the exact fix for each error with code snippets
580
+ 4. Keep explanations minimal - focus on solutions
581
+
582
+ Format:
583
+ **Main Issue:** [One sentence]
584
+
585
+ **Errors Found:**
586
+ β€’ Error 1 name
587
+ β€’ Error 2 name
588
+
589
+ **Fixes:**
590
+ 1. **Error 1**:
591
+ ```xml
592
+ [exact code to add/fix]
593
+ ```
594
+ 2. **Error 2**:
595
+ ```xml
596
+ [exact code to add/fix]
597
+ ```
598
 
599
+ Be direct and solution-focused. No lengthy explanations."""
600
 
601
  # Make API call using OpenAI client
602
  print(f"πŸ”„ Making API call to: {HF_ENDPOINT_URL}")