Spaces:

ai4data
/

datause-detector

Running

App Files Files Community

rafmacalaba committed 18 days ago

Commit

2463f9e

1 Parent(s): ab71a6e

add labels and rels

Browse files

Files changed (1) hide show

app.py +41 -1

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import re
 import json
 from collections import defaultdict
 import gradio as gr
 # Load environment variable for cache dir (useful on Spaces)
 _CACHE_DIR = os.environ.get("CACHE_DIR", None)
@@ -41,6 +41,46 @@ TYPE2RELS = {
     "vague dataset":   rels,
 }
 def prune_acronym_and_self_relations(ner_preds, rel_preds):
     # 1) Find acronym targets strictly shorter than their source
     acronym_targets = {

 import json
 from collections import defaultdict
 import gradio as gr
+from typing import List, Dict, Any, Tuple
 # Load environment variable for cache dir (useful on Spaces)
 _CACHE_DIR = os.environ.get("CACHE_DIR", None)
     "vague dataset":   rels,
 }
def inference_pipeline(
    text: str,
    model,
    labels: List[str],
    relation_extractor: "GLiNERRelationExtractor",
    TYPE2RELS: Dict[str, List[str]],
    ner_threshold: float = 0.5,
    re_threshold: float = 0.4,
    re_multi_label: bool = False,
    distance_threshold: int = 100,
) -> Tuple[List[Dict[str, Any]], Dict[str, List[Dict[str, Any]]]]:
    """Extract entities from ``text``, then query relations for each entity.

    Entities are predicted first. For every predicted entity whose label has
    relation types registered in ``TYPE2RELS``, one relation query is built
    with slot labels of the form ``"<span> <> <relation>"``.

    Args:
        text: Input passage to analyse.
        model: Object exposing ``predict_entities(text, labels, flat_ner=...,
            threshold=...)`` that returns an iterable of dicts carrying at
            least the keys ``"text"`` and ``"label"``.
        labels: Entity labels handed to ``model.predict_entities``.
        relation_extractor: Callable invoked with ``text`` plus the keyword
            arguments ``relations``, ``entities``, ``relation_labels``,
            ``threshold``, ``multi_label`` and ``distance_threshold``. The
            first element of its result is used as the predictions for
            ``text``.
        TYPE2RELS: Mapping from entity label to the relation types to query
            for entities of that label. Labels that are missing or map to an
            empty list are skipped.
        ner_threshold: Threshold forwarded to ``model.predict_entities``.
        re_threshold: Threshold forwarded to ``relation_extractor``.
        re_multi_label: ``multi_label`` flag forwarded to
            ``relation_extractor``.
        distance_threshold: Forwarded unchanged to ``relation_extractor``.
            Defaults to 100, the value that used to be hard-coded here.

    Returns:
        A ``(ner_preds, re_results)`` tuple. ``ner_preds`` is whatever
        ``model.predict_entities`` returned. ``re_results`` maps an entity's
        surface text to its relation predictions; when the same surface text
        is predicted under several labels, the last one processed wins.
    """
    # Nothing to extract from blank input; skip both model calls.
    if not text or not text.strip():
        return [], {}

    ner_preds = model.predict_entities(
        text,
        labels,
        flat_ner=True,
        threshold=ner_threshold,
    )

    re_results: Dict[str, List[Dict[str, Any]]] = {}
    # A span mentioned several times yields the same slot labels each time,
    # so remember the answer per query instead of re-running the extractor.
    answered: Dict[Tuple[str, ...], List[Dict[str, Any]]] = {}

    for ner in ner_preds:
        span = ner["text"]
        rel_types = TYPE2RELS.get(ner["label"], [])
        if not rel_types:
            continue

        slot_labels = [f"{span} <> {r}" for r in rel_types]
        query = tuple(slot_labels)
        if query not in answered:
            batch = relation_extractor(
                text,
                relations=None,
                entities=None,
                relation_labels=slot_labels,
                threshold=re_threshold,
                multi_label=re_multi_label,
                distance_threshold=distance_threshold,
            )
            # Only one text is submitted, so only the first entry matters;
            # an empty result is treated as "no relations found".
            answered[query] = batch[0] if batch else []
        re_results[span] = answered[query]

    return ner_preds, re_results
 def prune_acronym_and_self_relations(ner_preds, rel_preds):
     # 1) Find acronym targets strictly shorter than their source
     acronym_targets = {