Spaces:
Running
Running
File size: 3,228 Bytes
3d53082 3b9fb2c 3d53082 3b9fb2c 3d53082 c35975c 3d53082 3b9fb2c 3d53082 c35975c 3d53082 c35975c 306e33b c35975c 3d53082 306e33b 3d53082 306e33b 3d53082 306e33b c35975c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
import os
import json
import gradio as gr
import torch
import spaces
from gliner import GLiNER
from gliner.multitask import GLiNERRelationExtractor
from typing import List, Dict, Any, Tuple
from tqdm.auto import tqdm
# Configuration
# Hugging Face Hub id of the fine-tuned GLiNER model used for both NER and RE.
data_model_id = "rafmacalaba/gliner_re_finetuned-v3"
# Optional download-cache directory; falls back to the library default when unset.
CACHE_DIR = os.environ.get("CACHE_DIR", None)

# Relation types
# Slot (relation) labels the relation extractor is asked to fill for each
# dataset-mention entity.
trels = [
    'acronym', 'author', 'data description',
    'data geography', 'data source', 'data type',
    'publication year', 'publisher', 'reference year', 'version'
]

# Map NER labels to relation types
# Every dataset-mention entity class shares the same candidate relation set;
# labels absent from this map get no relation extraction.
TYPE2RELS = {
    "named dataset": trels,
    "unnamed dataset": trels,
    "vague dataset": trels,
}

# Load models
# Module-level load: happens once at import/startup, before the UI is built.
print("Loading NER+RE model...")
model = GLiNER.from_pretrained(data_model_id, cache_dir=CACHE_DIR)
# The relation extractor wraps the SAME underlying GLiNER model.
relation_extractor = GLiNERRelationExtractor(model=model)
if torch.cuda.is_available():
    model.to("cuda")
    # NOTE(review): relation_extractor.model is the same object as `model`,
    # so this second .to("cuda") is presumably a no-op — confirm against the
    # GLiNERRelationExtractor implementation.
    relation_extractor.model.to("cuda")
print("Models loaded.")
# Inference pipeline
def inference_pipeline(
    text: str,
    model,
    labels: List[str],
    relation_extractor: "GLiNERRelationExtractor",
    TYPE2RELS: Dict[str, List[str]],
    ner_threshold: float = 0.5,
    re_threshold: float = 0.4,
    re_multi_label: bool = False,
) -> Tuple[List[Dict[str, Any]], Dict[str, List[Dict[str, Any]]]]:
    """Run NER over *text*, then relation extraction for each entity span.

    Args:
        text: Input document to analyze.
        model: GLiNER-style model exposing ``predict_entities(text, labels,
            flat_ner=..., threshold=...)``.
        labels: Entity labels to detect.
        relation_extractor: Callable that, given the text and a list of
            ``"<span> <> <relation>"`` slot labels, returns a batch whose
            first element is the list of relation predictions.
        TYPE2RELS: Map from NER label to the relation types to query for
            entities of that label; labels missing from the map are skipped.
        ner_threshold: Confidence threshold for entity predictions.
        re_threshold: Confidence threshold for relation predictions.
        re_multi_label: Whether a slot may receive multiple relation labels.

    Returns:
        ``(ner_preds, re_results)`` where ``ner_preds`` is the raw entity
        prediction list and ``re_results`` maps each distinct entity span
        text to its relation predictions.
    """
    # NER predictions
    ner_preds = model.predict_entities(
        text,
        labels,
        flat_ner=True,
        threshold=ner_threshold
    )

    # Relation extraction per entity span
    re_results: Dict[str, List[Dict[str, Any]]] = {}
    for ner in ner_preds:
        span = ner['text']
        # Fix: the same surface text can be predicted more than once (or under
        # several labels). Previously each duplicate re-ran the extractor and
        # overwrote re_results[span] with an identical result — skip instead.
        if span in re_results:
            continue
        rel_types = TYPE2RELS.get(ner['label'], [])
        if not rel_types:
            continue
        # One "span <> relation" slot label per candidate relation type.
        slot_labels = [f"{span} <> {r}" for r in rel_types]
        preds = relation_extractor(
            text,
            relations=None,
            entities=None,
            relation_labels=slot_labels,
            threshold=re_threshold,
            multi_label=re_multi_label,
            distance_threshold=100,  # max token distance between span and filler
        )[0]
        re_results[span] = preds

    return ner_preds, re_results
# Gradio UI - Step 2: Model Inference
@spaces.GPU(enable_queue=True, duration=120)
def model_inference(query: str) -> str:
    """Run the NER + relation-extraction pipeline on *query*.

    Uses the module-level model, relation extractor, and label/relation maps,
    and returns the combined result as a pretty-printed JSON string for
    display in the Gradio output box.
    """
    dataset_labels = ["named dataset", "unnamed dataset", "vague dataset"]
    entities, relations = inference_pipeline(
        query,
        model,
        dataset_labels,
        relation_extractor,
        TYPE2RELS,
    )
    return json.dumps({"entities": entities, "relations": relations}, indent=2)
# Gradio UI - Step 2: Model Inference
with gr.Blocks(title="Step 2: NER + Relation Inference") as demo:
    # Header / instructions shown above the input box.
    gr.Markdown(
        """
        ## Step 2: Integrate Model Inference
        Enter text and click submit to run your GLiNER-based NER + RE pipeline.
        """
    )

    # Input box, trigger button, and JSON output, wired to model_inference.
    text_input = gr.Textbox(
        lines=4,
        placeholder="Type your text here...",
        label="Input Text",
    )
    run_button = gr.Button("Submit")
    json_output = gr.Textbox(
        lines=15,
        label="Model Output (JSON)",
    )

    run_button.click(
        fn=model_inference,
        inputs=[text_input],
        outputs=[json_output],
    )

if __name__ == "__main__":
    demo.launch(debug=True)
|