# Spaces: Running
import re | |
import json | |
import gradio as gr | |
# Raw NER output from the model. The start/end values are character offsets
# into the sample text and are taken as-is.
ner = [
    dict(
        start=11,
        end=29,
        text="Home Visits Survey",
        label="named dataset",
        score=0.9947463870048523,
    ),
]
# Raw relation-extraction output from the model, keyed by the source entity.
# Each entry links the source dataset mention to a target string found in the
# text, together with the relation label and the model's confidence score.
relations = {
    "Home Visits Survey": [
        {
            "source": "Home Visits Survey",
            "relation": relation_label,
            "target": target_text,
            "score": confidence,
        }
        for relation_label, target_text, confidence in (
            ("data geography", "Jordan", 0.6180844902992249),
            ("version", "Round II", 0.9688164591789246),
            ("acronym", "HV", 0.9140607714653015),
            ("author", "UNHCR", 0.7762154340744019),
            ("author", "World Food Programme", 0.6582539677619934),
            ("reference year", "2013", 0.524115264415741),
            ("publication year", "2014", 0.6853994131088257),
            (
                "data description",
                "detailed socio-economic, health, and protection data",
                0.6544178128242493,
            ),
        )
    ]
}
# Sample passage pre-filled in the input box. Written as adjacent string
# literals; the concatenated value is a single line with no line breaks.
SAMPLE_TEXT = (
    "The Jordan Home Visits Survey, Round II (HV), was carried out by UNHCR "
    "and the World Food Programme between November 2013 and September 2014. "
    "Through in-home visits to Syrian refugee households in Jordan, it "
    "gathered detailed socio-economic, health, and protection data—each "
    "household tagged with a unique ID to allow longitudinal tracking."
)
def _find_spans(label, needle, text):
    """Return one entity dict per non-overlapping occurrence of *needle* in *text*."""
    if not needle:
        # An empty pattern matches at every position and would only produce
        # zero-length spans, so there is nothing useful to highlight.
        return []
    return [
        {"entity": label, "start": m.start(), "end": m.end()}
        for m in re.finditer(re.escape(needle), text)
    ]


def highlight_text(text, *, ner_spans=None, relation_map=None):
    """Build the ``{"text": ..., "entities": [...]}`` payload for highlighting.

    Args:
        text: The passage to annotate. ``None`` is treated as an empty string.
        ner_spans: Optional list of NER dicts with ``start``, ``end``,
            ``label`` and (optionally) ``text`` keys. Defaults to the
            module-level ``ner`` list.
        relation_map: Optional mapping of source entity to a list of relation
            dicts with ``relation`` and ``target`` keys. Defaults to the
            module-level ``relations`` mapping.

    Returns:
        A dict with the (normalised) ``text`` and a list of ``entities``, each
        a dict with ``entity``, ``start`` and ``end`` keys. NER spans come
        first, followed by one span per occurrence of every relation target.
    """
    text = text or ""
    if ner_spans is None:
        ner_spans = ner
    if relation_map is None:
        relation_map = relations

    entities = []

    # 1) NER spans. The stored offsets are only valid for the text they were
    #    predicted on; the input box is editable, so verify them first.
    for ent in ner_spans:
        start, end = ent["start"], ent["end"]
        surface = ent.get("text")
        in_bounds = 0 <= start < end <= len(text)
        if in_bounds and (surface is None or text[start:end] == surface):
            entities.append({"entity": ent["label"], "start": start, "end": end})
        elif surface:
            # Offsets are stale (text was edited): re-locate the mention by
            # its surface form, or emit nothing if it no longer appears.
            entities.extend(_find_spans(ent["label"], surface, text))

    # 2) RE spans: highlight every literal occurrence of each relation target.
    for rel_list in relation_map.values():
        for rel in rel_list:
            entities.extend(_find_spans(rel["relation"], rel["target"], text))

    return {"text": text, "entities": entities}
def get_model_predictions():
    """Return the raw NER and relation predictions as a pretty-printed JSON string."""
    payload = dict(ner=ner, relations=relations)
    return json.dumps(payload, indent=2)
# Demo page: an editable text box, a highlighted view of the annotations, and
# a read-only box showing the raw predictions as JSON.
with gr.Blocks() as demo:
    gr.Markdown(
        "## Data Use Detector\n"
        "Edit the sample text, then click **Highlight** to annotate entities, or **Get Model Predictions** to see the raw JSON."
    )

    txt_in = gr.Textbox(value=SAMPLE_TEXT, lines=4, label="Input Text")
    highlight_btn = gr.Button("Highlight")
    txt_out = gr.HighlightedText(label="Annotated Entities")

    get_pred_btn = gr.Button("Get Model Predictions")
    ner_rel_box = gr.Textbox(
        value="",
        lines=15,
        label="Model Predictions (JSON)",
        interactive=False,
    )

    # Both outputs are refreshed only when their button is clicked.
    highlight_btn.click(highlight_text, txt_in, txt_out)
    get_pred_btn.click(get_model_predictions, None, ner_rel_box)

if __name__ == "__main__":
    demo.launch()