import re
import json
import gradio as gr
# Your model’s raw NER output (we trust these start/end indices)
ner = [
    {
        'start': 11,
        'end': 29,
        'text': 'Home Visits Survey',
        'label': 'named dataset',
        'score': 0.9947463870048523
    }
]
# Your model’s raw RE output
relations = {
    'Home Visits Survey': [
        {'source': 'Home Visits Survey', 'relation': 'data geography', 'target': 'Jordan', 'score': 0.6180844902992249},
        {'source': 'Home Visits Survey', 'relation': 'version', 'target': 'Round II', 'score': 0.9688164591789246},
        {'source': 'Home Visits Survey', 'relation': 'acronym', 'target': 'HV', 'score': 0.9140607714653015},
        {'source': 'Home Visits Survey', 'relation': 'author', 'target': 'UNHCR', 'score': 0.7762154340744019},
        {'source': 'Home Visits Survey', 'relation': 'author', 'target': 'World Food Programme', 'score': 0.6582539677619934},
        {'source': 'Home Visits Survey', 'relation': 'reference year', 'target': '2013', 'score': 0.524115264415741},
        {'source': 'Home Visits Survey', 'relation': 'publication year', 'target': '2014', 'score': 0.6853994131088257},
        {'source': 'Home Visits Survey', 'relation': 'data description', 'target': 'detailed socio-economic, health, and protection data', 'score': 0.6544178128242493},
    ]
}
# Exact sample text
SAMPLE_TEXT = """The Jordan Home Visits Survey, Round II (HV), was carried out by UNHCR and the World Food Programme between November 2013 and September 2014. Through in-home visits to Syrian refugee households in Jordan, it gathered detailed socio-economic, health, and protection data—each household tagged with a unique ID to allow longitudinal tracking."""
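# Note: highlight_text below builds the dictionary format that gr.HighlightedText accepts:
# {"text": <str>, "entities": [{"entity": <label>, "start": <int>, "end": <int>}, ...]}.
# NER spans reuse the model-supplied character offsets as-is; RE spans are located by
# searching the (possibly edited) text for each relation target, so a target that no
# longer appears verbatim simply is not highlighted.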
def highlight_text(text):
    entities = []
    # 1) NER spans
    for ent in ner:
        entities.append({
            "entity": ent["label"],
            "start": ent["start"],
            "end": ent["end"],
        })
    # 2) RE spans
    for rel_list in relations.values():
        for r in rel_list:
            for m in re.finditer(re.escape(r["target"]), text):
                entities.append({
                    "entity": r["relation"],
                    "start": m.start(),
                    "end": m.end(),
                })
    return {"text": text, "entities": entities}
def get_model_predictions():
    return json.dumps({"ner": ner, "relations": relations}, indent=2)
with gr.Blocks() as demo:
    gr.Markdown("## Data Use Detector\n"
                "Edit the sample text, then click **Highlight** to annotate entities, or **Get Model Predictions** to see the raw JSON.")
    txt_in = gr.Textbox(label="Input Text", lines=4, value=SAMPLE_TEXT)
    highlight_btn = gr.Button("Highlight")
    txt_out = gr.HighlightedText(label="Annotated Entities")
    get_pred_btn = gr.Button("Get Model Predictions")
    ner_rel_box = gr.Textbox(
        label="Model Predictions (JSON)",
        lines=15,
        value="",
        interactive=False
    )
    # Only trigger highlighting on click
    highlight_btn.click(fn=highlight_text, inputs=txt_in, outputs=txt_out)
    # Only show preds on click
    get_pred_btn.click(fn=get_model_predictions, inputs=None, outputs=ner_rel_box)
if __name__ == "__main__":
    demo.launch()
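# To try it locally (assuming gradio is installed): run `python app.py` and open
# the local URL that demo.launch() prints in the terminal.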