Spaces:
Running
Running
File size: 3,341 Bytes
fd0fe48 d8c3809 3b9fb2c d8c3809 cd683ff d8c3809 cd683ff fd0fe48 c35975c d8c3809 9c95361 fd0fe48 cd683ff 13e7831 d8c3809 cd683ff d8c3809 cd683ff fd0fe48 3d53082 fd0fe48 cd683ff d8c3809 13e7831 d8c3809 13e7831 d8c3809 9c95361 13e7831 fd0fe48 cd683ff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import re
import json
import gradio as gr
# Raw NER predictions shown in the demo: one dict per detected mention, holding
# its character offsets (start/end), surface text, label and confidence score.
# NOTE(review): against SAMPLE_TEXT the mention "Home Visits Survey" sits at
# [11:29], while the stored offsets 12-30 select "ome Visits Survey," -- the
# values are kept verbatim as model output; confirm which text they refer to.
ner = [
    dict(
        start=12,
        end=30,
        text="Home Visits Survey",
        label="named dataset",
        score=0.9947463870048523,
    ),
]
# Raw relation-extraction predictions, keyed by the source entity text. Every
# record carries the source, the relation name, the target string and a score.
# The records are expanded from compact (relation, target, score) rows.
relations = {
    "Home Visits Survey": [
        {
            "source": "Home Visits Survey",
            "relation": relation,
            "target": target,
            "score": score,
        }
        for relation, target, score in (
            ("data geography", "Jordan", 0.6180844902992249),
            ("version", "Round II", 0.9688164591789246),
            ("acronym", "HV", 0.9140607714653015),
            ("author", "UNHCR", 0.7762154340744019),
            ("author", "World Food Programme", 0.6582539677619934),
            ("reference year", "2013", 0.524115264415741),
            ("publication year", "2014", 0.6853994131088257),
            (
                "data description",
                "detailed socio-economic, health, and protection data",
                0.6544178128242493,
            ),
        )
    ],
}
# Default passage pre-filled in the input box. It is a single line of text;
# the adjacent string pieces below are joined by the parser without any
# separator, so each piece carries its own trailing space.
SAMPLE_TEXT = (
    "The Jordan Home Visits Survey, Round II (HV), was carried out by UNHCR "
    "and the World Food Programme between November 2013 and September 2014. "
    "Through in-home visits to Syrian refugee households in Jordan, it "
    "gathered detailed socio-economic, health, and protection data—each "
    "household tagged with a unique ID to allow longitudinal tracking."
)
def _locate_mention(text, mention, start, end):
    """Return the ``(start, end)`` span of an NER mention in *text*, or None.

    The predicted offsets are kept when they fall inside *text* and, if the
    mention string is known, slice out exactly that string. Otherwise the
    mention is searched for literally and the occurrence closest to the
    predicted start is used. ``None`` means the mention cannot be placed.
    """
    offsets_usable = (
        isinstance(start, int)
        and isinstance(end, int)
        and 0 <= start < end <= len(text)
    )
    if offsets_usable and (not mention or text[start:end] == mention):
        return start, end
    if not mention:
        # Nothing to search for and the offsets cannot be trusted.
        return None

    hits = [m.start() for m in re.finditer(re.escape(mention), text)]
    if not hits:
        return None
    anchor = start if isinstance(start, int) else 0
    nearest = min(hits, key=lambda pos: abs(pos - anchor))
    return nearest, nearest + len(mention)


def highlight_text(text, ner_preds=None, relation_preds=None):
    """Build the highlighted-text payload for *text*.

    Args:
        text: The passage to annotate. ``None`` is treated as an empty string.
        ner_preds: Iterable of NER dicts with ``label`` and, optionally,
            ``start``/``end``/``text``. Defaults to the module-level ``ner``.
        relation_preds: Mapping of source entity to a list of relation dicts
            with ``relation`` and ``target``. Defaults to the module-level
            ``relations``.

    Returns:
        ``{"text": text, "entities": [...]}`` where every entity is a dict
        with ``entity`` (label), ``start`` and ``end`` character offsets.
    """
    text = text or ""
    if ner_preds is None:
        ner_preds = ner
    if relation_preds is None:
        relation_preds = relations

    entities = []

    # 1) NER spans: predicted offsets are only valid for the text they were
    #    computed on, so verify them against the current text and re-locate
    #    the mention when they are shifted or the text has been edited.
    for ent in ner_preds:
        span = _locate_mention(
            text, ent.get("text"), ent.get("start"), ent.get("end")
        )
        if span is None:
            continue
        entities.append({"entity": ent["label"], "start": span[0], "end": span[1]})

    # 2) RE spans: every literal occurrence of a relation target is tagged
    #    with the relation name.
    for rel_list in relation_preds.values():
        for rel in rel_list:
            target = rel["target"]
            if not target:
                # An empty pattern would match at every position in the text.
                continue
            for match in re.finditer(re.escape(target), text):
                entities.append({
                    "entity": rel["relation"],
                    "start": match.start(),
                    "end": match.end(),
                })

    return {"text": text, "entities": entities}
# Assemble the demo page. Components are created in display order: heading,
# input box, button, highlighted output, then the JSON view of predictions.
with gr.Blocks() as demo:
    gr.Markdown(
        "## Data Use Detector\n"
        "Paste or edit the sample text, then click **Highlight** to run inference."
    )
    txt_in = gr.Textbox(value=SAMPLE_TEXT, lines=4, label="Input Text")
    btn = gr.Button("Highlight")
    txt_out = gr.HighlightedText(label="Annotated Entities")
    # NOTE(review): this box is editable, but it is not passed as an input to
    # the click handler below, so edits made here do not affect highlighting.
    ner_rel_box = gr.Textbox(
        value=json.dumps(dict(ner=ner, relations=relations), indent=2),
        label="Model Predictions (JSON)",
        interactive=True,
        lines=15,
    )

    # Highlighting runs only when the button is pressed, not on text change.
    btn.click(highlight_text, txt_in, txt_out)


# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|