Spaces:

ai4data
/

datause-detector

Running

File size: 3,341 Bytes

fd0fe48
d8c3809
3b9fb2c
 
d8c3809
cd683ff
 
 
 
 
 
 
 
 
 
d8c3809
cd683ff
 
 
 
 
 
 
 
 
 
 
fd0fe48
c35975c
d8c3809
 
9c95361
 
fd0fe48
cd683ff
 
 
 
 
 
 
13e7831
d8c3809
 
 
cd683ff
d8c3809
cd683ff
 
 
fd0fe48
3d53082
fd0fe48
cd683ff
d8c3809
13e7831
d8c3809
 
 
 
 
 
 
 
 
13e7831
d8c3809
9c95361
13e7831
fd0fe48
cd683ff

import re
import json
import gradio as gr

# Canned NER predictions for SAMPLE_TEXT.
# Offsets are 0-based with an exclusive end, i.e. SAMPLE_TEXT[start:end] must
# equal 'text'. "Home Visits Survey" occupies [11:29] in SAMPLE_TEXT; the
# earlier 12/30 pair was shifted by one and selected "ome Visits Survey,".
ner = [
    {
        'start': 11,
        'end': 29,
        'text': 'Home Visits Survey',
        'label': 'named dataset',
        'score': 0.9947463870048523
    }
]

# Canned relation-extraction predictions, keyed by the dataset mention they
# describe. Each record links that mention ('source') to a phrase ('target')
# under a relation label, together with the model's score.
relations = {
    "Home Visits Survey": [
        {
            "source": "Home Visits Survey",
            "relation": "data geography",
            "target": "Jordan",
            "score": 0.6180844902992249,
        },
        {
            "source": "Home Visits Survey",
            "relation": "version",
            "target": "Round II",
            "score": 0.9688164591789246,
        },
        {
            "source": "Home Visits Survey",
            "relation": "acronym",
            "target": "HV",
            "score": 0.9140607714653015,
        },
        {
            "source": "Home Visits Survey",
            "relation": "author",
            "target": "UNHCR",
            "score": 0.7762154340744019,
        },
        {
            "source": "Home Visits Survey",
            "relation": "author",
            "target": "World Food Programme",
            "score": 0.6582539677619934,
        },
        {
            "source": "Home Visits Survey",
            "relation": "reference year",
            "target": "2013",
            "score": 0.524115264415741,
        },
        {
            "source": "Home Visits Survey",
            "relation": "publication year",
            "target": "2014",
            "score": 0.6853994131088257,
        },
        {
            "source": "Home Visits Survey",
            "relation": "data description",
            "target": "detailed socio-economic, health, and protection data",
            "score": 0.6544178128242493,
        },
    ],
}

# Default passage shown in the input box. It is a single line (no newlines);
# the literal is split across adjacent strings only to keep the source readable.
SAMPLE_TEXT = (
    "The Jordan Home Visits Survey, Round II (HV), was carried out by UNHCR "
    "and the World Food Programme between November 2013 and September 2014. "
    "Through in-home visits to Syrian refugee households in Jordan, it "
    "gathered detailed socio-economic, health, and protection data—each "
    "household tagged with a unique ID to allow longitudinal tracking."
)

def highlight_text(text, ner_spans=None, relation_map=None):
    """Build the ``{"text", "entities"}`` payload for the highlighted-text view.

    Args:
        text: The passage to annotate. ``None`` is treated as an empty string.
        ner_spans: NER records with ``start``, ``end``, ``label`` and
            (optionally) ``text`` keys. Defaults to the module-level ``ner``.
        relation_map: Mapping of source mention -> list of relation records
            with ``relation`` and ``target`` keys. Defaults to the
            module-level ``relations``.

    Returns:
        A dict with the (normalised) ``text`` and an ``entities`` list of
        ``{"entity", "start", "end"}`` dicts, sorted by ``start`` and free of
        overlaps.
    """
    if ner_spans is None:
        ner_spans = ner
    if relation_map is None:
        relation_map = relations
    text = text or ""

    # Candidate spans as (start, end, label), collected in priority order:
    # NER first, then relations in the order they are listed.
    candidates = []

    # 1) NER spans
    for ent in ner_spans:
        start, end, surface = ent["start"], ent["end"], ent.get("text")
        if surface and text[start:end] != surface:
            # The recorded offsets do not point at the recorded mention (the
            # text was edited, or the offsets are stale), so locate the
            # mention by its surface form instead of highlighting the wrong
            # characters.
            candidates.extend(
                (m.start(), m.end(), ent["label"])
                for m in re.finditer(re.escape(surface), text)
            )
        elif 0 <= start < end <= len(text):
            candidates.append((start, end, ent["label"]))

    # 2) RE spans
    for rel_list in relation_map.values():
        for r in rel_list:
            target = r["target"]
            if not target:
                # An empty pattern matches at every position and would yield
                # a zero-width span per character.
                continue
            for m in re.finditer(re.escape(target), text):
                candidates.append((m.start(), m.end(), r["relation"]))

    # 3) Keep the first candidate covering any stretch of text; later spans
    #    that overlap an accepted one are dropped so the result is flat.
    accepted = []
    for start, end, label in candidates:
        if all(end <= a_start or start >= a_end for a_start, a_end, _ in accepted):
            accepted.append((start, end, label))
    accepted.sort(key=lambda span: span[:2])

    entities = [
        {"entity": label, "start": start, "end": end}
        for start, end, label in accepted
    ]
    return {"text": text, "entities": entities}

# Page layout, top to bottom: instructions, editable input passage, trigger
# button, highlighted output, and a JSON dump of the canned predictions.
with gr.Blocks() as demo:
    gr.Markdown(
        "## Data Use Detector\nPaste or edit the sample text, then click **Highlight** to run inference."
    )

    txt_in = gr.Textbox(value=SAMPLE_TEXT, lines=4, label="Input Text")
    btn = gr.Button("Highlight")
    txt_out = gr.HighlightedText(label="Annotated Entities")

    # Shows the module-level predictions as pretty-printed JSON. The box is
    # editable, but no event handler reads it back.
    ner_rel_box = gr.Textbox(
        value=json.dumps(dict(ner=ner, relations=relations), indent=2),
        label="Model Predictions (JSON)",
        interactive=True,
        lines=15,
    )

    # Annotation runs only when the button is pressed, not on every keystroke.
    btn.click(
        fn=highlight_text,
        inputs=txt_in,
        outputs=txt_out,
    )

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()