Spaces:

ai4data
/

datause-detector

Running

File size: 3,619 Bytes

fd0fe48
d8c3809
3b9fb2c
 
d8c3809
cd683ff
 
215cbc3
c38ba9f
cd683ff
 
 
 
 
 
d8c3809
cd683ff
 
 
 
 
 
 
 
 
 
 
fd0fe48
c35975c
d8c3809
 
9c95361
 
fd0fe48
cd683ff
 
 
 
 
 
 
13e7831
d8c3809
 
 
cd683ff
d8c3809
cd683ff
 
 
fd0fe48
3d53082
215cbc3
 
 
fd0fe48
c38ba9f
215cbc3
13e7831
d8c3809
215cbc3
d8c3809
215cbc3
 
 
d8c3809
 
215cbc3
 
d8c3809
13e7831
215cbc3
 
 
 
13e7831
fd0fe48
cd683ff

import re
import json
import gradio as gr

# Raw NER output from the model. The start/end character offsets are taken
# as given (they index into the sample text).
ner = [
    dict(
        start=11,
        end=29,
        text="Home Visits Survey",
        label="named dataset",
        score=0.9947463870048523,
    ),
]

# Raw relation-extraction output from the model, keyed by the source dataset
# name. Each row below is (relation, target, score) for that source.
relations = {
    dataset: [
        {"source": dataset, "relation": relation, "target": target, "score": score}
        for relation, target, score in (
            ("data geography", "Jordan", 0.6180844902992249),
            ("version", "Round II", 0.9688164591789246),
            ("acronym", "HV", 0.9140607714653015),
            ("author", "UNHCR", 0.7762154340744019),
            ("author", "World Food Programme", 0.6582539677619934),
            ("reference year", "2013", 0.524115264415741),
            ("publication year", "2014", 0.6853994131088257),
            (
                "data description",
                "detailed socio-economic, health, and protection data",
                0.6544178128242493,
            ),
        )
    ]
    for dataset in ("Home Visits Survey",)
}

# Sample passage shown in the input box by default. Kept character-for-character
# exact because the NER offsets index into it.
SAMPLE_TEXT = (
    "The Jordan Home Visits Survey, Round II (HV), was carried out by UNHCR "
    "and the World Food Programme between November 2013 and September 2014. "
    "Through in-home visits to Syrian refugee households in Jordan, it gathered "
    "detailed socio-economic, health, and protection data—each household tagged "
    "with a unique ID to allow longitudinal tracking."
)

def _literal_spans(text, needle, label):
    """Return one entity dict per non-overlapping literal occurrence of *needle* in *text*."""
    return [
        {"entity": label, "start": m.start(), "end": m.end()}
        for m in re.finditer(re.escape(needle), text)
    ]


def highlight_text(text, ner_spans=None, relation_map=None):
    """Build the ``{"text", "entities"}`` payload for the highlighted-text output.

    Args:
        text: The (possibly user-edited) text to annotate. ``None`` is treated
            as an empty string.
        ner_spans: Optional list of NER dicts with ``start``, ``end`` and
            ``label`` keys, plus an optional ``text`` key holding the expected
            surface string. Defaults to the module-level ``ner`` predictions.
        relation_map: Optional mapping whose values are lists of relation
            dicts with ``relation`` and ``target`` keys. Defaults to the
            module-level ``relations`` predictions.

    Returns:
        A dict with the input under ``"text"`` and, under ``"entities"``, a
        list of dicts each carrying ``entity``, ``start`` and ``end``.
    """
    if ner_spans is None:
        ner_spans = ner
    if relation_map is None:
        relation_map = relations
    text = text or ""
    entities = []

    # 1) NER spans. The stored offsets are only valid for the text they were
    # predicted on; since the input box is editable, verify them first.
    for ent in ner_spans:
        start, end = ent["start"], ent["end"]
        surface = ent.get("text")
        in_bounds = 0 <= start < end <= len(text)
        if in_bounds and (surface is None or text[start:end] == surface):
            entities.append({"entity": ent["label"], "start": start, "end": end})
        elif surface:
            # Offsets are stale for this text: relocate the entity by its
            # surface string instead of highlighting the wrong characters.
            entities.extend(_literal_spans(text, surface, ent["label"]))

    # 2) RE spans: highlight every literal occurrence of each relation target.
    for rel_list in relation_map.values():
        for rel in rel_list:
            target = rel["target"]
            if not target:
                # An empty pattern would match (zero-width) at every position.
                continue
            entities.extend(_literal_spans(text, target, rel["relation"]))

    return {"text": text, "entities": entities}

def get_model_predictions():
    """Return the module-level NER and relation predictions as 2-space-indented JSON."""
    payload = {"ner": ner, "relations": relations}
    return json.dumps(payload, indent=2)

with gr.Blocks() as demo:
    # Page header with brief usage instructions.
    gr.Markdown(
        "## Data Use Detector\n"
        "Edit the sample text, then click **Highlight** to annotate entities, "
        "or **Get Model Predictions** to see the raw JSON."
    )

    # Highlighting section: editable input, trigger button, annotated output.
    txt_in = gr.Textbox(value=SAMPLE_TEXT, lines=4, label="Input Text")
    highlight_btn = gr.Button("Highlight")
    txt_out = gr.HighlightedText(label="Annotated Entities")

    # Raw-predictions section: trigger button and a read-only JSON box.
    get_pred_btn = gr.Button("Get Model Predictions")
    ner_rel_box = gr.Textbox(
        value="",
        lines=15,
        interactive=False,
        label="Model Predictions (JSON)",
    )

    # Handlers are wired to button clicks only.
    highlight_btn.click(highlight_text, inputs=txt_in, outputs=txt_out)
    get_pred_btn.click(get_model_predictions, inputs=None, outputs=ner_rel_box)

if __name__ == "__main__":
    # Launch the app only when this file is run as a script.
    demo.launch()