Spaces:

ai4data
/

datause-detector

Running

File size: 3,673 Bytes

fd0fe48
3b9fb2c
 
cd683ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd0fe48
c35975c
cd683ff
9c95361
 
 
 
 
 
 
 
fd0fe48
cd683ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd0fe48
3d53082
fd0fe48
cd683ff
 
9c95361
cd683ff
 
9c95361
 
 
 
 
 
fd0fe48
d64d47b
9c95361
 
 
 
 
 
 
 
fd0fe48
306e33b
fd0fe48
cd683ff

import re
import gradio as gr

# Hard-coded model outputs for SAMPLE_TEXT (defined further down in this block).
#
# NER output: one dict per detected mention. 'start'/'end' are character
# offsets into SAMPLE_TEXT (end-exclusive), so SAMPLE_TEXT[start:end] == 'text'.
# NOTE: the offsets previously stored here (12, 30) were shifted one character
# to the right of the mention and sliced "ome Visits Survey," instead.
ner = [
    {
        'start': 11,
        'end': 29,
        'text': 'Home Visits Survey',
        'label': 'named dataset',
        'score': 0.9947463870048523,
    }
]

# Relation-extraction output, keyed by the source entity's surface text.
# Each entry links the source to a 'target' string under a 'relation' label;
# targets carry no offsets, only text.
relations = {
    'Home Visits Survey': [
        {'source': 'Home Visits Survey', 'relation': 'data geography', 'target': 'Jordan', 'score': 0.6180844902992249},
        {'source': 'Home Visits Survey', 'relation': 'version', 'target': 'Round II', 'score': 0.9688164591789246},
        {'source': 'Home Visits Survey', 'relation': 'acronym', 'target': 'HV', 'score': 0.9140607714653015},
        {'source': 'Home Visits Survey', 'relation': 'author', 'target': 'UNHCR', 'score': 0.7762154340744019},
        {'source': 'Home Visits Survey', 'relation': 'author', 'target': 'World Food Programme', 'score': 0.6582539677619934},
        {'source': 'Home Visits Survey', 'relation': 'reference year', 'target': '2013', 'score': 0.524115264415741},
        {'source': 'Home Visits Survey', 'relation': 'publication year', 'target': '2014', 'score': 0.6853994131088257},
        {'source': 'Home Visits Survey', 'relation': 'data description', 'target': 'detailed socio-economic, health, and protection data', 'score': 0.6544178128242493},
    ]
}

# Sample passage the outputs above refer to; also the textbox's initial value.
SAMPLE_TEXT = (
    "The Jordan Home Visits Survey, Round II (HV), was carried out by UNHCR and the World Food "
    "Programme between November 2013 and September 2014. Through in-home visits to Syrian refugee "
    "households in Jordan, it gathered detailed socio-economic, health, and protection data—each "
    "household tagged with a unique ID to allow longitudinal tracking."
)

def _locate_mention(text, ent):
    """Return the ``(start, end)`` span of an NER mention in *text*, or ``None``.

    The stored offsets are used only when they actually slice out the
    entity's recorded ``text``. Otherwise the mention is searched for and the
    occurrence closest to the stored start is chosen, so offsets that have
    drifted (or text the user has edited) never highlight the wrong characters.
    """
    start, end = ent["start"], ent["end"]
    mention = ent.get("text")
    if not mention:
        # Nothing to verify against: accept the offsets only if they form a
        # valid, non-empty slice of the current text.
        return (start, end) if 0 <= start < end <= len(text) else None
    if start >= 0 and text[start:end] == mention:
        return start, end
    candidates = [m.start() for m in re.finditer(re.escape(mention), text)]
    if not candidates:
        return None
    best = min(candidates, key=lambda pos: abs(pos - start))
    return best, best + len(mention)


def highlight_text(text, *, ner_spans=None, relation_map=None):
    """Build the ``{"text", "entities"}`` payload shown by ``gr.HighlightedText``.

    Args:
        text: The passage to annotate.
        ner_spans: Iterable of NER dicts with ``label``, ``start``, ``end`` and
            (optionally) ``text``. Defaults to the module-level ``ner``.
        relation_map: Mapping of source entity -> list of relation dicts with
            ``relation`` and ``target``. Defaults to the module-level
            ``relations``.

    Returns:
        A dict with the input ``text`` and a list of ``entities``, each of the
        form ``{"entity": label, "start": int, "end": int}`` (end-exclusive).
        NER spans come first, followed by one span per occurrence of every
        relation target, labelled with the relation name.
    """
    if not text:
        return {"text": text or "", "entities": []}
    if ner_spans is None:
        ner_spans = ner
    if relation_map is None:
        relation_map = relations

    entities = []

    # 1) NER spans, re-anchored to the text actually being displayed.
    for ent in ner_spans:
        span = _locate_mention(text, ent)
        if span is not None:
            entities.append({
                "entity": ent["label"],
                "start":  span[0],
                "end":    span[1],
            })

    # 2) RE spans: annotate every occurrence of each target with its relation.
    for rels in relation_map.values():
        for rel in rels:
            target = rel["target"]
            if not target:
                # An empty pattern would match at every position in the text.
                continue
            entities.extend(
                {"entity": rel["relation"], "start": m.start(), "end": m.end()}
                for m in re.finditer(re.escape(target), text)
            )
    return {"text": text, "entities": entities}

# Demo page: intro text, input box, trigger button, highlighted output, legend.
with gr.Blocks() as demo:
    gr.Markdown(
        "## Data Use Detector\nInput text and the model will highlight the entities it detects."
    )

    txt_in = gr.Textbox(label="Input Text", lines=4, value=SAMPLE_TEXT)
    btn = gr.Button("Highlight Entities")
    txt_out = gr.HighlightedText(label="Annotated Entities")

    # One handler wiring shared by all three triggers: button click,
    # textbox submit, and the page-load event.
    _highlight_io = dict(fn=highlight_text, inputs=txt_in, outputs=txt_out)
    btn.click(**_highlight_io)
    txt_in.submit(**_highlight_io)
    demo.load(**_highlight_io)

    # Static legend mapping each label to the sample's annotated values.
    gr.Markdown("""
**Legend**  
- **named dataset** → Home Visits Survey  
- **data geography** → Jordan  
- **version** → Round II  
- **acronym** → HV  
- **author** → UNHCR, World Food Programme  
- **reference year** → 2013  
- **publication year** → 2014  
- **data description** → detailed socio-economic, health, and protection data
""")

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()