File size: 3,673 Bytes
fd0fe48
3b9fb2c
 
cd683ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd0fe48
c35975c
cd683ff
9c95361
 
 
 
 
 
 
 
fd0fe48
cd683ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd0fe48
3d53082
fd0fe48
cd683ff
 
9c95361
cd683ff
 
9c95361
 
 
 
 
 
fd0fe48
d64d47b
9c95361
 
 
 
 
 
 
 
fd0fe48
306e33b
fd0fe48
cd683ff
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import re
import gradio as gr

# Model outputs for the sample sentence, hard-coded for the demo.
_DATASET_NAME = "Home Visits Survey"

# NER output: a single dataset mention with character offsets, label and score.
# NOTE(review): offsets 12-30 do not line up with this mention's position in
# SAMPLE_TEXT below (it sits at 11-29) -- confirm which input produced them.
ner = [
    dict(
        start=12,
        end=30,
        text=_DATASET_NAME,
        label="named dataset",
        score=0.9947463870048523,
    ),
]

# Relation-extraction output as (relation, target, score) rows; every row has
# the dataset mention above as its source.
_RELATION_ROWS = (
    ("data geography", "Jordan", 0.6180844902992249),
    ("version", "Round II", 0.9688164591789246),
    ("acronym", "HV", 0.9140607714653015),
    ("author", "UNHCR", 0.7762154340744019),
    ("author", "World Food Programme", 0.6582539677619934),
    ("reference year", "2013", 0.524115264415741),
    ("publication year", "2014", 0.6853994131088257),
    (
        "data description",
        "detailed socio-economic, health, and protection data",
        0.6544178128242493,
    ),
)

# Mapping: source mention -> list of relation records.
relations = {
    _DATASET_NAME: [
        {
            "source": _DATASET_NAME,
            "relation": rel_name,
            "target": rel_target,
            "score": rel_score,
        }
        for rel_name, rel_target, rel_score in _RELATION_ROWS
    ],
}

# Sentence pre-filled in the input box and annotated by the demo.
SAMPLE_TEXT = (
    "The Jordan Home Visits Survey, Round II (HV), was carried out by UNHCR "
    "and the World Food Programme between November 2013 and September 2014. "
    "Through in-home visits to Syrian refugee households in Jordan, it "
    "gathered detailed socio-economic, health, and protection data—each "
    "household tagged with a unique ID to allow longitudinal tracking."
)

def _find_spans(text, needle, label):
    """Return one entity dict per literal occurrence of *needle* in *text*."""
    return [
        {"entity": label, "start": m.start(), "end": m.end()}
        for m in re.finditer(re.escape(needle), text)
    ]


def highlight_text(text, *, ner_spans=None, relation_map=None):
    """Build the ``{"text": ..., "entities": [...]}`` payload for highlighting.

    Each entity is a dict with ``"entity"`` (the label), ``"start"`` and
    ``"end"`` character offsets into *text*.

    Args:
        text: The string to annotate. ``None`` or ``""`` yields no entities.
        ner_spans: Optional NER records (``start``/``end``/``label`` and,
            optionally, ``text``). Defaults to the module-level ``ner``.
        relation_map: Optional mapping of source mention to a list of relation
            records (``relation``/``target``). Defaults to the module-level
            ``relations``.

    Returns:
        A dict with the (possibly empty) text and the list of entity spans:
        NER spans first, then relation-target spans in record order.
    """
    if ner_spans is None:
        ner_spans = ner
    if relation_map is None:
        relation_map = relations

    # Nothing to annotate; also avoids passing None to the regex engine.
    if not text:
        return {"text": text or "", "entities": []}

    entities = []

    # 1) NER spans. The stored offsets were computed on one specific input, so
    #    only trust them when they still point at the recorded surface text;
    #    otherwise re-locate the mention by literal search.
    for ent in ner_spans:
        start, end = ent["start"], ent["end"]
        surface = ent.get("text")
        if not surface or text[start:end] == surface:
            entities.append({"entity": ent["label"], "start": start, "end": end})
        else:
            entities.extend(_find_spans(text, surface, ent["label"]))

    # 2) RE spans: annotate every occurrence of each target with its relation.
    for rels in relation_map.values():
        for rel in rels:
            target = rel["target"]
            if not target:
                # An empty pattern matches at every position (zero-width spans).
                continue
            entities.extend(_find_spans(text, target, rel["relation"]))

    return {"text": text, "entities": entities}

# Static legend rendered under the output. The two spaces before each "\n"
# are Markdown hard line breaks.
_LEGEND_MD = (
    "\n"
    "**Legend**  \n"
    "- **named dataset** → Home Visits Survey  \n"
    "- **data geography** → Jordan  \n"
    "- **version** → Round II  \n"
    "- **acronym** → HV  \n"
    "- **author** → UNHCR, World Food Programme  \n"
    "- **reference year** → 2013  \n"
    "- **publication year** → 2014  \n"
    "- **data description** → detailed socio-economic, health, and protection data\n"
)

with gr.Blocks() as demo:
    gr.Markdown(
        "## Data Use Detector\n"
        "Input text and the model will highlight the entities it detects."
    )

    txt_in = gr.Textbox(label="Input Text", lines=4, value=SAMPLE_TEXT)
    btn = gr.Button("Highlight Entities")
    txt_out = gr.HighlightedText(label="Annotated Entities")

    # The same handler runs on button click, textbox submit and page load.
    _wiring = dict(fn=highlight_text, inputs=txt_in, outputs=txt_out)
    btn.click(**_wiring)
    txt_in.submit(**_wiring)
    demo.load(**_wiring)

    gr.Markdown(_LEGEND_MD)

if __name__ == "__main__":
    demo.launch()