import re
import json
import gradio as gr

# Raw NER output from the model. `start`/`end` are character offsets into
# SAMPLE_TEXT and are taken as given (trusted, not recomputed here).
ner = [
    dict(
        start=11,
        end=29,
        text="Home Visits Survey",
        label="named dataset",
        score=0.9947463870048523,
    ),
]

# Raw relation-extraction (RE) output from the model, keyed by the source
# entity. Each row below is (relation, target, score) for that source.
_RE_SOURCE = "Home Visits Survey"
_RE_ROWS = (
    ("data geography", "Jordan", 0.6180844902992249),
    ("version", "Round II", 0.9688164591789246),
    ("acronym", "HV", 0.9140607714653015),
    ("author", "UNHCR", 0.7762154340744019),
    ("author", "World Food Programme", 0.6582539677619934),
    ("reference year", "2013", 0.524115264415741),
    ("publication year", "2014", 0.6853994131088257),
    (
        "data description",
        "detailed socio-economic, health, and protection data",
        0.6544178128242493,
    ),
)
relations = {
    _RE_SOURCE: [
        {"source": _RE_SOURCE, "relation": rel, "target": tgt, "score": score}
        for rel, tgt, score in _RE_ROWS
    ]
}

# Exact sample text shown in the input box by default. It is a single line;
# the literal is only split across source lines for readability.
SAMPLE_TEXT = (
    "The Jordan Home Visits Survey, Round II (HV), was carried out by UNHCR "
    "and the World Food Programme between November 2013 and September 2014. "
    "Through in-home visits to Syrian refugee households in Jordan, it "
    "gathered detailed socio-economic, health, and protection data—each "
    "household tagged with a unique ID to allow longitudinal tracking."
)

def _literal_spans(needle, text):
    """Return ``(start, end)`` for each non-overlapping literal match of *needle*."""
    if not needle:
        # An empty pattern matches at every position; treat it as "no match".
        return []
    return [m.span() for m in re.finditer(re.escape(needle), text)]


def highlight_text(text):
    """Build the ``{"text": ..., "entities": [...]}`` payload for the highlighter.

    Entities come from two module-level sources:

    1. ``ner``: each entry's ``start``/``end`` offsets are used as-is when they
       still point at the entry's recorded ``text``. Because the input box is
       editable, the offsets can go stale; in that case the recorded surface
       text is searched for in *text* instead, and the entity is dropped if it
       no longer occurs.
    2. ``relations``: every literal occurrence of each relation's ``target``
       is labelled with the relation name.

    Args:
        text: The (possibly user-edited) input text. ``None`` or an empty
            string yields no entities.

    Returns:
        A dict with the text under ``"text"`` and, under ``"entities"``, a list
        of ``{"entity", "start", "end"}`` dicts (NER spans first, then relation
        spans in declaration order).
    """
    if not text:
        # Nothing to annotate; never emit spans that point past the text.
        return {"text": text or "", "entities": []}

    entities = []

    # 1) NER spans
    for ent in ner:
        start, end = ent["start"], ent["end"]
        surface = ent.get("text")
        in_bounds = 0 <= start < end <= len(text)
        if in_bounds and (surface is None or text[start:end] == surface):
            # Offsets still line up with the text: keep the model's span.
            spans = [(start, end)]
        else:
            # Stale or out-of-range offsets: relocate by surface text.
            spans = _literal_spans(surface, text)
        for span_start, span_end in spans:
            entities.append({
                "entity": ent["label"],
                "start":  span_start,
                "end":    span_end,
            })

    # 2) RE spans
    for rel_list in relations.values():
        for r in rel_list:
            for span_start, span_end in _literal_spans(r["target"], text):
                entities.append({
                    "entity": r["relation"],
                    "start":  span_start,
                    "end":    span_end,
                })

    return {"text": text, "entities": entities}

def get_model_predictions():
    """Return the raw NER and RE outputs as one pretty-printed JSON string."""
    payload = {"ner": ner, "relations": relations}
    serialized = json.dumps(payload, indent=2)
    return serialized

with gr.Blocks() as demo:
    # Page header with a short usage hint.
    gr.Markdown(
        "## Data Use Detector\n"
        "Edit the sample text, then click **Highlight** to annotate entities, or **Get Model Predictions** to see the raw JSON."
    )

    # Highlighting section: input box pre-filled with the sample, trigger
    # button, and the annotated output.
    txt_in = gr.Textbox(value=SAMPLE_TEXT, lines=4, label="Input Text")
    highlight_btn = gr.Button("Highlight")
    txt_out = gr.HighlightedText(label="Annotated Entities")

    # Raw-predictions section: trigger button and a non-interactive JSON box.
    get_pred_btn = gr.Button("Get Model Predictions")
    ner_rel_box = gr.Textbox(
        value="",
        lines=15,
        interactive=False,
        label="Model Predictions (JSON)",
    )

    # Both handlers are wired to button clicks only.
    highlight_btn.click(highlight_text, txt_in, txt_out)
    get_pred_btn.click(get_model_predictions, None, ner_rel_box)


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()