Spaces:

ai4data
/

datause-detector

Running

App Files Files Community

datause-detector / app.py

rafmacalaba

test now

c38ba9f 30 days ago

raw

history blame

3.62 kB

	import re
	import json
	import gradio as gr

	# Raw NER output of the model for SAMPLE_TEXT. `start`/`end` are character
	# offsets into that exact string (end-exclusive), so they are only valid
	# while the text is unedited.
	ner = [
	    dict(
	        start=11,
	        end=29,
	        text='Home Visits Survey',
	        label='named dataset',
	        score=0.9947463870048523,
	    ),
	]

	# Raw relation-extraction output of the model, keyed by the source entity's
	# text. Every record has the same source, so only the fields that vary
	# (relation, target, score) are spelled out below.
	relations = {
	    'Home Visits Survey': [
	        {
	            'source': 'Home Visits Survey',
	            'relation': relation,
	            'target': target,
	            'score': score,
	        }
	        for relation, target, score in (
	            ('data geography', 'Jordan', 0.6180844902992249),
	            ('version', 'Round II', 0.9688164591789246),
	            ('acronym', 'HV', 0.9140607714653015),
	            ('author', 'UNHCR', 0.7762154340744019),
	            ('author', 'World Food Programme', 0.6582539677619934),
	            ('reference year', '2013', 0.524115264415741),
	            ('publication year', '2014', 0.6853994131088257),
	            (
	                'data description',
	                'detailed socio-economic, health, and protection data',
	                0.6544178128242493,
	            ),
	        )
	    ]
	}

	# Default contents of the input box. The offsets stored in `ner` index into
	# this exact string, so it must not be altered.
	SAMPLE_TEXT = (
	    "The Jordan Home Visits Survey, Round II (HV), was carried out by UNHCR "
	    "and the World Food Programme between November 2013 and September 2014. "
	    "Through in-home visits to Syrian refugee households in Jordan, it "
	    "gathered detailed socio-economic, health, and protection data—each "
	    "household tagged with a unique ID to allow longitudinal tracking."
	)

	def highlight_text(text, ner_spans=None, rel_map=None):
	    """Build the ``{"text": ..., "entities": [...]}`` payload for the highlighter.

	    Each entity is a dict with ``entity`` (the label), ``start`` and ``end``
	    (character offsets into ``text``). NER spans come first, followed by one
	    span per occurrence of every relation target found in ``text``.

	    The stored NER offsets were computed for the original sample text, but
	    the input box is editable. An offset pair is therefore used only when
	    ``text[start:end]`` still equals the entity's recorded ``text``;
	    otherwise the entity is re-located by searching for that string, and
	    dropped if it no longer occurs.

	    Args:
	        text: The (possibly user-edited) input text. ``None`` is treated as
	            an empty string.
	        ner_spans: Optional NER records (``start``, ``end``, ``label`` and
	            optionally ``text``). Defaults to the module-level ``ner``.
	        rel_map: Optional mapping of source entity to a list of relation
	            records (``relation``, ``target``). Defaults to the module-level
	            ``relations``.

	    Returns:
	        A dict with the text under ``"text"`` and the span list under
	        ``"entities"``.
	    """
	    spans = ner if ner_spans is None else ner_spans
	    rel_groups = relations if rel_map is None else rel_map
	    text = text or ""
	    entities = []

	    # 1) NER spans: trust the stored offsets only while they still point at
	    #    the recorded surface form.
	    for ent in spans:
	        start, end, surface = ent["start"], ent["end"], ent.get("text")
	        if surface is None:
	            # Nothing to verify against; accept the offsets only if in bounds.
	            offsets = [(start, end)] if 0 <= start < end <= len(text) else []
	        elif surface and start >= 0 and text[start:end] == surface:
	            offsets = [(start, end)]
	        else:
	            # Text was edited: fall back to locating the entity by content.
	            offsets = _find_all(text, surface)
	        entities.extend(
	            {"entity": ent["label"], "start": s, "end": e} for s, e in offsets
	        )

	    # 2) RE spans: relation targets carry no offsets, so find them by content.
	    for rel_list in rel_groups.values():
	        for rel in rel_list:
	            entities.extend(
	                {"entity": rel["relation"], "start": s, "end": e}
	                for s, e in _find_all(text, rel["target"])
	            )

	    return {"text": text, "entities": entities}


	def _find_all(text, needle):
	    """Return ``(start, end)`` for each non-overlapping occurrence of ``needle``."""
	    if not needle:
	        # An empty pattern would match at every position with zero width.
	        return []
	    return [m.span() for m in re.finditer(re.escape(needle), text)]

	def get_model_predictions():
	    """Return the stored NER and relation outputs as an indented JSON string."""
	    payload = dict(ner=ner, relations=relations)
	    return json.dumps(payload, indent=2)

	# UI definition. Components are created in the same order as before:
	# heading, input box, Highlight button, highlighted output, predictions
	# button, read-only JSON box.
	with gr.Blocks() as demo:
	    gr.Markdown(
	        "## Data Use Detector\n"
	        "Edit the sample text, then click Highlight to annotate entities, "
	        "or Get Model Predictions to see the raw JSON."
	    )

	    txt_in = gr.Textbox(value=SAMPLE_TEXT, label="Input Text", lines=4)
	    highlight_btn = gr.Button("Highlight")
	    txt_out = gr.HighlightedText(label="Annotated Entities")

	    get_pred_btn = gr.Button("Get Model Predictions")
	    ner_rel_box = gr.Textbox(
	        value="",
	        label="Model Predictions (JSON)",
	        lines=15,
	        interactive=False,
	    )

	    # Nothing runs on load or on edit; both outputs update on click only.
	    highlight_btn.click(highlight_text, txt_in, txt_out)
	    get_pred_btn.click(get_model_predictions, outputs=ner_rel_box)

	# Start the server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()