borrowing-detection-es

Sleeping

Johannes

update descr again

bfa5f32 over 2 years ago

2.43 kB

	from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer
	import gradio as gr
	from spacy import displacy

	# Single identifier handed to both from_pretrained() calls below.
	MODEL_NAME = "lirondos/anglicisms-spanish-mbert"

	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)

	# "ner" pipeline built from the tokenizer/model pair above; infer() calls it.
	nlp = pipeline("ner", model=model, tokenizer=tokenizer)

	# Sample of the dict layout handed to displacy.render(..., manual=True):
	# the raw text, a list of character-offset spans with labels, and a title.
	# The name is spelled "diplacy" and nothing else in the visible code reads it;
	# it is kept unchanged so any outside reference keeps working.
	diplacy_dict_template = dict(
	    text="But Google is starting from behind.",
	    ents=[dict(start=4, end=10, label="ORG")],
	    title=None,
	)


	def infer(input_text):
	    """Render the borrowings found in ``input_text`` as displaCy markup.

	    The text is passed to the module-level ``nlp`` pipeline; each item it
	    returns becomes one manual displaCy span (``start``, ``end`` and the
	    item's ``entity`` value as the label). The spans are then rendered with
	    ``displacy.render`` using ``style="ent"``, ``page=True`` and
	    ``manual=True``.

	    Args:
	        input_text: The text to analyse.

	    Returns:
	        The value produced by ``displacy.render`` for the text and spans.
	    """
	    # One span per pipeline result, labelled with the tag the pipeline reports.
	    spans = [
	        {"start": hit["start"], "end": hit["end"], "label": hit["entity"]}
	        for hit in nlp(input_text)
	    ]

	    # Background gradient per tag; the key order also sets the "ents" list below.
	    palette = {
	        "B-ENG": "linear-gradient(90deg, #aa9cfc, #fc9ce7)",
	        "I-ENG": "linear-gradient(90deg, #99bfff, #a57cf0)",
	        "B-OTHER": "linear-gradient(90deg, #79d0a5, #f6e395)",
	        "I-OTHER": "linear-gradient(90deg, #f79a76, #fb6d6d)",
	    }
	    render_options = {"ents": list(palette), "colors": palette}

	    manual_doc = {"text": input_text, "ents": spans, "title": None}
	    markup = displacy.render(
	        manual_doc,
	        style="ent",
	        page=True,
	        manual=True,
	        options=render_options,
	    )

	    # Both wrapper strings are empty, so the markup is returned unchanged.
	    return "" + markup + ""


	# Blurb passed as the `description` argument of the gr.Interface built below.
	# It links to the paper and explains the ENG / OTHER tags and their B-/I- prefixes.
	description="""This space is a demo for the paper [Detecting Unassimilated Borrowings in Spanish:
	An Annotated Corpus and Approaches to Modeling](https://arxiv.org/pdf/2203.16169.pdf)

	The goal of the underlying model is to detect foreign words, e.g. anglicisms, in spanish texts.
	In general it has two types of tags for foreign words: ENG and OTHER. The authors used [BIO-tagging](https://en.wikipedia.org/wiki/Inside%E2%80%93outside%E2%80%93beginning_(tagging)),
	which is why in practice you will see a B- or I- in front of the tags.
	"""

	# Sample inputs handed to the interface as its `examples`.
	example_sentences = [
	    "Buscamos data scientist para proyecto de machine learning.",
	    "Las fake news sobre la celebrity se reprodujeron por los 'mass media' en prime time.",
	    "Me gusta el cine noir y el anime.",
	]

	# Text input -> infer() -> HTML output.
	demo = gr.Interface(
	    fn=infer,
	    inputs=gr.Text(),
	    outputs=gr.HTML(),
	    examples=example_sentences,
	    title="Borrowing Detection Español",
	    description=description,
	)

	# Launched unconditionally at import/run time, as in the original script.
	demo.launch()