Spaces:

spacy
/

healthsea-pipeline

Runtime error

App Files Files Community

healthsea-pipeline / app.py

edichief

Fix benepar download

65dfe84 over 3 years ago

raw

history blame

6.05 kB

	import streamlit as st
	import spacy
	from spacy_streamlit import visualize_ner
	from support_functions import HealthseaPipe
	import operator

	# Header
	# Inline the app's stylesheet into the page as a <style> element.
	# The encoding is pinned so reading the file does not depend on the host locale.
	with open("style.css", encoding="utf-8") as f:
	    st.markdown("<style>" + f.read() + "</style>", unsafe_allow_html=True)

	# Intro
	st.title("Welcome to Healthsea 🪐")

	# Two-column layout: descriptive text on the left, animation on the right.
	intro, jellyfish = st.columns(2)
	jellyfish.markdown("\n")

	intro.subheader("Create easier access to health✨")

	jellyfish.image("data/img/Jellymation.gif")
	intro.markdown(
	    """Healthsea is an end-to-end spaCy v3 pipeline for analyzing user reviews to supplementary products and extracting their potential effects on health."""
	)
	intro.markdown(
	    """The code for Healthsea is provided in this [github repository](https://github.com/thomashacker/healthsea). Visit our [blog post](https://explosion.ai/) for more about the Healthsea project.
	"""
	)

	st.write(
	    """This app visualizes the individual processing steps of the pipeline in which you can write custom reviews to get insights into the functionality of all the different components.
	You can visit the [Healthsea Demo app](https://huggingface.co/spaces/edichief/healthsea-demo) for exploring the Healthsea processing on productive data.
	"""
	)

	st.markdown("""---""")

	# Setup
	# Helper object from support_functions; used further down to obtain the
	# per-clause spans via healthsea_pipe.get_clauses(doc).
	healthsea_pipe = HealthseaPipe()

	# Classification label -> (fill colour, accent colour).
	# The formatting helpers use index 0 as the card background and index 1 for
	# its box-shadow and border.
	color_code = {
	    "POSITIVE": ("#3C9E58", "#1B7735"),
	    "NEGATIVE": ("#FF166A", "#C0094B"),
	    "NEUTRAL": ("#7E7E7E", "#4E4747"),
	    "ANAMNESIS": ("#E49A55", "#AD6B2D"),
	}

	# Canned reviews offered in the "Predefined example reviews" select box.
	example_reviews = [
	    "This is great for joint pain.",
	    "Product helped my joint pain but it also caused rashes.",
	    "I'm diagnosed with gastritis. This product helped!",
	    "This has made my insomnia even worse.",
	    "It didn't help my joint pain.",
	]

	# Functions
	def kpi(n, text):
	    """Return the HTML markup for a KPI tile.

	    Args:
	        n: Value rendered inside the ``<h1>`` element.
	        text: Caption rendered inside the ``<span>`` element.

	    Returns:
	        A string holding a ``<div class='kpi'>`` wrapper with both values
	        interpolated as-is (no escaping is applied).
	    """
	    return f"""
	<div class='kpi'>
	<h1>{n}</h1>
	<span>{text}</span>
	</div>
	"""


	def central_text(text):
	    """Return *text* wrapped in an ``<h2 class='central_text'>`` element.

	    The value is interpolated as-is; no escaping is applied.
	    """
	    return f"""<h2 class='central_text'>{text}</h2>"""


	def format_clause(text, meta, pred):
	    """Return the HTML card for one classified clause.

	    Args:
	        text: Clause text shown inside the coloured card.
	        meta: Caption shown underneath the card.
	        pred: Key into ``color_code`` selecting the card colours.

	    Returns:
	        An HTML string. ``text`` and ``meta`` are HTML-escaped because the
	        result is rendered with ``unsafe_allow_html=True`` and both values
	        can contain text typed by the user.

	    Raises:
	        KeyError: If ``pred`` is not a key of ``color_code``.
	    """
	    from html import escape  # local import: the block needs nothing new at file level

	    fill = color_code[pred][0]
	    accent = color_code[pred][1]
	    # quote=False: the values land in element content, so only &, < and > matter.
	    safe_text = escape(str(text), quote=False)
	    safe_meta = escape(str(meta), quote=False)
	    html = f"""
	<div>
	<div class="clause" style="background-color:{fill} ; box-shadow: 0px 5px {accent}; border-color:{accent};">
	<div class="clause_text">{safe_text}</div>
	</div>
	<div class="clause_meta">
	<div>{safe_meta}</div>
	</div>
	</div>"""
	    return html


	def format_effect(text, pred):
	    """Return the HTML card for one aggregated health effect.

	    Args:
	        text: Text shown inside the coloured card.
	        pred: Key into ``color_code`` selecting the card colours.

	    Returns:
	        An HTML string. ``text`` is HTML-escaped because the result is
	        rendered with ``unsafe_allow_html=True`` and the value can contain
	        text typed by the user.

	    Raises:
	        KeyError: If ``pred`` is not a key of ``color_code``.
	    """
	    from html import escape  # local import: the block needs nothing new at file level

	    fill = color_code[pred][0]
	    accent = color_code[pred][1]
	    # quote=False: the value lands in element content, so only &, < and > matter.
	    safe_text = escape(str(text), quote=False)
	    html = f"""
	<div>
	<div class="clause" style="background-color:{fill} ; box-shadow: 0px 5px {accent}; border-color:{accent};">
	<div class="clause_text">{safe_text}</div>
	</div>
	</div>"""
	    return html

	# Load model
	def _ensure_model_in_session():
	    """Load ``en_healthsea`` into ``st.session_state["model"]`` unless it is already stored there."""
	    if "model" not in st.session_state:
	        st.session_state["model"] = spacy.load("en_healthsea")


	load_state = st.info("Loading...")
	try:
	    load_state.info("Loading model...")
	    try:
	        _ensure_model_in_session()
	    except LookupError:
	        # Download model
	        # NOTE(review): presumably raised when the benepar data is missing,
	        # since the recovery step is to download 'benepar_en3' - confirm.
	        import nltk  # kept from the original; not referenced directly here
	        import benepar

	        load_state.info("Downloading model...")
	        benepar.download('benepar_en3')
	        # Retry inside the outer try so a failing download or reload is
	        # reported below instead of surfacing as a raw traceback.
	        _ensure_model_in_session()
	    load_state.success("Loading complete!")
	except Exception as e:
	    # Report the failure as an error (not a success banner) and halt this
	    # run: everything below reads st.session_state["model"], which is
	    # missing when loading failed.
	    load_state.error("Something went wrong!")
	    st.error(e)
	    st.stop()

	# Pipeline
	st.markdown(central_text("⚙️ Pipeline"), unsafe_allow_html=True)

	check = st.checkbox("Use predefined examples")

	# Only one of the two input widgets is rendered per run, chosen by the checkbox.
	if check:
	    text = st.selectbox("Predefined example reviews", example_reviews)
	else:
	    text = st.text_input(label="Write a review", value="This is great for joint pain!")

	# The model is read back from the session store rather than from a local
	# variable, then applied to the chosen review.
	nlp = st.session_state["model"]
	doc = nlp(text)

	# NER
	# Render the entities of the processed review, restricted to the labels the
	# pipeline's "ner" component reports.
	ner_labels = nlp.get_pipe("ner").labels
	ner_colors = {"CONDITION": "#FF4B76", "BENEFIT": "#629B68"}
	visualize_ner(
	    doc,
	    labels=ner_labels,
	    show_table=False,
	    title="✨ Named Entity Recognition",
	    colors=ner_colors,
	)

	st.info("""The NER identifies two labels: 'Condition' and 'Benefit'. 'Condition' entities are generally diseases, symptoms, or general health problems (e.g. joint pain), while 'Benefit' entities are positive desired health aspects (e.g. energy)""")

	st.markdown("""---""")

	# Segmentation, Blinding, Classification
	st.markdown("## 🔮 Segmentation, Blinding, Classification")

	# Pair each clause span with its entry in doc._.clauses and render one card per pair.
	clauses = healthsea_pipe.get_clauses(doc)
	for doc_clause, clause in zip(clauses, doc._.clauses):
	    # Highest-scoring category together with its score (first one wins on ties).
	    classification, score = max(clause["cats"].items(), key=operator.itemgetter(1))
	    percentage = round(float(score) * 100, 2)
	    meta = f"{clause['ent_name']} ({classification} {percentage}%)"

	    st.markdown(
	        format_clause(doc_clause.text, meta, classification), unsafe_allow_html=True
	    )
	    # NOTE(review): spacer taken to be per clause; the original indentation was lost.
	    st.markdown("\n")

	st.info("""The text is segmented into clauses and classified by a Text Classification model. We additionally blind found entities to improve generalization and to inform the model about our current target entity.
	The Text Classification predicts four exclusive classes that represent the health effect: 'Positive', 'Negative', 'Neutral', 'Anamnesis'.""")

	st.info("""The 'Anamnesis' class is defined as the current state of health of a reviewer (e.g. 'I am diagnosed with joint pain'). It is used to link health aspects to health effects that are mentioned later in a review.""")

	st.markdown("""---""")

	# Aggregation
	st.markdown("## 🔗 Aggregation")

	# One card per entry of doc._.health_effects; the entry's "effect" value is
	# both part of the card text and the colour key.
	for effect in doc._.health_effects:
	    effect_label = doc._.health_effects[effect]["effect"]
	    st.markdown(
	        format_effect(f"{effect_label} effect on {effect}", effect_label),
	        unsafe_allow_html=True,
	    )
	    # NOTE(review): spacer taken to be per effect; the original indentation was lost.
	    st.markdown("\n")

	st.info("""Multiple classifications are aggregated into one final classification.""")

	st.markdown("""---""")

	# Indepth
	# Show the raw custom doc attributes side by side as JSON.
	st.markdown("## 🔧 Pipeline attributes")
	clauses_col, effect_col = st.columns(2)

	# Left column: one JSON viewer per entry of doc._.clauses.
	clauses_col.markdown("### doc._.clauses")
	for clause in doc._.clauses:
	    clauses_col.json(clause)

	# Right column: doc._.health_effects in a single JSON viewer.
	effect_col.markdown("### doc._.health_effects")
	effect_col.json(doc._.health_effects)