# Forrest Sheng Bao
# 2025-05-25
# forrest@vectara.com

from typing import List, Literal

from IPython.display import display, Markdown
from transformers import AutoModelForSequenceClassification
from funix import funix

print("Loading HHEM, this may take a while.")
hhem = AutoModelForSequenceClassification.from_pretrained(
    'vectara/hallucination_evaluation_model', trust_remote_code=True)
def HHEM(
    LLM_Prompt: str = "The sky is blue.",
    LLM_Response: str = "The ocean is blue."
) -> Markdown:
    """
    Vectara's Hughes Hallucination Evaluation Model (HHEM) evaluates how well an LLM's output (called the "response" or the "hypothesis") is supported by, i.e., faithful or grounded to, the input given to it (called the "prompt" or the "premise"). HHEM has two versions: [HHEM-Open](https://huggingface.co/vectara/hallucination_evaluation_model) and [HHEM Commercial](https://www.vectara.com/blog/hhem-2-1-a-better-hallucination-detection-model).

    To use the demo, fill in the "LLM_Prompt" and "LLM_Response" fields and click the run button. A placeholder example is prefilled for you. Feel free to replace it with your own examples and evaluate them.

    Args:
        LLM_Prompt (str): a.k.a. the "premise".
        LLM_Response (str): a.k.a. the "hypothesis".
    """
    if len(LLM_Prompt) + len(LLM_Response) > 500:
        return Markdown("""Your inputs are too long for this demo. Please shorten them so that their total length is under 500 characters.""")

    pairs = [(LLM_Prompt, LLM_Response)]
    score = hhem.predict(pairs)[0]  # probability that the response is consistent with the prompt
    verdict = "consistent" if score > 0.5 else "hallucinated"
    output_string = f"""
Given the **prompt**:

> {LLM_Prompt}

and the **response**:

> {LLM_Response}

HHEM's **judgement** is: <u>{verdict}</u> **with the score**: <u>{score:0.3f}</u>.

Wanna know why? Check out [Vectara's Hallucination Corrector (VHC)](https://hcm.demo.vectara.dev/)

Note that in the industry, there are generally two definitions of hallucinations:

* faithfulness: the LLM's response contains information not supported by the prompt given to it.
* factuality: the LLM's response is not true per world knowledge.

HHEM uses the faithfulness definition.

See also:

* [HHEM Leaderboard](https://huggingface.co/spaces/vectara/leaderboard)
* [Source code of this app](https://huggingface.co/spaces/vectara/hhem-2.1-open-demo/tree/main)
"""
    return Markdown(output_string)
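

# A minimal local-usage sketch, not part of the deployed Space: it assumes you run this
# file directly with Python rather than through the Funix UI, and simply calls HHEM() as
# a plain function on the default example pair.
if __name__ == "__main__":
    result = HHEM(
        LLM_Prompt="The sky is blue.",
        LLM_Response="The ocean is blue.",
    )
    # IPython's Markdown object keeps the raw Markdown text in its `data` attribute.
    print(result.data)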