# Forrest Sheng Bao
# 2025-05-25
# forrest@vectara.com
from typing import List, Literal
from IPython.display import display, Markdown
from transformers import AutoModelForSequenceClassification
from funix import funix
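
# HHEM-2.1-Open is fetched from the Hugging Face Hub on first run; trust_remote_code=True
# is needed because the checkpoint ships its own modeling code, including the .predict()
# method used below.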
print("Loading HHEM, this may take a while.")
hhem = AutoModelForSequenceClassification.from_pretrained('vectara/hallucination_evaluation_model', trust_remote_code=True)


@funix(
    title="GUI demo for Vectara's HHEM-2.1-Open"
)
def HHEM(
    LLM_Prompt: str = "The sky is blue.",
    LLM_Response: str = "The ocean is blue."
) -> Markdown:
"""
Vectara's Hughes Hallucination Evaluation Model (HHEM) evaluates how well an LLM's output (called the "response" or the "hypothesis") is faithful/grounded to or supported by the input given to it (called the "prompt" or the "premise"). HHEM has two versions: [HHEM-Open](https://huggingface.co/vectara/hallucination_evaluation_model) and [HHEM Commercial](https://www.vectara.com/blog/hhem-2-1-a-better-hallucination-detection-model).
To use the demo, fill in the "LLM_Prompt" and "LLM_Response" fields and click the run button. A placeholder example is prefilled for you. Feel free to replace it with your own examples and evaluate them.
Args:
LLM_Prompt (str): a.k.a. the "premise".
LLM_Response (str): a.k.a. the "hypothesis".
"""
    if len(LLM_Prompt) + len(LLM_Response) > 500:
        return Markdown("Your inputs are too long for this demo. Please shorten them so that their total length is under 500 characters.")

    pairs = [(LLM_Prompt, LLM_Response)]
    score = hhem.predict(pairs)[0]
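    # HHEM returns a score in [0, 1], interpreted as the probability that the response
    # is supported by the prompt; 0.5 is used here as the decision threshold.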
    verdict = "consistent" if score > 0.5 else "hallucinated"
    output_string = f"""
Given the **prompt**:

> {LLM_Prompt}

and the **response**:

> {LLM_Response}

HHEM's **judgement** is: <u>{verdict}</u> **with the score**: <u>{score:0.3f}</u>.

Wanna know why? Check out [Vectara's Hallucination Corrector (VHC)](https://hcm.demo.vectara.dev/).

Note that in the industry, there are generally two definitions of hallucinations:

* faithfulness: the LLM's response contains information not supported by the prompt given to it.
* factuality: the LLM's response is not true according to world knowledge.

In HHEM, we adopt the faithfulness definition.

See also:

* [HHEM Leaderboard](https://huggingface.co/spaces/vectara/leaderboard)
* [Source code of this app](https://huggingface.co/spaces/vectara/hhem-2.1-open-demo/tree/main)
"""

    return Markdown(output_string)
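
# To launch this demo locally, run it through Funix (assuming this file is saved as
# app.py and funix is installed): `funix app.py`. Funix builds the web form from
# HHEM()'s signature and type hints.

if __name__ == "__main__":
    # A minimal sketch of calling HHEM directly, without the GUI, reusing the `hhem`
    # model loaded above. The response here is factually true ("The ocean is blue.")
    # yet unsupported by the premise, so under the faithfulness definition it would
    # be expected to score low, i.e., hallucinated.
    demo_pairs = [("The sky is blue.", "The ocean is blue.")]
    demo_scores = hhem.predict(demo_pairs)  # one consistency score in [0, 1] per pair
    print(f"HHEM score: {float(demo_scores[0]):0.3f}")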