|
import streamlit as st |
|
import streamlit.components.v1 as components |
|
|
|
from lettucedetect.models.inference import HallucinationDetector |
|
|
|
|
|
def create_interactive_text(text: str, spans: list[dict[str, int | float]]) -> str: |
|
"""Create interactive HTML with highlighting and hover effects. |
|
|
|
:param text: The text to create the interactive text for. |
|
:param spans: The spans to highlight. |
|
:return: The interactive text. |
|
""" |
|
html_text = text |
|
|
|
for span in sorted(spans, key=lambda x: x["start"], reverse=True): |
|
span_text = text[span["start"] : span["end"]] |
|
highlighted_span = f'<span class="hallucination" title="Confidence: {span["confidence"]:.3f}">{span_text}</span>' |
|
html_text = html_text[: span["start"]] + highlighted_span + html_text[span["end"] :] |
|
|
|
return f""" |
|
<style> |
|
.container {{ |
|
font-family: Arial, sans-serif; |
|
font-size: 16px; |
|
line-height: 1.6; |
|
padding: 20px; |
|
}} |
|
.hallucination {{ |
|
background-color: rgba(255, 99, 71, 0.3); |
|
padding: 2px; |
|
border-radius: 3px; |
|
cursor: help; |
|
}} |
|
.hallucination:hover {{ |
|
background-color: rgba(255, 99, 71, 0.5); |
|
}} |
|
</style> |
|
<div class="container">{html_text}</div> |
|
""" |
|
|
|
|
|
|
|
# English sample shared by the "English (en)" and "LLM-Based" entries below.
_ENGLISH_SAMPLE = {
    "context": "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
    "question": "What is the capital of France? What is the population of France?",
    "answer": "The capital of France is Paris. The population of France is 69 million.",
}

# Sidebar label -> demo preset. Transformer presets carry a "model_path";
# the LLM preset carries "method": "llm" instead. Every preset provides
# "lang", the default "context" / "question" / "answer" texts and the
# "output_label" heading shown above the highlighted result.
LANGUAGE_EXAMPLES = {
    "English (en)": {
        "model_path": "KRLabsOrg/lettucedect-base-modernbert-en-v1",
        "lang": "en",
        **_ENGLISH_SAMPLE,
        "output_label": "Predictions",
    },
    "German (de)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-de-v1",
        "lang": "de",
        "context": "Frankreich ist ein Land in Europa. Die Hauptstadt von Frankreich ist Paris. Die Bevölkerung Frankreichs beträgt 67 Millionen.",
        "question": "Was ist die Hauptstadt von Frankreich? Wie groß ist die Bevölkerung Frankreichs?",
        "answer": "Die Hauptstadt von Frankreich ist Paris. Die Bevölkerung Frankreichs beträgt 69 Millionen.",
        "output_label": "Vorhersagen",
    },
    "French (fr)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-fr-v1",
        "lang": "fr",
        "context": "La France est un pays d'Europe. La capitale de la France est Paris. La population de la France est de 67 millions.",
        "question": "Quelle est la capitale de la France? Quelle est la population de la France?",
        "answer": "La capitale de la France est Paris. La population de la France est de 69 millions.",
        "output_label": "Prédictions",
    },
    "Spanish (es)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-es-v1",
        "lang": "es",
        "context": "Francia es un país de Europa. La capital de Francia es París. La población de Francia es de 67 millones.",
        "question": "¿Cuál es la capital de Francia? ¿Cuál es la población de Francia?",
        "answer": "La capital de Francia es París. La población de Francia es de 69 millones.",
        "output_label": "Predicciones",
    },
    "Italian (it)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-it-v1",
        "lang": "it",
        "context": "La Francia è un paese in Europa. La capitale della Francia è Parigi. La popolazione della Francia è di 67 milioni.",
        "question": "Qual è la capitale della Francia? Qual è la popolazione della Francia?",
        "answer": "La capitale della Francia è Parigi. La popolazione della Francia è di 69 milioni.",
        "output_label": "Previsioni",
    },
    "Polish (pl)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-pl-v1",
        "lang": "pl",
        "context": "Kopernikanizm to teoria astronomiczna opracowana przez Mikołaja Kopernika, zgodnie z którą Słońce znajduje się w centrum Układu Słonecznego, a Ziemia i inne planety krążą wokół niego. Teoria ta została opublikowana w dziele 'O obrotach sfer niebieskich' w 1543 roku.",
        "question": "Na czym polega teoria kopernikańska i kiedy została opublikowana?",
        "answer": "Teoria kopernikańska zakłada, że Ziemia jest jednym z wielu ciał niebieskich krążących wokół Słońca. Kopernik opracował również zaawansowane równania matematyczne opisujące ruch satelitów, które zostały wykorzystane w XX wieku w programie kosmicznym NASA. Teoria została opublikowana w 1543 roku.",
        "output_label": "Przewidywania",
    },
    "Chinese (cn)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-cn-v1",
        "lang": "cn",
        "context": "长城是中国古代的伟大防御工程,全长超过21,000公里。它的建造始于公元前7世纪,历经多个朝代。",
        "question": "长城有多长?它是什么时候建造的?",
        "answer": "长城全长约50,000公里。它的建造始于公元前3世纪,仅在秦朝时期。",
        "output_label": "预测",
    },
    "LLM-Based": {
        "method": "llm",
        "lang": "en",
        **_ENGLISH_SAMPLE,
        "output_label": "LLM Predictions",
    },
}
|
|
|
|
|
def main():
    """Render the LettuceDetect multilingual Streamlit demo.

    Builds the page in this order: header, sidebar settings (language preset,
    model size or OpenAI API key), detector loading, the input form, the
    detection results and a footer describing the active model.
    """
    st.set_page_config(page_title="Lettuce Detective", page_icon="🥬", layout="wide")

    st.image(
        "https://github.com/KRLabsOrg/LettuceDetect/blob/main/assets/lettuce_detective.png?raw=true",
        width=600,
    )

    st.title("LettuceDetect Multilingual Demo 🌍")
    st.markdown("### Detect hallucinations in 7 languages")

    with st.sidebar:
        st.header("Settings")
        selected_language = st.selectbox("Select Language", list(LANGUAGE_EXAMPLES))

        example = LANGUAGE_EXAMPLES[selected_language]

        # Presets without an explicit "method" use the transformer detector.
        model_method = example.get("method", "transformer")

        if model_method == "transformer":
            model_size = st.radio(
                "Model Size",
                ["Base (210M)", "Large (610M)"],
                index=0,
                help="Base models are faster, large models are more accurate.",
            )
            openai_api_key = None
        else:
            st.info("LLM-based detection requires an OpenAI API key")
            openai_api_key = st.text_input("OpenAI API Key", type="password")

        st.markdown("---")
        st.markdown("### About")
        st.markdown(
            "LettuceDetect identifies hallucinations by comparing answers to provided context. "
            "Highlighted text indicates content not supported by the source material."
        )
        st.markdown("[GitHub](https://github.com/KRLabsOrg/LettuceDetect) | [HuggingFace](https://huggingface.co/collections/KRLabsOrg/multilingual-hallucination-detection-682a2549c18ecd32689231ce)")

    if model_method == "transformer":
        model_path = example["model_path"]
        # Only paths that encode the size as "210m"/"610m" are rewritten.
        # Paths containing "base" or "large" are used as-is, so the size
        # selection has no effect for those presets.
        if "base" not in model_path.lower() and "large" not in model_path.lower():
            if "210m" in model_path.lower() and "Large" in model_size:
                model_path = model_path.replace("210m", "610m")
            elif "610m" in model_path.lower() and "Base" in model_size:
                model_path = model_path.replace("610m", "210m")
    else:
        model_path = None

    @st.cache_resource
    def load_detector(method, model_path=None, lang=None, api_key=None):
        """Build the detector for the given settings; raises on failure.

        Errors are deliberately not caught here: st.cache_resource stores the
        return value, so returning None after a failed load would keep the
        app in the failed state for these arguments on every later rerun.
        """
        import os

        if api_key:
            os.environ["OPENAI_API_KEY"] = api_key

        if method == "transformer":
            return HallucinationDetector(
                method=method,
                model_path=model_path,
                lang=lang,
                trust_remote_code=True,
            )
        return HallucinationDetector(method=method)

    with st.spinner(f"Loading {selected_language} model..."):
        try:
            detector = load_detector(
                method=model_method,
                model_path=model_path,
                lang=example["lang"],
                api_key=openai_api_key,
            )
        except Exception as e:
            # Top-level UI boundary: report the failure and keep the page
            # usable; the next rerun retries because nothing was cached.
            st.error(f"Error loading model: {e}")
            detector = None

    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Input")
        context = st.text_area("Context", example["context"], height=150)
        question = st.text_area("Question", example["question"], height=80)
        answer = st.text_area("Answer", example["answer"], height=100)

    with col2:
        st.subheader("Results")
        if detector is None:
            st.error("Model not loaded. Please check your internet connection or try a different language.")
        elif st.button("Detect Hallucinations", type="primary"):
            with st.spinner("Analyzing..."):
                predictions = detector.predict(
                    context=[context], question=question, answer=answer, output_format="spans"
                )

            if predictions:
                st.success(f"Found {len(predictions)} hallucination(s)")
                st.markdown(f"**{example['output_label']}:**")
                html_content = create_interactive_text(answer, predictions)
                components.html(html_content, height=200)

                with st.expander("Raw prediction data"):
                    st.json(predictions)
            else:
                st.info("No hallucinations detected")

    st.markdown("---")
    if model_method == "transformer":
        st.markdown(f"**Current Model:** {model_path}")
    else:
        st.markdown("**Method:** LLM-based hallucination detection")
    st.markdown(f"**Language:** {example['lang']}")
|
|
|
|
|
# Render the app only when this file is executed as the main module,
# not when it is imported.
if __name__ == "__main__":
    main()
|
|