# Revision note: Kovács Ádám · Changed · 811e6a2
import streamlit as st
import streamlit.components.v1 as components
from lettucedetect.models.inference import HallucinationDetector
def create_interactive_text(text: str, spans: list[dict[str, int | float]]) -> str:
"""Create interactive HTML with highlighting and hover effects.
:param text: The text to create the interactive text for.
:param spans: The spans to highlight.
:return: The interactive text.
"""
html_text = text
for span in sorted(spans, key=lambda x: x["start"], reverse=True):
span_text = text[span["start"] : span["end"]]
highlighted_span = f'<span class="hallucination" title="Confidence: {span["confidence"]:.3f}">{span_text}</span>'
html_text = html_text[: span["start"]] + highlighted_span + html_text[span["end"] :]
return f"""
<style>
.container {{
font-family: Arial, sans-serif;
font-size: 16px;
line-height: 1.6;
padding: 20px;
}}
.hallucination {{
background-color: rgba(255, 99, 71, 0.3);
padding: 2px;
border-radius: 3px;
cursor: help;
}}
.hallucination:hover {{
background-color: rgba(255, 99, 71, 0.5);
}}
</style>
<div class="container">{html_text}</div>
"""
# Sample text shared by the English transformer preset and the LLM preset.
_ENGLISH_SAMPLE = {
    "context": "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
    "question": "What is the capital of France? What is the population of France?",
    "answer": "The capital of France is Paris. The population of France is 69 million.",
}

# Demo presets, keyed by the label offered in the language selector.
# Each preset carries the pre-filled context/question/answer, the language
# code, and the heading shown above the results. Transformer presets name a
# model in "model_path"; the LLM preset sets "method" instead.
LANGUAGE_EXAMPLES = {
    "English (en)": {
        "model_path": "KRLabsOrg/lettucedect-base-modernbert-en-v1",
        "lang": "en",
        **_ENGLISH_SAMPLE,
        "output_label": "Predictions",
    },
    "German (de)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-de-v1",
        "lang": "de",
        "context": "Frankreich ist ein Land in Europa. Die Hauptstadt von Frankreich ist Paris. Die Bevölkerung Frankreichs beträgt 67 Millionen.",
        "question": "Was ist die Hauptstadt von Frankreich? Wie groß ist die Bevölkerung Frankreichs?",
        "answer": "Die Hauptstadt von Frankreich ist Paris. Die Bevölkerung Frankreichs beträgt 69 Millionen.",
        "output_label": "Vorhersagen",
    },
    "French (fr)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-fr-v1",
        "lang": "fr",
        "context": "La France est un pays d'Europe. La capitale de la France est Paris. La population de la France est de 67 millions.",
        "question": "Quelle est la capitale de la France? Quelle est la population de la France?",
        "answer": "La capitale de la France est Paris. La population de la France est de 69 millions.",
        "output_label": "Prédictions",
    },
    "Spanish (es)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-es-v1",
        "lang": "es",
        "context": "Francia es un país de Europa. La capital de Francia es París. La población de Francia es de 67 millones.",
        "question": "¿Cuál es la capital de Francia? ¿Cuál es la población de Francia?",
        "answer": "La capital de Francia es París. La población de Francia es de 69 millones.",
        "output_label": "Predicciones",
    },
    "Italian (it)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-it-v1",
        "lang": "it",
        "context": "La Francia è un paese in Europa. La capitale della Francia è Parigi. La popolazione della Francia è di 67 milioni.",
        "question": "Qual è la capitale della Francia? Qual è la popolazione della Francia?",
        "answer": "La capitale della Francia è Parigi. La popolazione della Francia è di 69 milioni.",
        "output_label": "Previsioni",
    },
    "Polish (pl)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-pl-v1",
        "lang": "pl",
        "context": "Kopernikanizm to teoria astronomiczna opracowana przez Mikołaja Kopernika, zgodnie z którą Słońce znajduje się w centrum Układu Słonecznego, a Ziemia i inne planety krążą wokół niego. Teoria ta została opublikowana w dziele 'O obrotach sfer niebieskich' w 1543 roku.",
        "question": "Na czym polega teoria kopernikańska i kiedy została opublikowana?",
        "answer": "Teoria kopernikańska zakłada, że Ziemia jest jednym z wielu ciał niebieskich krążących wokół Słońca. Kopernik opracował również zaawansowane równania matematyczne opisujące ruch satelitów, które zostały wykorzystane w XX wieku w programie kosmicznym NASA. Teoria została opublikowana w 1543 roku.",
        "output_label": "Przewidywania",
    },
    "Chinese (cn)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-cn-v1",
        "lang": "cn",
        "context": "长城是中国古代的伟大防御工程,全长超过21,000公里。它的建造始于公元前7世纪,历经多个朝代。",
        "question": "长城有多长?它是什么时候建造的?",
        "answer": "长城全长约50,000公里。它的建造始于公元前3世纪,仅在秦朝时期。",
        "output_label": "预测",
    },
    "LLM-Based": {
        "method": "llm",
        "lang": "en",
        **_ENGLISH_SAMPLE,
        "output_label": "LLM Predictions",
    },
}
def main():
    """Render the LettuceDetect multilingual Streamlit demo page.

    The page has a sidebar (language preset, model size or OpenAI API key,
    and an "About" section), an input column pre-filled from the selected
    preset in ``LANGUAGE_EXAMPLES``, and a results column that runs the
    detector on demand and shows the highlighted answer plus the raw
    predictions. A footer reports the model/method and language in use.
    """
    st.set_page_config(page_title="Lettuce Detective", page_icon="🥬", layout="wide")
    st.image(
        "https://github.com/KRLabsOrg/LettuceDetect/blob/main/assets/lettuce_detective.png?raw=true",
        width=600,
    )
    st.title("LettuceDetect Multilingual Demo 🌍")
    st.markdown("### Detect hallucinations in 7 languages")

    # Create a sidebar for language selection and model options
    with st.sidebar:
        st.header("Settings")
        selected_language = st.selectbox(
            "Select Language",
            list(LANGUAGE_EXAMPLES.keys()),
        )
        # Example data for the selected language; reused for the rest of the page.
        example = LANGUAGE_EXAMPLES[selected_language]

        # Only show model size option for transformer-based models.
        # Presets without a "method" key are transformer presets.
        model_method = example.get("method", "transformer")
        if model_method == "transformer":
            model_size = st.radio(
                "Model Size",
                ["Base (210M)", "Large (610M)"],
                index=0,
                help="Base models are faster, large models are more accurate.",
            )
            # API key not needed for transformer models
            openai_api_key = None
        else:
            # For LLM-based method
            st.info("LLM-based detection requires an OpenAI API key")
            openai_api_key = st.text_input("OpenAI API Key", type="password")

        st.markdown("---")
        st.markdown("### About")
        st.markdown(
            "LettuceDetect identifies hallucinations by comparing answers to provided context. "
            "Highlighted text indicates content not supported by the source material."
        )
        st.markdown("[GitHub](https://github.com/KRLabsOrg/LettuceDetect) | [HuggingFace](https://huggingface.co/collections/KRLabsOrg/multilingual-hallucination-detection-682a2549c18ecd32689231ce)")

    # Adjust model path based on selected size if needed
    if model_method == "transformer":
        model_path = example["model_path"]
        if "base" not in model_path.lower() and "large" not in model_path.lower():
            # Only adjust if it's a numerical size model that can be switched
            if "210m" in model_path.lower() and "Large" in model_size:
                model_path = model_path.replace("210m", "610m")
            elif "610m" in model_path.lower() and "Base" in model_size:
                model_path = model_path.replace("610m", "210m")
    else:
        # For LLM-based method, no model path needed
        model_path = None

    @st.cache_resource
    def load_detector(method, model_path=None, lang=None, api_key=None):
        """Build the detector for the given settings; cached per argument set.

        Failures are raised instead of being turned into ``None``: a returned
        ``None`` would be stored by ``st.cache_resource`` and served on every
        later rerun with the same arguments, whereas a raised exception is
        not cached, so the next rerun retries the load.

        :param method: ``"transformer"`` or the LLM method name.
        :param model_path: Model identifier, used for the transformer method.
        :param lang: Language code, used for the transformer method.
        :param api_key: If non-empty, exported as ``OPENAI_API_KEY`` before
            the detector is constructed.
        :return: The constructed ``HallucinationDetector``.
        """
        import os

        if api_key:
            os.environ["OPENAI_API_KEY"] = api_key
        if method == "transformer":
            return HallucinationDetector(
                method=method,
                model_path=model_path,
                lang=lang,
                trust_remote_code=True,
            )
        # LLM-based method
        return HallucinationDetector(method=method)

    # Load detector for the selected language
    with st.spinner(f"Loading {selected_language} model..."):
        try:
            detector = load_detector(
                method=model_method,
                model_path=model_path,
                lang=example["lang"],
                api_key=openai_api_key,
            )
        except Exception as e:
            # UI boundary: report the problem and keep the page usable.
            st.error(f"Error loading model: {e}")
            detector = None

    # Create a two-column layout
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("Input")
        context = st.text_area(
            "Context",
            example["context"],
            height=150,
        )
        question = st.text_area(
            "Question",
            example["question"],
            height=80,
        )
        answer = st.text_area(
            "Answer",
            example["answer"],
            height=100,
        )

    with col2:
        st.subheader("Results")
        if detector is not None:
            if st.button("Detect Hallucinations", type="primary"):
                with st.spinner("Analyzing..."):
                    predictions = detector.predict(
                        context=[context], question=question, answer=answer, output_format="spans"
                    )
                if predictions:
                    st.success(f"Found {len(predictions)} hallucination(s)")
                    st.markdown(f"**{example['output_label']}:**")
                    html_content = create_interactive_text(answer, predictions)
                    components.html(html_content, height=200)
                    # Display raw predictions in a collapsible section
                    with st.expander("Raw prediction data"):
                        st.json(predictions)
                else:
                    st.info("No hallucinations detected")
        else:
            st.error("Model not loaded. Please check your internet connection or try a different language.")

    # Show information about current model
    st.markdown("---")
    if model_method == "transformer":
        st.markdown(f"**Current Model:** {model_path}")
    else:
        st.markdown("**Method:** LLM-based hallucination detection")
    st.markdown(f"**Language:** {example['lang']}")
# Script entry point: build the demo page when this file is executed directly.
if __name__ == "__main__":
    main()