Spaces:

KRLabsOrg
/

LettuceDetect-Multilingual

Running

File size: 11,225 Bytes

a44ee7a
ebb897b
a44ee7a
ebb897b

import streamlit as st
import streamlit.components.v1 as components

from lettucedetect.models.inference import HallucinationDetector


def create_interactive_text(text: str, spans: list[dict[str, int | float]]) -> str:
    """Create interactive HTML with highlighting and hover effects.

    :param text: The text to create the interactive text for.
    :param spans: The spans to highlight.
    :return: The interactive text.
    """
    html_text = text

    for span in sorted(spans, key=lambda x: x["start"], reverse=True):
        span_text = text[span["start"] : span["end"]]
        highlighted_span = f'<span class="hallucination" title="Confidence: {span["confidence"]:.3f}">{span_text}</span>'
        html_text = html_text[: span["start"]] + highlighted_span + html_text[span["end"] :]

    return f"""
    <style>
        .container {{
            font-family: Arial, sans-serif;
            font-size: 16px;
            line-height: 1.6;
            padding: 20px;
        }}
        .hallucination {{
            background-color: rgba(255, 99, 71, 0.3);
            padding: 2px;
            border-radius: 3px;
            cursor: help;
        }}
        .hallucination:hover {{
            background-color: rgba(255, 99, 71, 0.5);
        }}
    </style>
    <div class="container">{html_text}</div>
    """


# Example presets, keyed by the label shown in the sidebar select box.
# The insertion order of this dict is the order of the select box options.
#
# Every entry provides:
#   "lang"         - language code passed to the detector
#   "context"      - default text for the "Context" input
#   "question"     - default text for the "Question" input
#   "answer"       - default text for the "Answer" input; each sample answer
#                    contains at least one statement that differs from, or is
#                    absent in, its context (e.g. 69 vs. 67 million)
#   "output_label" - heading rendered above the highlighted result
# plus one of:
#   "model_path"   - checkpoint identifier for the transformer method
#   "method"       - detection method; entries without this key are treated
#                    as "transformer" by main()
LANGUAGE_EXAMPLES = {
    "English (en)": {
        "model_path": "KRLabsOrg/lettucedect-base-modernbert-en-v1",
        "lang": "en",
        "context": "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
        "question": "What is the capital of France? What is the population of France?",
        "answer": "The capital of France is Paris. The population of France is 69 million.",
        "output_label": "Predictions"
    },
    "German (de)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-de-v1",
        "lang": "de",
        "context": "Frankreich ist ein Land in Europa. Die Hauptstadt von Frankreich ist Paris. Die Bevölkerung Frankreichs beträgt 67 Millionen.",
        "question": "Was ist die Hauptstadt von Frankreich? Wie groß ist die Bevölkerung Frankreichs?",
        "answer": "Die Hauptstadt von Frankreich ist Paris. Die Bevölkerung Frankreichs beträgt 69 Millionen.",
        "output_label": "Vorhersagen"
    },
    "French (fr)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-fr-v1",
        "lang": "fr",
        "context": "La France est un pays d'Europe. La capitale de la France est Paris. La population de la France est de 67 millions.",
        "question": "Quelle est la capitale de la France? Quelle est la population de la France?",
        "answer": "La capitale de la France est Paris. La population de la France est de 69 millions.",
        "output_label": "Prédictions"
    },
    "Spanish (es)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-es-v1",
        "lang": "es",
        "context": "Francia es un país de Europa. La capital de Francia es París. La población de Francia es de 67 millones.",
        "question": "¿Cuál es la capital de Francia? ¿Cuál es la población de Francia?",
        "answer": "La capital de Francia es París. La población de Francia es de 69 millones.",
        "output_label": "Predicciones"
    },
    "Italian (it)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-it-v1",
        "lang": "it",
        "context": "La Francia è un paese in Europa. La capitale della Francia è Parigi. La popolazione della Francia è di 67 milioni.",
        "question": "Qual è la capitale della Francia? Qual è la popolazione della Francia?",
        "answer": "La capitale della Francia è Parigi. La popolazione della Francia è di 69 milioni.",
        "output_label": "Previsioni"
    },
    "Polish (pl)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-pl-v1",
        "lang": "pl",
        "context": "Kopernikanizm to teoria astronomiczna opracowana przez Mikołaja Kopernika, zgodnie z którą Słońce znajduje się w centrum Układu Słonecznego, a Ziemia i inne planety krążą wokół niego. Teoria ta została opublikowana w dziele 'O obrotach sfer niebieskich' w 1543 roku.",
        "question": "Na czym polega teoria kopernikańska i kiedy została opublikowana?",
        "answer": "Teoria kopernikańska zakłada, że Ziemia jest jednym z wielu ciał niebieskich krążących wokół Słońca. Kopernik opracował również zaawansowane równania matematyczne opisujące ruch satelitów, które zostały wykorzystane w XX wieku w programie kosmicznym NASA. Teoria została opublikowana w 1543 roku.",
        "output_label": "Przewidywania"
    },
    "Chinese (cn)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-cn-v1",
        "lang": "cn",
        "context": "长城是中国古代的伟大防御工程，全长超过21,000公里。它的建造始于公元前7世纪，历经多个朝代。",
        "question": "长城有多长？它是什么时候建造的？",
        "answer": "长城全长约50,000公里。它的建造始于公元前3世纪，仅在秦朝时期。",
        "output_label": "预测"
    },
    # Not a language: this entry selects the LLM method and therefore has no
    # "model_path". It reuses the English sample texts.
    "LLM-Based": {
        "method": "llm",
        "lang": "en",
        "context": "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
        "question": "What is the capital of France? What is the population of France?",
        "answer": "The capital of France is Paris. The population of France is 69 million.",
        "output_label": "LLM Predictions"
    }
}


def _resolve_model_path(model_path: str, model_size: str) -> str:
    """Return the checkpoint path that matches the size chosen in the sidebar.

    Only paths that encode their size numerically ("210m" / "610m") are
    switched. Paths containing "base" or "large" are returned unchanged.

    :param model_path: The checkpoint path from the selected example.
    :param model_size: The label of the selected "Model Size" radio option.
    :return: The checkpoint path to load.
    """
    lowered = model_path.lower()
    if "base" in lowered or "large" in lowered:
        # NOTE(review): for these paths the size selector has no effect
        # (currently the English model) - confirm whether that is intended.
        return model_path
    if "210m" in lowered and "Large" in model_size:
        return model_path.replace("210m", "610m")
    if "610m" in lowered and "Base" in model_size:
        return model_path.replace("610m", "210m")
    return model_path


def main():
    """Render the Streamlit demo page.

    Builds the sidebar (example selection, model size or OpenAI API key),
    loads the matching ``HallucinationDetector`` and shows an input column
    next to a results column in which predicted spans are highlighted inside
    the answer text.
    """
    st.set_page_config(page_title="Lettuce Detective", page_icon="🥬", layout="wide")

    st.image(
        "https://github.com/KRLabsOrg/LettuceDetect/blob/main/assets/lettuce_detective.png?raw=true",
        width=600,
    )

    st.title("LettuceDetect Multilingual Demo 🌍")
    st.markdown("### Detect hallucinations in 7 languages")

    # Sidebar: example selection and method-specific options.
    with st.sidebar:
        st.header("Settings")
        selected_language = st.selectbox("Select Language", list(LANGUAGE_EXAMPLES))

        example = LANGUAGE_EXAMPLES[selected_language]

        # Entries without an explicit "method" use the transformer detector.
        model_method = example.get("method", "transformer")

        if model_method == "transformer":
            model_size = st.radio(
                "Model Size",
                ["Base (210M)", "Large (610M)"],
                index=0,
                help="Base models are faster, large models are more accurate.",
            )
            # API key not needed for transformer models
            openai_api_key = None
        else:
            # For LLM-based method
            st.info("LLM-based detection requires an OpenAI API key")
            openai_api_key = st.text_input("OpenAI API Key", type="password")

        st.markdown("---")
        st.markdown("### About")
        st.markdown(
            "LettuceDetect identifies hallucinations by comparing answers to provided context. "
            "Highlighted text indicates content not supported by the source material."
        )
        st.markdown("[GitHub](https://github.com/KRLabsOrg/LettuceDetect) | [HuggingFace](https://huggingface.co/collections/KRLabsOrg/multilingual-hallucination-detection-682a2549c18ecd32689231ce)")

    # model_size only exists for the transformer method; the LLM method needs
    # no model path at all.
    if model_method == "transformer":
        model_path = _resolve_model_path(example["model_path"], model_size)
    else:
        model_path = None

    @st.cache_resource
    def load_detector(method, model_path=None, lang=None, api_key=None):
        """Build the detector for the given settings.

        Errors are deliberately NOT caught here: ``st.cache_resource`` stores
        whatever the function returns, so returning ``None`` on failure would
        cache the failure and keep the model "not loaded" on later reruns.
        A raised exception is not cached, so the next rerun retries.

        :param method: Detection method, "transformer" or the LLM method.
        :param model_path: Checkpoint to load (transformer method only).
        :param lang: Language code of the selected example.
        :param api_key: OpenAI API key; also part of the cache key, so a new
            key builds a new detector.
        :return: The constructed ``HallucinationDetector``.
        """
        import os

        if api_key:
            # NOTE(review): the environment is process-wide, so the key is
            # shared by every session served by this process - confirm this
            # is acceptable for the deployment.
            os.environ["OPENAI_API_KEY"] = api_key

        if method == "transformer":
            return HallucinationDetector(
                method=method,
                model_path=model_path,
                lang=lang,
                trust_remote_code=True,
            )
        # LLM-based method
        return HallucinationDetector(method=method)

    # Load detector for the selected language
    with st.spinner(f"Loading {selected_language} model..."):
        try:
            detector = load_detector(
                method=model_method,
                model_path=model_path,
                lang=example["lang"],
                api_key=openai_api_key,
            )
        except Exception as e:
            # UI boundary: report the failure instead of crashing the page.
            st.error(f"Error loading model: {e}")
            detector = None

    # Create a two-column layout
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Input")
        context = st.text_area("Context", example["context"], height=150)
        question = st.text_area("Question", example["question"], height=80)
        answer = st.text_area("Answer", example["answer"], height=100)

    with col2:
        st.subheader("Results")
        if detector:
            if st.button("Detect Hallucinations", type="primary"):
                with st.spinner("Analyzing..."):
                    predictions = detector.predict(
                        context=[context], question=question, answer=answer, output_format="spans"
                    )

                    if predictions:
                        st.success(f"Found {len(predictions)} hallucination(s)")
                        st.markdown(f"**{example['output_label']}:**")
                        html_content = create_interactive_text(answer, predictions)
                        # Fixed-height frame; scrolling keeps long answers readable.
                        components.html(html_content, height=200, scrolling=True)

                        # Display raw predictions in a collapsible section
                        with st.expander("Raw prediction data"):
                            st.json(predictions)
                    else:
                        st.info("No hallucinations detected")
        else:
            st.error("Model not loaded. Please check your internet connection or try a different language.")

    # Show information about current model
    st.markdown("---")
    if model_method == "transformer":
        st.markdown(f"**Current Model:** {model_path}")
    else:
        st.markdown("**Method:** LLM-based hallucination detection")
    st.markdown(f"**Language:** {example['lang']}")


# Entry point: build the page only when this file is executed as the main
# module, not when it is imported.
if __name__ == "__main__":
    main()