Update src/streamlit_app.py
src/streamlit_app.py  CHANGED  (+259 -38)
@@ -1,40 +1,261 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
+import streamlit.components.v1 as components

[35 blank lines (old lines 6-40) removed]
+from lettucedetect.models.inference import HallucinationDetector
+
+
+def create_interactive_text(text: str, spans: list[dict[str, int | float]]) -> str:
+    """Create interactive HTML with highlighting and hover effects.
+
+    :param text: The text to create the interactive text for.
+    :param spans: The spans to highlight.
+    :return: The interactive text.
+    """
+    html_text = text
+
+    for span in sorted(spans, key=lambda x: x["start"], reverse=True):
+        span_text = text[span["start"] : span["end"]]
+        highlighted_span = f'<span class="hallucination" title="Confidence: {span["confidence"]:.3f}">{span_text}</span>'
+        html_text = html_text[: span["start"]] + highlighted_span + html_text[span["end"] :]
+
+    return f"""
+    <style>
+        .container {{
+            font-family: Arial, sans-serif;
+            font-size: 16px;
+            line-height: 1.6;
+            padding: 20px;
+        }}
+        .hallucination {{
+            background-color: rgba(255, 99, 71, 0.3);
+            padding: 2px;
+            border-radius: 3px;
+            cursor: help;
+        }}
+        .hallucination:hover {{
+            background-color: rgba(255, 99, 71, 0.5);
+        }}
+    </style>
+    <div class="container">{html_text}</div>
+    """
+
+
+# Define examples for each language
+LANGUAGE_EXAMPLES = {
+    "English (en)": {
+        "model_path": "KRLabsOrg/lettucedect-base-modernbert-en-v1",
+        "lang": "en",
+        "context": "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
+        "question": "What is the capital of France? What is the population of France?",
+        "answer": "The capital of France is Paris. The population of France is 69 million.",
+        "output_label": "Predictions"
+    },
+    "German (de)": {
+        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-de-v1",
+        "lang": "de",
+        "context": "Frankreich ist ein Land in Europa. Die Hauptstadt von Frankreich ist Paris. Die Bevölkerung Frankreichs beträgt 67 Millionen.",
+        "question": "Was ist die Hauptstadt von Frankreich? Wie groß ist die Bevölkerung Frankreichs?",
+        "answer": "Die Hauptstadt von Frankreich ist Paris. Die Bevölkerung Frankreichs beträgt 69 Millionen.",
+        "output_label": "Vorhersagen"
+    },
+    "French (fr)": {
+        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-fr-v1",
+        "lang": "fr",
+        "context": "La France est un pays d'Europe. La capitale de la France est Paris. La population de la France est de 67 millions.",
+        "question": "Quelle est la capitale de la France? Quelle est la population de la France?",
+        "answer": "La capitale de la France est Paris. La population de la France est de 69 millions.",
+        "output_label": "Prédictions"
+    },
+    "Spanish (es)": {
+        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-es-v1",
+        "lang": "es",
+        "context": "Francia es un país de Europa. La capital de Francia es París. La población de Francia es de 67 millones.",
+        "question": "¿Cuál es la capital de Francia? ¿Cuál es la población de Francia?",
+        "answer": "La capital de Francia es París. La población de Francia es de 69 millones.",
+        "output_label": "Predicciones"
+    },
+    "Italian (it)": {
+        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-it-v1",
+        "lang": "it",
+        "context": "La Francia è un paese in Europa. La capitale della Francia è Parigi. La popolazione della Francia è di 67 milioni.",
+        "question": "Qual è la capitale della Francia? Qual è la popolazione della Francia?",
+        "answer": "La capitale della Francia è Parigi. La popolazione della Francia è di 69 milioni.",
+        "output_label": "Previsioni"
+    },
+    "Polish (pl)": {
+        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-pl-v1",
+        "lang": "pl",
+        "context": "Kopernikanizm to teoria astronomiczna opracowana przez Mikołaja Kopernika, zgodnie z którą Słońce znajduje się w centrum Układu Słonecznego, a Ziemia i inne planety krążą wokół niego. Teoria ta została opublikowana w dziele 'O obrotach sfer niebieskich' w 1543 roku.",
+        "question": "Na czym polega teoria kopernikańska i kiedy została opublikowana?",
+        "answer": "Teoria kopernikańska zakłada, że Ziemia jest jednym z wielu ciał niebieskich krążących wokół Słońca. Kopernik opracował również zaawansowane równania matematyczne opisujące ruch satelitów, które zostały wykorzystane w XX wieku w programie kosmicznym NASA. Teoria została opublikowana w 1543 roku.",
+        "output_label": "Przewidywania"
+    },
+    "Chinese (cn)": {
+        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-cn-v1",
+        "lang": "cn",
+        "context": "长城是中国古代的伟大防御工程,全长超过21,000公里。它的建造始于公元前7世纪,历经多个朝代。",
+        "question": "长城有多长?它是什么时候建造的?",
+        "answer": "长城全长约50,000公里。它的建造始于公元前3世纪,仅在秦朝时期。",
+        "output_label": "预测"
+    },
+    "LLM-Based": {
+        "method": "llm",
+        "lang": "en",
+        "context": "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
+        "question": "What is the capital of France? What is the population of France?",
+        "answer": "The capital of France is Paris. The population of France is 69 million.",
+        "output_label": "LLM Predictions"
+    }
+}
+
+
+def main():
+    st.set_page_config(page_title="Lettuce Detective", page_icon="🥬", layout="wide")
+
+    st.image(
+        "https://github.com/KRLabsOrg/LettuceDetect/blob/main/assets/lettuce_detective.png?raw=true",
+        width=600,
+    )
+
+    st.title("LettuceDetect Multilingual Demo 🌍")
+    st.markdown("### Detect hallucinations in 7 languages")
+
+    # Create a sidebar for language selection and model options
+    with st.sidebar:
+        st.header("Settings")
+        selected_language = st.selectbox(
+            "Select Language",
+            list(LANGUAGE_EXAMPLES.keys())
+        )
+
+        example = LANGUAGE_EXAMPLES[selected_language]
+
+        # Only show model size option for transformer-based models
+        model_method = example.get("method", "transformer")
+
+        if model_method == "transformer":
+            model_size = st.radio(
+                "Model Size",
+                ["Base (210M)", "Large (610M)"],
+                index=0,
+                help="Base models are faster, large models are more accurate."
+            )
+
+            # API key not needed for transformer models
+            openai_api_key = None
+        else:
+            # For LLM-based method
+            st.info("LLM-based detection requires an OpenAI API key")
+            openai_api_key = st.text_input("OpenAI API Key", type="password")
+
+        st.markdown("---")
+        st.markdown("### About")
+        st.markdown(
+            "LettuceDetect identifies hallucinations by comparing answers to provided context. "
+            "Highlighted text indicates content not supported by the source material."
+        )
+        st.markdown("[GitHub](https://github.com/KRLabsOrg/LettuceDetect) | [HuggingFace](https://huggingface.co/collections/KRLabsOrg/multilingual-hallucination-detection-682a2549c18ecd32689231ce)")

+    # Get the example data for the selected language
+    example = LANGUAGE_EXAMPLES[selected_language]
+
+    # Adjust model path based on selected size if needed
+    if model_method == "transformer":
+        model_path = example["model_path"]
+        if "base" not in model_path.lower() and "large" not in model_path.lower():
+            # Only adjust if it's a numerical size model that can be switched
+            if "210m" in model_path.lower() and "Large" in model_size:
+                model_path = model_path.replace("210m", "610m")
+            elif "610m" in model_path.lower() and "Base" in model_size:
+                model_path = model_path.replace("610m", "210m")
+    else:
+        # For LLM-based method, no model path needed
+        model_path = None
+
+    @st.cache_resource
+    def load_detector(method, model_path=None, lang=None, api_key=None):
+        try:
+            import os
+            if api_key:
+                os.environ["OPENAI_API_KEY"] = api_key
+
+            if method == "transformer":
+                return HallucinationDetector(
+                    method=method,
+                    model_path=model_path,
+                    lang=lang,
+                    trust_remote_code=True
+                )
+            else:
+                # LLM-based method
+                return HallucinationDetector(method=method)
+        except Exception as e:
+            st.error(f"Error loading model: {e}")
+            return None
+
+    # Load detector for the selected language
+    with st.spinner(f"Loading {selected_language} model..."):
+        detector = load_detector(
+            method=model_method,
+            model_path=model_path,
+            lang=example["lang"],
+            api_key=openai_api_key
+        )
+
+    # Create a two-column layout
+    col1, col2 = st.columns(2)
+
+    with col1:
+        st.subheader("Input")
+        context = st.text_area(
+            "Context",
+            example["context"],
+            height=150
+        )
+
+        question = st.text_area(
+            "Question",
+            example["question"],
+            height=80
+        )
+
+        answer = st.text_area(
+            "Answer",
+            example["answer"],
+            height=100
+        )
+
+    with col2:
+        st.subheader("Results")
+        if detector:
+            if st.button("Detect Hallucinations", type="primary"):
+                with st.spinner("Analyzing..."):
+                    predictions = detector.predict(
+                        context=[context], question=question, answer=answer, output_format="spans"
+                    )
+
+                    if predictions:
+                        st.success(f"Found {len(predictions)} hallucination(s)")
+                        st.markdown(f"**{example['output_label']}:**")
+                        html_content = create_interactive_text(answer, predictions)
+                        components.html(html_content, height=200)
+
+                        # Display raw predictions in a collapsible section
+                        with st.expander("Raw prediction data"):
+                            st.json(predictions)
+                    else:
+                        st.info("No hallucinations detected")
+        else:
+            st.error("Model not loaded. Please check your internet connection or try a different language.")
+
+    # Show information about current model
+    st.markdown("---")
+    if model_method == "transformer":
+        st.markdown(f"**Current Model:** {model_path}")
+    else:
+        st.markdown("**Method:** LLM-based hallucination detection")
+    st.markdown(f"**Language:** {example['lang']}")
+
+
+if __name__ == "__main__":
+    main()
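
For reference, a minimal standalone sketch of the detector call this revision wires into the UI. It assumes the lettucedetect package and the English model named above are installed and downloadable; the constructor arguments, the predict() signature, and the span fields ("start", "end", "confidence") are taken from the code in this diff, while the surrounding script is purely illustrative.

# Sketch only: mirrors the HallucinationDetector usage in src/streamlit_app.py above.
# Assumes lettucedetect is installed and the English model can be fetched from the Hub.
from lettucedetect.models.inference import HallucinationDetector

detector = HallucinationDetector(
    method="transformer",
    model_path="KRLabsOrg/lettucedect-base-modernbert-en-v1",
    lang="en",
    trust_remote_code=True,
)

context = "France is a country in Europe. The capital of France is Paris. The population of France is 67 million."
question = "What is the capital of France? What is the population of France?"
answer = "The capital of France is Paris. The population of France is 69 million."

# As in the app, context is passed as a list and span-level output is requested.
spans = detector.predict(
    context=[context], question=question, answer=answer, output_format="spans"
)
for span in spans:
    # Each span dict carries "start", "end", and "confidence", the fields
    # consumed by create_interactive_text() above.
    print(answer[span["start"]:span["end"]], span["confidence"])

Locally, the Space itself would be started with "streamlit run src/streamlit_app.py".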