"""Streamlit demo: multilingual hallucination detection with LettuceDetect.

Lets the user pick a language/model, edit a (context, question, answer)
triple, and renders the detected hallucinated spans highlighted inside the
answer text.
"""

import streamlit as st
import streamlit.components.v1 as components

from lettucedetect.models.inference import HallucinationDetector


def create_interactive_text(text: str, spans: list[dict[str, int | float]]) -> str:
    """Create interactive HTML with highlighting and hover effects.

    :param text: The text to create the interactive text for.
    :param spans: The spans to highlight. Each span is a dict with "start"
        and "end" character offsets into *text* and, optionally, a
        "confidence" score used for the hover tooltip.
    :return: The interactive text as an HTML fragment.
    """
    html_text = text
    # Replace spans from last to first so the character offsets (which refer
    # to the original text) stay valid after each substitution.
    for span in sorted(spans, key=lambda x: x["start"], reverse=True):
        span_text = text[span["start"] : span["end"]]
        # Confidence may be absent depending on detector output; only add a
        # tooltip when it is present.
        confidence = span.get("confidence")
        tooltip = f' title="Confidence: {confidence:.2f}"' if confidence is not None else ""
        highlighted_span = (
            f'<span style="background-color: rgba(255, 99, 71, 0.4); '
            f'border-radius: 4px; padding: 1px 2px; cursor: help;"{tooltip}>'
            f"{span_text}</span>"
        )
        html_text = html_text[: span["start"]] + highlighted_span + html_text[span["end"] :]
    return f"""
    <div style="font-family: sans-serif; font-size: 16px; line-height: 1.6;">
        {html_text}
    </div>
    """


# Define examples for each language.
LANGUAGE_EXAMPLES = {
    "English (en)": {
        "model_path": "KRLabsOrg/lettucedect-base-modernbert-en-v1",
        "lang": "en",
        "context": "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
        "question": "What is the capital of France? What is the population of France?",
        "answer": "The capital of France is Paris. The population of France is 69 million.",
        "output_label": "Predictions",
    },
    "German (de)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-de-v1",
        "lang": "de",
        "context": "Frankreich ist ein Land in Europa. Die Hauptstadt von Frankreich ist Paris. Die Bevölkerung Frankreichs beträgt 67 Millionen.",
        "question": "Was ist die Hauptstadt von Frankreich? Wie groß ist die Bevölkerung Frankreichs?",
        "answer": "Die Hauptstadt von Frankreich ist Paris. Die Bevölkerung Frankreichs beträgt 69 Millionen.",
        "output_label": "Vorhersagen",
    },
    "French (fr)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-fr-v1",
        "lang": "fr",
        "context": "La France est un pays d'Europe. La capitale de la France est Paris. La population de la France est de 67 millions.",
        "question": "Quelle est la capitale de la France? Quelle est la population de la France?",
        "answer": "La capitale de la France est Paris. La population de la France est de 69 millions.",
        "output_label": "Prédictions",
    },
    "Spanish (es)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-es-v1",
        "lang": "es",
        "context": "Francia es un país de Europa. La capital de Francia es París. La población de Francia es de 67 millones.",
        "question": "¿Cuál es la capital de Francia? ¿Cuál es la población de Francia?",
        "answer": "La capital de Francia es París. La población de Francia es de 69 millones.",
        "output_label": "Predicciones",
    },
    "Italian (it)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-it-v1",
        "lang": "it",
        "context": "La Francia è un paese in Europa. La capitale della Francia è Parigi. La popolazione della Francia è di 67 milioni.",
        "question": "Qual è la capitale della Francia? Qual è la popolazione della Francia?",
        "answer": "La capitale della Francia è Parigi. La popolazione della Francia è di 69 milioni.",
        "output_label": "Previsioni",
    },
    "Polish (pl)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-pl-v1",
        "lang": "pl",
        "context": "Kopernikanizm to teoria astronomiczna opracowana przez Mikołaja Kopernika, zgodnie z którą Słońce znajduje się w centrum Układu Słonecznego, a Ziemia i inne planety krążą wokół niego. Teoria ta została opublikowana w dziele 'O obrotach sfer niebieskich' w 1543 roku.",
        "question": "Na czym polega teoria kopernikańska i kiedy została opublikowana?",
        "answer": "Teoria kopernikańska zakłada, że Ziemia jest jednym z wielu ciał niebieskich krążących wokół Słońca. Kopernik opracował również zaawansowane równania matematyczne opisujące ruch satelitów, które zostały wykorzystane w XX wieku w programie kosmicznym NASA. Teoria została opublikowana w 1543 roku.",
        "output_label": "Przewidywania",
    },
    "Chinese (cn)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-cn-v1",
        "lang": "cn",
        "context": "长城是中国古代的伟大防御工程,全长超过21,000公里。它的建造始于公元前7世纪,历经多个朝代。",
        "question": "长城有多长?它是什么时候建造的?",
        "answer": "长城全长约50,000公里。它的建造始于公元前3世纪,仅在秦朝时期。",
        "output_label": "预测",
    },
    "LLM-Based": {
        # No "model_path": this entry uses the LLM-based detection method.
        "method": "llm",
        "lang": "en",
        "context": "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
        "question": "What is the capital of France? What is the population of France?",
        "answer": "The capital of France is Paris. The population of France is 69 million.",
        "output_label": "LLM Predictions",
    },
}


def main():
    """Run the Streamlit app: sidebar settings, input form, and results."""
    st.set_page_config(page_title="Lettuce Detective", page_icon="🥬", layout="wide")

    st.image(
        "https://github.com/KRLabsOrg/LettuceDetect/blob/main/assets/lettuce_detective.png?raw=true",
        width=600,
    )
    st.title("LettuceDetect Multilingual Demo 🌍")
    st.markdown("### Detect hallucinations in 7 languages")

    # Create a sidebar for language selection and model options.
    with st.sidebar:
        st.header("Settings")
        selected_language = st.selectbox("Select Language", list(LANGUAGE_EXAMPLES.keys()))
        example = LANGUAGE_EXAMPLES[selected_language]

        # Only show the model-size option for transformer-based models;
        # entries without a "method" key default to "transformer".
        model_method = example.get("method", "transformer")
        if model_method == "transformer":
            model_size = st.radio(
                "Model Size",
                ["Base (210M)", "Large (610M)"],
                index=0,
                help="Base models are faster, large models are more accurate.",
            )
            # API key not needed for transformer models.
            openai_api_key = None
        else:
            # For the LLM-based method an OpenAI key is required.
            st.info("LLM-based detection requires an OpenAI API key")
            openai_api_key = st.text_input("OpenAI API Key", type="password")

        st.markdown("---")
        st.markdown("### About")
        st.markdown(
            "LettuceDetect identifies hallucinations by comparing answers to provided context. "
            "Highlighted text indicates content not supported by the source material."
        )
        st.markdown(
            "[GitHub](https://github.com/KRLabsOrg/LettuceDetect) | [HuggingFace](https://huggingface.co/collections/KRLabsOrg/multilingual-hallucination-detection-682a2549c18ecd32689231ce)"
        )

    # Resolve the model path, honoring the Base/Large size selection for
    # models published in both sizes (210m <-> 610m).
    if model_method == "transformer":
        model_path = example["model_path"]
        if "base" not in model_path.lower() and "large" not in model_path.lower():
            # Only adjust if it's a numerical-size model that can be switched.
            if "210m" in model_path.lower() and "Large" in model_size:
                model_path = model_path.replace("210m", "610m")
            elif "610m" in model_path.lower() and "Base" in model_size:
                model_path = model_path.replace("610m", "210m")
    else:
        # For the LLM-based method no model path is needed.
        model_path = None

    @st.cache_resource
    def load_detector(method, model_path=None, lang=None, api_key=None):
        """Load and cache a HallucinationDetector; return None on failure."""
        try:
            import os

            if api_key:
                os.environ["OPENAI_API_KEY"] = api_key

            if method == "transformer":
                return HallucinationDetector(
                    method=method, model_path=model_path, lang=lang, trust_remote_code=True
                )
            # LLM-based method.
            return HallucinationDetector(method=method)
        except Exception as e:
            st.error(f"Error loading model: {e}")
            return None

    # Load the detector for the selected language.
    with st.spinner(f"Loading {selected_language} model..."):
        detector = load_detector(
            method=model_method,
            model_path=model_path,
            lang=example["lang"],
            api_key=openai_api_key,
        )

    # Two-column layout: inputs on the left, results on the right.
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Input")
        context = st.text_area("Context", example["context"], height=150)
        question = st.text_area("Question", example["question"], height=80)
        answer = st.text_area("Answer", example["answer"], height=100)

    with col2:
        st.subheader("Results")
        if detector:
            if st.button("Detect Hallucinations", type="primary"):
                with st.spinner("Analyzing..."):
                    predictions = detector.predict(
                        context=[context],
                        question=question,
                        answer=answer,
                        output_format="spans",
                    )
                if predictions:
                    st.success(f"Found {len(predictions)} hallucination(s)")
                    st.markdown(f"**{example['output_label']}:**")
                    html_content = create_interactive_text(answer, predictions)
                    components.html(html_content, height=200)
                    # Display raw predictions in a collapsible section.
                    with st.expander("Raw prediction data"):
                        st.json(predictions)
                else:
                    st.info("No hallucinations detected")
        else:
            st.error(
                "Model not loaded. Please check your internet connection or try a different language."
            )

    # Show information about the current model.
    st.markdown("---")
    if model_method == "transformer":
        st.markdown(f"**Current Model:** {model_path}")
    else:
        st.markdown("**Method:** LLM-based hallucination detection")
    st.markdown(f"**Language:** {example['lang']}")


if __name__ == "__main__":
    main()