adaamko committed on
Commit
ebb897b
·
verified ·
1 Parent(s): 86c548a

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +259 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,261 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import streamlit.components.v1 as components
3
 
4
+ from lettucedetect.models.inference import HallucinationDetector
5
+
6
+
7
+ def create_interactive_text(text: str, spans: list[dict[str, int | float]]) -> str:
8
+ """Create interactive HTML with highlighting and hover effects.
9
+
10
+ :param text: The text to create the interactive text for.
11
+ :param spans: The spans to highlight.
12
+ :return: The interactive text.
13
+ """
14
+ html_text = text
15
+
16
+ for span in sorted(spans, key=lambda x: x["start"], reverse=True):
17
+ span_text = text[span["start"] : span["end"]]
18
+ highlighted_span = f'<span class="hallucination" title="Confidence: {span["confidence"]:.3f}">{span_text}</span>'
19
+ html_text = html_text[: span["start"]] + highlighted_span + html_text[span["end"] :]
20
+
21
+ return f"""
22
+ <style>
23
+ .container {{
24
+ font-family: Arial, sans-serif;
25
+ font-size: 16px;
26
+ line-height: 1.6;
27
+ padding: 20px;
28
+ }}
29
+ .hallucination {{
30
+ background-color: rgba(255, 99, 71, 0.3);
31
+ padding: 2px;
32
+ border-radius: 3px;
33
+ cursor: help;
34
+ }}
35
+ .hallucination:hover {{
36
+ background-color: rgba(255, 99, 71, 0.5);
37
+ }}
38
+ </style>
39
+ <div class="container">{html_text}</div>
40
+ """
41
+
42
+
43
# Sidebar choices, in display order. Each entry holds the pre-filled demo
# inputs ("context", "question", "answer"), the "lang" code handed to the
# detector and the heading shown above the results ("output_label").
# Transformer entries also carry a "model_path"; the LLM entry instead sets
# "method" to "llm" and has no model path.
LANGUAGE_EXAMPLES = {
    # --- Transformer-based detectors, one per language ---
    "English (en)": {
        "model_path": "KRLabsOrg/lettucedect-base-modernbert-en-v1",
        "lang": "en",
        "context": "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
        "question": "What is the capital of France? What is the population of France?",
        "answer": "The capital of France is Paris. The population of France is 69 million.",
        "output_label": "Predictions",
    },
    "German (de)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-de-v1",
        "lang": "de",
        "context": "Frankreich ist ein Land in Europa. Die Hauptstadt von Frankreich ist Paris. Die Bevölkerung Frankreichs beträgt 67 Millionen.",
        "question": "Was ist die Hauptstadt von Frankreich? Wie groß ist die Bevölkerung Frankreichs?",
        "answer": "Die Hauptstadt von Frankreich ist Paris. Die Bevölkerung Frankreichs beträgt 69 Millionen.",
        "output_label": "Vorhersagen",
    },
    "French (fr)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-fr-v1",
        "lang": "fr",
        "context": "La France est un pays d'Europe. La capitale de la France est Paris. La population de la France est de 67 millions.",
        "question": "Quelle est la capitale de la France? Quelle est la population de la France?",
        "answer": "La capitale de la France est Paris. La population de la France est de 69 millions.",
        "output_label": "Prédictions",
    },
    "Spanish (es)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-es-v1",
        "lang": "es",
        "context": "Francia es un país de Europa. La capital de Francia es París. La población de Francia es de 67 millones.",
        "question": "¿Cuál es la capital de Francia? ¿Cuál es la población de Francia?",
        "answer": "La capital de Francia es París. La población de Francia es de 69 millones.",
        "output_label": "Predicciones",
    },
    "Italian (it)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-it-v1",
        "lang": "it",
        "context": "La Francia è un paese in Europa. La capitale della Francia è Parigi. La popolazione della Francia è di 67 milioni.",
        "question": "Qual è la capitale della Francia? Qual è la popolazione della Francia?",
        "answer": "La capitale della Francia è Parigi. La popolazione della Francia è di 69 milioni.",
        "output_label": "Previsioni",
    },
    "Polish (pl)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-pl-v1",
        "lang": "pl",
        "context": "Kopernikanizm to teoria astronomiczna opracowana przez Mikołaja Kopernika, zgodnie z którą Słońce znajduje się w centrum Układu Słonecznego, a Ziemia i inne planety krążą wokół niego. Teoria ta została opublikowana w dziele 'O obrotach sfer niebieskich' w 1543 roku.",
        "question": "Na czym polega teoria kopernikańska i kiedy została opublikowana?",
        "answer": "Teoria kopernikańska zakłada, że Ziemia jest jednym z wielu ciał niebieskich krążących wokół Słońca. Kopernik opracował również zaawansowane równania matematyczne opisujące ruch satelitów, które zostały wykorzystane w XX wieku w programie kosmicznym NASA. Teoria została opublikowana w 1543 roku.",
        "output_label": "Przewidywania",
    },
    "Chinese (cn)": {
        "model_path": "KRLabsOrg/lettucedect-210m-eurobert-cn-v1",
        "lang": "cn",
        "context": "长城是中国古代的伟大防御工程,全长超过21,000公里。它的建造始于公元前7世纪,历经多个朝代。",
        "question": "长城有多长?它是什么时候建造的?",
        "answer": "长城全长约50,000公里。它的建造始于公元前3世纪,仅在秦朝时期。",
        "output_label": "预测",
    },
    # --- LLM-based detector: selected via "method", no checkpoint path ---
    "LLM-Based": {
        "method": "llm",
        "lang": "en",
        "context": "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
        "question": "What is the capital of France? What is the population of France?",
        "answer": "The capital of France is Paris. The population of France is 69 million.",
        "output_label": "LLM Predictions",
    },
}
110
+
111
+
112
def _resolve_model_path(model_path: str, model_size: str) -> str:
    """Return the checkpoint path that matches the size chosen in the sidebar.

    Only checkpoint names that encode a numeric size ("210m" / "610m") are
    switched. Names containing "base" or "large" are returned unchanged.

    :param model_path: The checkpoint path from the language example.
    :param model_size: The label selected in the "Model Size" radio.
    :return: The possibly adjusted checkpoint path.
    """
    lowered = model_path.lower()
    # NOTE(review): the English checkpoint name contains "base", so the size
    # radio currently has no effect for it.
    if "base" in lowered or "large" in lowered:
        return model_path
    if "210m" in lowered and "Large" in model_size:
        return model_path.replace("210m", "610m")
    if "610m" in lowered and "Base" in model_size:
        return model_path.replace("610m", "210m")
    return model_path


def main():
    """Render the LettuceDetect demo page.

    Builds the sidebar (language / model size / API key), loads the matching
    detector, shows editable context, question and answer fields, and on
    request runs the detector and highlights the predicted spans in the answer.
    """
    st.set_page_config(page_title="Lettuce Detective", page_icon="🥬", layout="wide")

    st.image(
        "https://github.com/KRLabsOrg/LettuceDetect/blob/main/assets/lettuce_detective.png?raw=true",
        width=600,
    )

    st.title("LettuceDetect Multilingual Demo 🌍")
    st.markdown("### Detect hallucinations in 7 languages")

    # Create a sidebar for language selection and model options
    with st.sidebar:
        st.header("Settings")
        selected_language = st.selectbox(
            "Select Language",
            list(LANGUAGE_EXAMPLES.keys()),
        )

        # Example data (and detector settings) for the selected language
        example = LANGUAGE_EXAMPLES[selected_language]

        # Entries without an explicit "method" are transformer-based
        model_method = example.get("method", "transformer")

        if model_method == "transformer":
            # Only show model size option for transformer-based models
            model_size = st.radio(
                "Model Size",
                ["Base (210M)", "Large (610M)"],
                index=0,
                help="Base models are faster, large models are more accurate.",
            )

            # API key not needed for transformer models
            openai_api_key = None
        else:
            # For LLM-based method
            st.info("LLM-based detection requires an OpenAI API key")
            openai_api_key = st.text_input("OpenAI API Key", type="password")

        st.markdown("---")
        st.markdown("### About")
        st.markdown(
            "LettuceDetect identifies hallucinations by comparing answers to provided context. "
            "Highlighted text indicates content not supported by the source material."
        )
        st.markdown("[GitHub](https://github.com/KRLabsOrg/LettuceDetect) | [HuggingFace](https://huggingface.co/collections/KRLabsOrg/multilingual-hallucination-detection-682a2549c18ecd32689231ce)")

    # Adjust model path based on selected size if needed
    if model_method == "transformer":
        model_path = _resolve_model_path(example["model_path"], model_size)
    else:
        # For LLM-based method, no model path needed
        model_path = None

    @st.cache_resource
    def load_detector(method, model_path=None, lang=None, api_key=None):
        """Build the detector for the given settings (cached per argument set).

        Errors are deliberately NOT caught here: returning ``None`` from a
        cached function would store the failure, so a transient problem
        (e.g. a network hiccup) would stick until the cache is cleared.
        """
        import os

        if api_key:
            # NOTE(review): this sets the key process-wide; on a shared
            # deployment it would be visible to other sessions -- confirm
            # that this is acceptable.
            os.environ["OPENAI_API_KEY"] = api_key

        if method == "transformer":
            return HallucinationDetector(
                method=method,
                model_path=model_path,
                lang=lang,
                trust_remote_code=True,
            )
        # LLM-based method
        return HallucinationDetector(method=method)

    # Load detector for the selected language
    with st.spinner(f"Loading {selected_language} model..."):
        try:
            detector = load_detector(
                method=model_method,
                model_path=model_path,
                lang=example["lang"],
                api_key=openai_api_key,
            )
        except Exception as e:  # UI boundary: report the failure, keep the page alive
            st.error(f"Error loading model: {e}")
            detector = None

    # Create a two-column layout
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Input")
        context = st.text_area(
            "Context",
            example["context"],
            height=150,
        )

        question = st.text_area(
            "Question",
            example["question"],
            height=80,
        )

        answer = st.text_area(
            "Answer",
            example["answer"],
            height=100,
        )

    with col2:
        st.subheader("Results")
        if detector is None:
            st.error("Model not loaded. Please check your internet connection or try a different language.")
        elif st.button("Detect Hallucinations", type="primary"):
            predictions = None
            failed = False
            with st.spinner("Analyzing..."):
                try:
                    predictions = detector.predict(
                        context=[context], question=question, answer=answer, output_format="spans"
                    )
                except Exception as e:  # UI boundary: show the error instead of a traceback
                    failed = True
                    st.error(f"Error during detection: {e}")

            if failed:
                pass  # The error has already been reported above.
            elif predictions:
                st.success(f"Found {len(predictions)} hallucination(s)")
                st.markdown(f"**{example['output_label']}:**")
                html_content = create_interactive_text(answer, predictions)
                components.html(html_content, height=200)

                # Display raw predictions in a collapsible section
                with st.expander("Raw prediction data"):
                    st.json(predictions)
            else:
                st.info("No hallucinations detected")

    # Show information about current model
    st.markdown("---")
    if model_method == "transformer":
        st.markdown(f"**Current Model:** {model_path}")
    else:
        st.markdown("**Method:** LLM-based hallucination detection")
    st.markdown(f"**Language:** {example['lang']}")
258
+
259
+
260
# Entry point: build the page when this file is executed as a script.
if __name__ == "__main__":
    main()