robiro committed (verified)
Commit fa3a13f · 1 Parent(s): f740c8a

Update app.py

Files changed (1)
  1. app.py +148 -352
app.py CHANGED
@@ -1,363 +1,159 @@
  import gradio as gr
- from llama_cpp import Llama
- from huggingface_hub import hf_hub_download
- import os
- import time
-
- # --- Configuration ---
- MODEL_REPO_ID = "unsloth/DeepSeek-R1-0528-Qwen3-8B-GGUF"
- MODEL_FILENAME = "DeepSeek-R1-0528-Qwen3-8B-Q4_K_M.gguf" # IMPORTANT: Verify this filename
- LOCAL_MODEL_PATH = f"./{MODEL_FILENAME}"
-
- # LLM Llama Parameters
- N_CTX = 2048
- N_THREADS = None
- N_GPU_LAYERS = 0
- VERBOSE_LLAMA = True
-
- # Generation parameters
- DEFAULT_MAX_NEW_TOKENS = 512
- DEFAULT_TEMPERATURE = 0.7
- DEFAULT_TOP_P = 0.95
- DEFAULT_TOP_K = 40
- DEFAULT_REPEAT_PENALTY = 1.1
-
- # Qwen specific chat format elements (defined globally)
- IM_START_TOKEN = "<|im_start|>"
- IM_END_TOKEN = "<|im_end|>"
-
- # --- Global variable for the model ---
- llm = None
-
- # --- Model Download ---
- def download_model_if_needed():
-     if not os.path.exists(LOCAL_MODEL_PATH):
-         print(f"Downloading {MODEL_FILENAME} from {MODEL_REPO_ID}...")
-         start_time = time.time()
-         try:
-             hf_hub_download(
-                 repo_id=MODEL_REPO_ID,
-                 filename=MODEL_FILENAME,
-                 local_dir=".",
-                 local_dir_use_symlinks=False,
-                 resume_download=True
-             )
-             end_time = time.time()
-             print(f"Download complete in {end_time - start_time:.2f} seconds.")
-             return True
-         except Exception as e:
-             print(f"Error downloading model: {e}")
-             print(f"Attempted to download: {MODEL_REPO_ID}/{MODEL_FILENAME}")
-             return False
-     else:
-         print(f"Model file {MODEL_FILENAME} already exists.")
-         return True
-     return False # Should not be reached if logic is correct, but good for completeness
-
- # --- Model Loading ---
- def load_llm_model():
-     global llm
-     if llm is None:
-         if not os.path.exists(LOCAL_MODEL_PATH):
-             print("Model file not found. Cannot load.")
-             return False
-         print("Loading Llama model...")
-         start_time = time.time()
-         try:
-             llm = Llama(
-                 model_path=LOCAL_MODEL_PATH,
-                 n_ctx=N_CTX,
-                 n_threads=N_THREADS,
-                 n_gpu_layers=N_GPU_LAYERS,
-                 verbose=VERBOSE_LLAMA,
-             )
-             end_time = time.time()
-             print(f"Model loaded successfully in {end_time - start_time:.2f} seconds.")
-             return True
-         except Exception as e:
-             print(f"Error loading Llama model: {e}")
-             print(f"If on resource-constrained environment, model ({MODEL_FILENAME}, ~{os.path.getsize(LOCAL_MODEL_PATH)/(1024**3):.2f}GB if exists) might be too large.")
-             llm = None
-             return False
-     else:
-         print("Model already loaded.")
-         return True
-
- # --- Chat Function ---
- def predict(message, history, system_prompt, max_new_tokens, temperature, top_p, top_k, repeat_penalty):
-     if llm is None:
-         return "Model not loaded. Please check the logs."
-
-     # Common stop tokens for Qwen-like models
-     # Accessing global IM_START_TOKEN and IM_END_TOKEN
-     stop_tokens = [IM_END_TOKEN, IM_START_TOKEN + "user", IM_START_TOKEN + "system", llm.token_eos_str()] # Use string representation of EOS
-
-     messages_for_api = [] # Renamed to avoid conflict with Gradio's 'messages' type
-     if system_prompt and system_prompt.strip():
-         messages_for_api.append({"role": "system", "content": system_prompt.strip()})
-
-     # History for Gradio Chatbot with type="messages" is already in the correct format
-     # history will be a list of lists, where each inner list is [user_msg, ai_msg]
-     # or if type="messages", it's a list of dicts.
-     # Let's assume for now the input `history` from chatbot (when type="tuples")
-     # needs conversion if predict is called directly with such history.
-     # If chatbot type="messages", history is already List[Dict[str, str]]
-     # The `user_chat_fn` and `bot_response_fn` handle history in `messages` format for the chatbot.
-     # So, when `predict` is called by `bot_response_fn`, `history` is actually `history_for_predict`
-     # which is `chat_history[:-1]`. `chat_history` is a list of tuples.
-     # We need to convert this tuple-style history to OpenAI dict style for create_chat_completion.
-
-     # The history passed from `bot_response_fn` (history_for_predict) is list of [user, assistant] tuples
-     for human_msg, ai_msg in history: # history here is history_for_predict from bot_response_fn
-         messages_for_api.append({"role": "user", "content": human_msg})
-         if ai_msg is not None:
-             messages_for_api.append({"role": "assistant", "content": ai_msg})
-     messages_for_api.append({"role": "user", "content": message})
-
-
-     print("\n--- Input to Model ---")
-     print(f"System Prompt: {system_prompt if system_prompt and system_prompt.strip() else 'None'}")
-     print(f"History (tuples format for predict): {history}")
-     print(f"Current Message: {message}")
-     print(f"Formatted messages for create_chat_completion: {messages_for_api}")
-     print("--- End Input to Model ---\n")
-
-     assistant_response_text = ""
-     generation_start_time = time.time()
-
      try:
-         print("Attempting generation with llm.create_chat_completion()...")
-         response = llm.create_chat_completion(
-             messages=messages_for_api,
-             temperature=temperature,
-             top_p=top_p,
-             top_k=top_k,
-             repeat_penalty=repeat_penalty,
-             max_tokens=max_new_tokens,
-             stop=stop_tokens,
-         )
-         assistant_response_text = response['choices'][0]['message']['content'].strip()
-         print(f"create_chat_completion successful. Raw response: {response['choices'][0]['message']}")
-
-     except Exception as e_chat_completion:
-         print(f"Error during create_chat_completion: {e_chat_completion}")
-         print("Falling back to manual prompt construction and llm()...")
-
-         prompt = ""
-         if system_prompt and system_prompt.strip():
-             prompt += f"{IM_START_TOKEN}system\n{system_prompt.strip()}{IM_END_TOKEN}\n"
-
-         for human_msg, ai_msg in history: # history here is history_for_predict
-             prompt += f"{IM_START_TOKEN}user\n{human_msg}{IM_END_TOKEN}\n"
-             if ai_msg is not None:
-                 prompt += f"{IM_START_TOKEN}assistant\n{ai_msg}{IM_END_TOKEN}\n"
-         prompt += f"{IM_START_TOKEN}user\n{message}{IM_END_TOKEN}\n{IM_START_TOKEN}assistant\n"
-
-         print(f"Fallback prompt: {prompt}")
-
-         try:
-             output = llm(
                  prompt,
-                 max_tokens=max_new_tokens,
-                 temperature=temperature,
-                 top_p=top_p,
-                 top_k=top_k,
-                 repeat_penalty=repeat_penalty,
-                 stop=stop_tokens,
-                 echo=False
              )
-             assistant_response_text = output['choices'][0]['text'].strip()
-             print(f"Fallback llm() successful. Raw output: {output['choices'][0]['text']}")
-         except Exception as e_fallback:
-             print(f"Error during fallback llm() generation: {e_fallback}")
-             assistant_response_text = "Sorry, I encountered an error during generation. Please check the logs."
-
-     generation_end_time = time.time()
-     print(f"Generated response: {assistant_response_text}")
-     print(f"Generation took {generation_end_time - generation_start_time:.2f} seconds.")
-     return assistant_response_text
-
- # --- Gradio Interface ---
- def create_gradio_interface():
-     with gr.Blocks(theme=gr.themes.Soft()) as iface:
-         gr.Markdown(f"""
-         # Chat with {MODEL_REPO_ID.split('/')[-1]} ({MODEL_FILENAME})
-         This Space runs a GGUF quantized version of the model using `llama-cpp-python`.
-         Model: [{MODEL_REPO_ID}](https://huggingface.co/{MODEL_REPO_ID})
-         GGUF File: `{MODEL_FILENAME}` (Quantization: Q4_K_M)
-         """)
-
-         with gr.Row():
-             with gr.Column(scale=3):
-                 chatbot = gr.Chatbot(
-                     [],
-                     elem_id="chatbot",
-                     label="Chat Window",
-                     # bubble_full_width=False, # Deprecated
-                     height=500,
-                     type="messages" # Use OpenAI-style messages format
-                 )
-                 user_input = gr.Textbox(
-                     show_label=False,
-                     placeholder="Type your message here and press Enter...",
-                     container=False,
-                     scale=7,
-                 )
-
-             with gr.Column(scale=1):
-                 gr.Markdown("### Model Parameters")
-                 system_prompt_input = gr.Textbox(
-                     label="System Prompt (Optional)",
-                     placeholder="e.g., You are a helpful AI assistant.",
-                     lines=3
-                 )
-                 max_new_tokens_slider = gr.Slider(
-                     minimum=32, maximum=N_CTX, value=DEFAULT_MAX_NEW_TOKENS, step=32,
-                     label="Max New Tokens"
-                 )
-                 temperature_slider = gr.Slider(
-                     minimum=0.0, maximum=2.0, value=DEFAULT_TEMPERATURE, step=0.05,
-                     label="Temperature"
                  )
-                 top_p_slider = gr.Slider(
-                     minimum=0.0, maximum=1.0, value=DEFAULT_TOP_P, step=0.05,
-                     label="Top-P (Nucleus Sampling)"
                  )
-                 top_k_slider = gr.Slider(
-                     minimum=0, maximum=100, value=DEFAULT_TOP_K, step=1,
-                     label="Top-K Sampling"
                  )
-                 repeat_penalty_slider = gr.Slider(
-                     minimum=1.0, maximum=2.0, value=DEFAULT_REPEAT_PENALTY, step=0.05,
-                     label="Repeat Penalty"
                  )
-                 status_display = gr.Textbox(label="Status", interactive=False, visible=False)
-
-
-         def user_chat_fn(user_message, chat_history_messages, sys_prompt, max_tok, temp, top_p_val, top_k_val, rep_pen):
-             if not user_message.strip(): # Do nothing if user message is empty
-                 return "", chat_history_messages, sys_prompt, max_tok, temp, top_p_val, top_k_val, rep_pen
-
-             if llm is None:
-                 chat_history_messages.append({"role": "user", "content": user_message})
-                 chat_history_messages.append({"role": "assistant", "content": "ERROR: Model not loaded. Check server logs."})
-                 return "", chat_history_messages, sys_prompt, max_tok, temp, top_p_val, top_k_val, rep_pen
-
-             chat_history_messages.append({"role": "user", "content": user_message})
-             # Add a placeholder for assistant message that bot_response_fn will fill
-             chat_history_messages.append({"role": "assistant", "content": None})
-             return "", chat_history_messages, sys_prompt, max_tok, temp, top_p_val, top_k_val, rep_pen
-
-         def bot_response_fn(chat_history_messages, sys_prompt, max_tok, temp, top_p_val, top_k_val, rep_pen):
-             if llm is None or chat_history_messages[-1]["content"] is not None: # If model not loaded or already processed
-                 return chat_history_messages
-
-             user_message = chat_history_messages[-2]["content"] # Get the last user message
-
-             # Convert OpenAI-style message history (List[Dict]) to tuple-style for predict's current internal logic
-             history_for_predict_tuples = []
-             # Iterate up to the second to last message (the current user's message)
-             # Each pair of (user, assistant) forms one turn for the tuple history
-             i = 0
-             temp_history = chat_history_messages[:-2] # Exclude current user and assistant placeholder
-
-             # Skip system prompt if present at the beginning for tuple conversion
-             start_index = 0
-             if temp_history and temp_history[0]["role"] == "system":
-                 start_index = 1 # System prompt handled separately in predict
-
-             for i in range(start_index, len(temp_history), 2):
-                 if i + 1 < len(temp_history) and temp_history[i]["role"] == "user" and temp_history[i+1]["role"] == "assistant":
-                     history_for_predict_tuples.append(
-                         (temp_history[i]["content"], temp_history[i+1]["content"])
-                     )
-                 elif temp_history[i]["role"] == "user": # Handle case where last turn was only a user message (shouldn't happen if paired)
-                     history_for_predict_tuples.append((temp_history[i]["content"], None))
-
-
-             bot_msg_content = predict(user_message, history_for_predict_tuples, sys_prompt, max_tok, temp, top_p_val, top_k_val, rep_pen)
-             chat_history_messages[-1]["content"] = bot_msg_content # Update the assistant's placeholder message
-             return chat_history_messages
-
-         user_input.submit(
-             user_chat_fn,
-             [user_input, chatbot, system_prompt_input, max_new_tokens_slider, temperature_slider, top_p_slider, top_k_slider, repeat_penalty_slider],
-             [user_input, chatbot, system_prompt_input, max_new_tokens_slider, temperature_slider, top_p_slider, top_k_slider, repeat_penalty_slider],
-             queue=False
-         ).then(
-             bot_response_fn,
-             [chatbot, system_prompt_input, max_new_tokens_slider, temperature_slider, top_p_slider, top_k_slider, repeat_penalty_slider],
-             [chatbot],
-             queue=True
-         )
-
-         gr.Examples(
-             examples=[
-                 ["Hello, how are you today?", "You are a friendly and helpful AI assistant specializing in concise answers."],
-                 ["What is the capital of France?", "Be very brief."],
-                 ["Write a short poem about a robot learning to dream.", ""],
-                 ["Explain the concept of black holes to a 5-year-old.", "Keep it simple and use an analogy."]
-             ],
-             inputs=[user_input, system_prompt_input],
-         )
-
-         with gr.Accordion("Advanced/Debug Info", open=False):
-             # Accessing global IM_START_TOKEN and IM_END_TOKEN
-             gr.Markdown(f"""
-             - **Model File:** `{LOCAL_MODEL_PATH}`
-             - **N_CTX:** `{N_CTX}`
-             - **N_THREADS:** `{N_THREADS if N_THREADS is not None else 'Auto'}`
-             - **N_GPU_LAYERS:** `{N_GPU_LAYERS}`
-             - **Log Verbosity (llama.cpp):** `{VERBOSE_LLAMA}`
-             - **Stop Tokens Used (Conceptual):** `{IM_START_TOKEN}system`, `{IM_START_TOKEN}user`, `{IM_END_TOKEN}`, `EOS_TOKEN`
-             """)
-             reload_button = gr.Button("Attempt to Reload Model")
-             reload_status = gr.Label(value="Model Status: Unknown")
-
-             def update_reload_status():
-                 if llm:
-                     return "Model Status: Loaded Successfully"
-                 else:
-                     return "Model Status: Not Loaded (Check logs for errors)"
-
-             def attempt_reload():
-                 global llm
-                 if llm is not None:
-                     try:
-                         # Attempt to free existing model if Llama.cpp supports it or by reassigning
-                         print("Freeing existing model instance (if any)...")
-                         del llm # Explicitly delete to trigger __del__ if possible
-                         llm = None
-                         import gc
-                         gc.collect() # Suggest garbage collection
-                     except Exception as e_del:
-                         print(f"Error during manual deletion of llm: {e_del}")
-
-                 if load_llm_model():
-                     return "Model reloaded successfully!"
-                 else:
-                     return "Model reload FAILED. Check server logs."
-
-             reload_button.click(attempt_reload, outputs=[reload_status])
-         iface.load(update_reload_status, outputs=[reload_status]) # Update status on interface load
-     return iface

- # --- Main Execution ---
  if __name__ == "__main__":
-     print("Starting application...")
-     model_available = download_model_if_needed()
-
-     if model_available:
-         if not load_llm_model():
-             print("Initial model loading failed. Gradio will start; use UI to attempt reload.")
-         else:
-             print("Model ready.")
      else:
-         print("Model download failed. Cannot load model. Gradio will start; chat will be non-functional.")
-
-     print("Creating Gradio interface...")
-     app_interface = create_gradio_interface()
-
-     print("Launching Gradio interface...")
-     app_interface.launch()
-     print("Gradio interface launched.")
  import gradio as gr
+ import torch
+ from diffusers import StableDiffusionPipeline
+ from PIL import Image
+
+ # --- Global configuration and model loading ---
+ MODEL_ID = "runwayml/stable-diffusion-v1-5"
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+ print(f"Using device: {DEVICE}")
+
+ # Load the model only once when the app starts
+ # For GPU: torch_dtype=torch.float16 saves VRAM and is faster
+ # For CPU: torch_dtype=torch.float32 (float16 is not well supported on CPU)
+ dtype = torch.float16 if DEVICE == "cuda" else torch.float32
+
+ print(f"Loading model '{MODEL_ID}'... This can take a few minutes.")
+ try:
+     pipe = StableDiffusionPipeline.from_pretrained(MODEL_ID, torch_dtype=dtype)
+     pipe = pipe.to(DEVICE)
+     print("Model loaded successfully!")
+ except Exception as e:
+     print(f"Error loading the model: {e}")
+     print("Make sure you have an internet connection and that the model name is correct.")
+     print("If you have little VRAM, try a smaller model or memory-optimization settings.")
+     pipe = None # Signals that the model could not be loaded
+
+ # --- Image generation function ---
+ def generate_image(
+     prompt: str,
+     negative_prompt: str = "",
+     num_inference_steps: int = 50,
+     guidance_scale: float = 7.5,
+     height: int = 512,
+     width: int = 512,
+     seed: int = -1 # -1 for a random seed
+ ) -> Image.Image:
+     """
+     Generates an image based on the prompt and the other parameters.
+     """
+     if pipe is None:
+         raise gr.Error("The model could not be loaded. Please check the console output.")
+
+     print(f"Generating image for prompt: '{prompt}'")
+     print(f" Negative prompt: '{negative_prompt}'")
+     print(f" Steps: {num_inference_steps}, Guidance: {guidance_scale}")
+     print(f" Dimensions: {width}x{height}, Seed: {seed}")
+
+     # Seed handling
+     generator = None
+     if seed != -1:
+         generator = torch.Generator(device=DEVICE).manual_seed(seed)
+
+     # Generate the image
+     # safety_checker=None can be used to disable the NSFW filter,
+     # but be aware of the implications. It is enabled by default.
      try:
+         with torch.inference_mode(): # Important for lower memory use during inference
+             result = pipe(
                  prompt,
+                 negative_prompt=negative_prompt if negative_prompt else None,
+                 num_inference_steps=int(num_inference_steps),
+                 guidance_scale=guidance_scale,
+                 height=int(height),
+                 width=int(width),
+                 generator=generator
              )
+         image = result.images[0]
+         print("Image generated successfully.")
+         return image
+     except Exception as e:
+         print(f"Error during image generation: {e}")
+         # Try to give a more specific message for OOM (out of memory) errors
+         if "CUDA out of memory" in str(e):
+             raise gr.Error(
+                 "CUDA out of memory. Try reducing the image size, "
+                 "using fewer inference steps, or loading a smaller model."
+             )
+         raise gr.Error(f"Error during image generation: {e}")
+
+
+ # --- Gradio interface definition ---
+ with gr.Blocks(theme=gr.themes.Soft()) as app:
+     gr.Markdown(
+         """
+         # 🖼️ Image Generator with Stable Diffusion
+         Enter a text prompt to generate an image.
+         Loading the model on the first start can take a few minutes.
+         """
+     )
+
+     with gr.Row():
+         with gr.Column(scale=2):
+             prompt_input = gr.Textbox(
+                 label="Prompt",
+                 placeholder="e.g. A photorealistic picture of a cat wearing a hat",
+                 lines=3
+             )
+             negative_prompt_input = gr.Textbox(
+                 label="Negative prompt (what should be avoided)",
+                 placeholder="e.g. badly drawn, blurry, text, watermark",
+                 lines=2
+             )
+             with gr.Row():
+                 steps_slider = gr.Slider(
+                     minimum=10, maximum=150, value=50, step=1, label="Inference steps"
                  )
+                 guidance_slider = gr.Slider(
+                     minimum=1, maximum=20, value=7.5, step=0.1, label="Guidance scale (CFG)"
                  )
+             with gr.Row():
+                 height_slider = gr.Slider(
+                     minimum=256, maximum=1024, value=512, step=64, label="Height"
                  )
+                 width_slider = gr.Slider(
+                     minimum=256, maximum=1024, value=512, step=64, label="Width"
                  )
+             seed_input = gr.Number(
+                 label="Seed (-1 for random)", value=-1, precision=0
+             )
+             generate_button = gr.Button("Generate image", variant="primary")
+
+         with gr.Column(scale=1):
+             image_output = gr.Image(label="Generated image", type="pil")
+             gr.Markdown("### Example prompts:")
+             gr.Examples(
+                 examples=[
+                     ["An astronaut riding a horse on the moon, digital art", "", 50, 7.5, 512, 512, -1],
+                     ["An impressionist painting of a sunset over a lavender field", "people, buildings", 40, 8.0, 512, 768, -1],
+                     ["A cute corgi dog as a pixel-art character", "photorealistic", 30, 7.0, 512, 512, 12345],
+                     ["A surreal landscape with floating islands and waterfalls of light", "dark, gloomy", 60, 9.0, 768, 512, -1],
+                 ],
+                 inputs=[prompt_input, negative_prompt_input, steps_slider, guidance_slider, height_slider, width_slider, seed_input],
+                 outputs=image_output,
+                 fn=generate_image, # The function that runs when an example is clicked
+                 cache_examples=False # Or True if you want to cache the results
+             )

+     generate_button.click(
+         fn=generate_image,
+         inputs=[
+             prompt_input,
+             negative_prompt_input,
+             steps_slider,
+             guidance_slider,
+             height_slider,
+             width_slider,
+             seed_input
+         ],
+         outputs=image_output,
+         api_name="generate_image" # For API access
+     )
+
+ # --- Launch the app ---
  if __name__ == "__main__":
+     if pipe is None:
+         print("The model could not be loaded. The Gradio app will not be started.")
+         print("Please fix the errors and try again.")
      else:
+         app.launch(share=False) # Set share=True to get a public link
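
Note: the new click handler registers the endpoint api_name="generate_image". As a rough usage sketch (not part of this commit), that endpoint could be called with the gradio_client package; the URL below is a placeholder for wherever the app is running, and the argument values are just example inputs.

from gradio_client import Client

# Placeholder URL: point this at the running app (e.g. a local launch or the deployed Space).
client = Client("http://localhost:7860")

result = client.predict(
    "An astronaut riding a horse on the moon, digital art",  # prompt
    "",      # negative_prompt
    30,      # num_inference_steps
    7.5,     # guidance_scale
    512,     # height
    512,     # width
    -1,      # seed (-1 = random)
    api_name="/generate_image",
)
print(result)  # path to the generated image file returned by the endpoint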