Update app.py
app.py CHANGED
@@ -61,7 +61,6 @@ def clean_response_text(response_text):
 def generate_ideas(user_input):
     """
     Generate a diverse set of ideas based on the user's input concept using the LLM.
-    Validate the relevance of each idea using a cheaper LLM (gemini-2.0-flash-lite).

     Args:
         user_input (str): The user's input concept or idea (e.g., "blindfolded Rubik's Cube challenge").
@@ -69,7 +68,7 @@ def generate_ideas(user_input):
     Returns:
         list: A list of ideas as strings.
     """
-    #
+    # Generate ideas using gemini-2.0-flash-lite
     prompt = f"""
     The user has provided the concept: "{user_input}". You must generate 5 diverse and creative ideas for a TikTok video that are directly and explicitly related to "{user_input}".
     Each idea must clearly incorporate and focus on the core theme of "{user_input}" without deviating into unrelated topics.
@@ -87,7 +86,7 @@ def generate_ideas(user_input):
     """
     try:
         response = client.models.generate_content(
-            model='gemini-2.0-flash',
+            model='gemini-2.0-flash-lite',
            contents=[prompt],
            config=types.GenerateContentConfig(
                temperature=1.2,
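
The hunk above switches the `generate_content` call in `generate_ideas` from `gemini-2.0-flash` to the cheaper `gemini-2.0-flash-lite`. For orientation, the surrounding call pattern condenses to roughly the following; a minimal sketch assuming the google-genai SDK with an API key in the environment (the real app.py also passes SAFETY_SETTINGS, a much longer prompt, and its clean_response_text helper):

    import json
    from google import genai
    from google.genai import types

    client = genai.Client()  # assumes the API key is set in the environment

    def generate_ideas_sketch(user_input):
        prompt = f'Return JSON {{"ideas": [...]}} with 5 TikTok video ideas about "{user_input}".'
        response = client.models.generate_content(
            model='gemini-2.0-flash-lite',  # the model this commit switches to
            contents=[prompt],
            config=types.GenerateContentConfig(temperature=1.2),
        )
        # app.py first strips markdown fences via clean_response_text();
        # plain json.loads is enough for this sketch.
        response_json = json.loads(response.text)
        if 'ideas' not in response_json or not isinstance(response_json['ideas'], list):
            raise ValueError("Invalid JSON format: 'ideas' key missing or not a list")
        return response_json['ideas']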
@@ -103,54 +102,6 @@ def generate_ideas(user_input):
             raise ValueError("Invalid JSON format: 'ideas' key missing, not a list, or incorrect length")

         ideas = response_json['ideas']
-
-        # Step 2: Validate relevance of each idea using gemini-2.0-flash-lite
-        for idea in ideas:
-            validation_prompt = f"""
-            Determine if the following idea for a TikTok video is related to the user's concept.
-            User's concept: "{user_input}"
-            Idea: "{idea}"
-            Respond with a JSON object containing a single key 'is_related' with a boolean value (true or false).
-            Example:
-            {{"is_related": true}}
-            """
-            try:
-                validation_response = client.models.generate_content(
-                    model='gemini-2.0-flash-lite',
-                    contents=[validation_prompt],
-                    config=types.GenerateContentConfig(
-                        temperature=0.0,  # Low temperature for deterministic output
-                        safety_settings=SAFETY_SETTINGS
-                    )
-                )
-                print(f"Validation response for idea '{idea}': {validation_response.text}")  # Debugging
-                if not validation_response.text or validation_response.text.isspace():
-                    raise ValueError("Empty validation response from API")
-                cleaned_validation_text = clean_response_text(validation_response.text)
-                validation_json = json.loads(cleaned_validation_text)
-                if 'is_related' not in validation_json or not isinstance(validation_json['is_related'], bool):
-                    raise ValueError("Invalid validation JSON format: 'is_related' key missing or not a boolean")
-
-                if not validation_json['is_related']:
-                    print(f"Idea '{idea}' is not related to '{user_input}'. Falling back to default ideas.")
-                    return [
-                        f"A dramatic {user_input} scene with cinematic lighting",
-                        f"A close-up of {user_input} in a futuristic setting",
-                        f"A high-energy {user_input} moment with vibrant colors",
-                        f"A serene {user_input} scene with soft focus",
-                        f"An action-packed {user_input} challenge with dynamic angles"
-                    ]
-            except Exception as e:
-                print(f"Error validating idea '{idea}': {e}. Falling back to default ideas.")
-                return [
-                    f"A dramatic {user_input} scene with cinematic lighting",
-                    f"A close-up of {user_input} in a futuristic setting",
-                    f"A high-energy {user_input} moment with vibrant colors",
-                    f"A serene {user_input} scene with soft focus",
-                    f"An action-packed {user_input} challenge with dynamic angles"
-                ]
-
-        # All ideas are related, return them
         return ideas

     except Exception as e:
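
The hunk above deletes the per-idea relevance check entirely; every generated idea is now returned without a second model call. If that check is ever wanted again, the removed lines condense to a helper along these lines; a sketch assembled from the deleted code, where `client`, `types`, `SAFETY_SETTINGS`, and `clean_response_text` are the objects app.py already defines:

    import json

    def idea_is_related(user_input, idea):
        # Same prompt shape as the removed validation_prompt.
        validation_prompt = (
            f'Determine if this TikTok video idea is related to the concept "{user_input}".\n'
            f'Idea: "{idea}"\n'
            'Respond with JSON: {"is_related": true} or {"is_related": false}.'
        )
        validation_response = client.models.generate_content(
            model='gemini-2.0-flash-lite',
            contents=[validation_prompt],
            config=types.GenerateContentConfig(
                temperature=0.0,  # deterministic output
                safety_settings=SAFETY_SETTINGS,
            ),
        )
        if not validation_response.text or validation_response.text.isspace():
            raise ValueError("Empty validation response from API")
        validation_json = json.loads(clean_response_text(validation_response.text))
        if not isinstance(validation_json.get('is_related'), bool):
            raise ValueError("'is_related' key missing or not a boolean")
        return validation_json['is_related']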
@@ -202,7 +153,7 @@ def generate_item(user_input, ideas, generate_video=False, max_retries=3):
        """
        try:
            response = client.models.generate_content(
-                model='gemini-2.0-flash',
+                model='gemini-2.0-flash-lite',
                contents=[prompt],
                config=types.GenerateContentConfig(
                    temperature=1.2,
@@ -282,108 +233,94 @@ def generate_item(user_input, ideas, generate_video=False, max_retries=3):
                 # Otherwise, continue to next cycle
                 break  # Exit inner loop to retry with new idea

-        # Step 2: Generate video if enabled (with
+        # Step 2: Generate video if enabled (with fallback to text-to-video if image-to-video fails)
         if generate_video and generated_image is not None:
-            max_video_retries_per_image = 2  # Try video generation twice
+            max_video_retries_per_image = 2  # Try text-to-video generation twice if needed
             video_generated = False

-            # First, try image-to-video generation
-            [old lines 291-307 collapsed in the diff view; content not recoverable]
-            print(f"Attempting image-to-video generation (video attempt {video_attempt + 1}, total attempt {total_attempts}): {video_prompt}")
-            operation = client.models.generate_videos(
-                model="veo-2.0-generate-001",
-                prompt=video_prompt,
-                image=generated_image.image,
-                config=types.GenerateVideosConfig(
-                    aspect_ratio="9:16",
-                    number_of_videos=1,
-                    duration_seconds=8,
-                    negative_prompt="blurry, low quality, text, letters"
-                )
+            # First, try image-to-video generation (only once)
+            try:
+                video_prompt = f"""
+                The user concept is "{user_input}". Based on this and the scene: {image_prompt}, create a video.
+                Use a close-up shot with a slow dolly shot circling around the subject,
+                using shallow focus on the main subject to emphasize details, in a realistic style with cinematic lighting.
+                """
+                print(f"Attempting image-to-video generation (total attempt {total_attempts}): {video_prompt}")
+                operation = client.models.generate_videos(
+                    model="veo-2.0-generate-001",
+                    prompt=video_prompt,
+                    image=generated_image.image,
+                    config=types.GenerateVideosConfig(
+                        aspect_ratio="9:16",
+                        number_of_videos=1,
+                        duration_seconds=8,
+                        negative_prompt="blurry, low quality, text, letters"
+                    )
                 )
-            [old lines 320-340 collapsed in the diff view; content not recoverable]
-            print("No generated_videos attribute in response")
-
-            # Enhanced error handling for video generation response
-            if operation.error:
-                raise ValueError(f"Video generation operation failed with error: {operation.error.message}")
-            if operation.response is None:
-                raise ValueError("Video generation operation failed: No response")
-            if not hasattr(operation.response, 'generated_videos') or operation.response.generated_videos is None:
-                raise ValueError("Video generation operation failed: No generated_videos in response")
-
-            # Process the single generated video
-            if len(operation.response.generated_videos) > 0:
-                video = operation.response.generated_videos[0]
-                if video is None or not hasattr(video, 'video'):
-                    raise ValueError("Video is invalid or missing video data")
-                fname = 'with_image_input.mp4'
-                print(f"Generated video: {fname}")
-                # Download the video and get the raw bytes
-                video_data = client.files.download(file=video.video)
-                # Ensure video_data is in bytes
-                if isinstance(video_data, bytes):
-                    video_bytes = video_data
-                else:
-                    # If video_data is a file-like object, read the bytes
-                    video_buffer = BytesIO()
-                    for chunk in video_data:
-                        video_buffer.write(chunk)
-                    video_bytes = video_buffer.getvalue()
-                # Encode the video bytes as base64
-                video_base64 = base64.b64encode(video_bytes).decode()
-                video_generated = True
-                # Successfully generated video, return the result
-                return {
-                    'text': text,
-                    'image_base64': img_str,
-                    'video_base64': video_base64,
-                    'ideas': ideas
-                }
+                )
+                # Wait for video to generate
+                while not operation.done:
+                    time.sleep(20)
+                    operation = client.operations.get(operation)
+
+                # Log detailed information about the operation
+                print(f"Video generation operation completed: {operation}")
+                print(f"Operation done: {operation.done}")
+                print(f"Operation error: {operation.error}")
+                if operation.error:
+                    print(f"Operation error message: {operation.error.message}")
+                    if hasattr(operation.error, 'code'):
+                        print(f"Operation error code: {operation.error.code}")
+                    if hasattr(operation.error, 'details'):
+                        print(f"Operation error details: {operation.error.details}")
+                print(f"Operation response: {operation.response}")
+                if operation.response:
+                    print(f"Operation response has generated_videos: {hasattr(operation.response, 'generated_videos')}")
+                    if hasattr(operation.response, 'generated_videos'):
+                        print(f"Generated videos: {operation.response.generated_videos}")
                else:
-            [old lines 380-386 collapsed in the diff view; content not recoverable]
+                    print("No generated_videos attribute in response")
+
+                # Enhanced error handling for video generation response
+                if operation.error:
+                    raise ValueError(f"Video generation operation failed with error: {operation.error.message}")
+                if operation.response is None:
+                    raise ValueError("Video generation operation failed: No response")
+                if not hasattr(operation.response, 'generated_videos') or operation.response.generated_videos is None:
+                    raise ValueError("Video generation operation failed: No generated_videos in response")
+
+                # Process the single generated video
+                if len(operation.response.generated_videos) > 0:
+                    video = operation.response.generated_videos[0]
+                    if video is None or not hasattr(video, 'video'):
+                        raise ValueError("Video is invalid or missing video data")
+                    fname = 'with_image_input.mp4'
+                    print(f"Generated video: {fname}")
+                    # Download the video and get the raw bytes
+                    video_data = client.files.download(file=video.video)
+                    # Ensure video_data is in bytes
+                    if isinstance(video_data, bytes):
+                        video_bytes = video_data
+                    else:
+                        # If video_data is a file-like object, read the bytes
+                        video_buffer = BytesIO()
+                        for chunk in video_data:
+                            video_buffer.write(chunk)
+                        video_bytes = video_buffer.getvalue()
+                    # Encode the video bytes as base64
+                    video_base64 = base64.b64encode(video_bytes).decode()
+                    video_generated = True
+                    # Successfully generated video, return the result
+                    return {
+                        'text': text,
+                        'image_base64': img_str,
+                        'video_base64': video_base64,
+                        'ideas': ideas
+                    }
+                else:
+                    raise ValueError("No video was generated")
+            except Exception as e:
+                print(f"Error generating video (image-to-video, total attempt {total_attempts}): {e}")
+                print("Image-to-video generation failed. Falling back to text-to-video generation.")

         # If image-to-video generation failed, try text-to-video generation
         if not video_generated:
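
The rewritten block above leans on the SDK's long-running-operation pattern for Veo: `generate_videos` returns immediately and must be polled until `operation.done`, after which the video file is downloaded and normalized to raw bytes. Condensed from the added lines (same `client` and `types` as elsewhere in app.py; `video_prompt` and `generated_image` come from the earlier steps):

    import base64
    import time
    from io import BytesIO
    from google.genai import types

    operation = client.models.generate_videos(
        model="veo-2.0-generate-001",
        prompt=video_prompt,               # built from user_input and image_prompt
        image=generated_image.image,       # image-to-video; omitted for text-to-video
        config=types.GenerateVideosConfig(
            aspect_ratio="9:16",
            number_of_videos=1,
            duration_seconds=8,
            negative_prompt="blurry, low quality, text, letters",
        ),
    )
    while not operation.done:              # poll until the operation completes
        time.sleep(20)
        operation = client.operations.get(operation)

    video = operation.response.generated_videos[0]
    video_data = client.files.download(file=video.video)
    if isinstance(video_data, bytes):      # the SDK may hand back bytes or a file-like iterable
        video_bytes = video_data
    else:
        buffer = BytesIO()
        for chunk in video_data:
            buffer.write(chunk)
        video_bytes = buffer.getvalue()
    video_base64 = base64.b64encode(video_bytes).decode()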
@@ -487,7 +424,7 @@ def generate_item(user_input, ideas, generate_video=False, max_retries=3):
                        'video_base64': video_base64,
                        'ideas': ideas
                    }
-                #
+                # Text-to-video failed, break to outer loop to try a new image
                print(f"Text-to-video generation failed after {max_video_retries_per_image} attempts. Selecting a new idea and generating a new image.")
                break
            continue  # Retry text-to-video generation with a modified prompt