codelion committed on
Commit
3ed94dc
·
verified ·
1 Parent(s): ec68305

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -150
app.py CHANGED
@@ -61,7 +61,6 @@ def clean_response_text(response_text):
61
  def generate_ideas(user_input):
62
  """
63
  Generate a diverse set of ideas based on the user's input concept using the LLM.
64
- Validate the relevance of each idea using a cheaper LLM (gemini-2.0-flash-lite).
65
 
66
  Args:
67
  user_input (str): The user's input concept or idea (e.g., "blindfolded Rubik's Cube challenge").
@@ -69,7 +68,7 @@ def generate_ideas(user_input):
69
  Returns:
70
  list: A list of ideas as strings.
71
  """
72
- # Step 1: Generate ideas using gemini-2.0-flash
73
  prompt = f"""
74
  The user has provided the concept: "{user_input}". You must generate 5 diverse and creative ideas for a TikTok video that are directly and explicitly related to "{user_input}".
75
  Each idea must clearly incorporate and focus on the core theme of "{user_input}" without deviating into unrelated topics.
@@ -87,7 +86,7 @@ def generate_ideas(user_input):
87
  """
88
  try:
89
  response = client.models.generate_content(
90
- model='gemini-2.0-flash',
91
  contents=[prompt],
92
  config=types.GenerateContentConfig(
93
  temperature=1.2,
@@ -103,54 +102,6 @@ def generate_ideas(user_input):
103
  raise ValueError("Invalid JSON format: 'ideas' key missing, not a list, or incorrect length")
104
 
105
  ideas = response_json['ideas']
106
-
107
- # Step 2: Validate relevance of each idea using gemini-2.0-flash-lite
108
- for idea in ideas:
109
- validation_prompt = f"""
110
- Determine if the following idea for a TikTok video is related to the user's concept.
111
- User's concept: "{user_input}"
112
- Idea: "{idea}"
113
- Respond with a JSON object containing a single key 'is_related' with a boolean value (true or false).
114
- Example:
115
- {{"is_related": true}}
116
- """
117
- try:
118
- validation_response = client.models.generate_content(
119
- model='gemini-2.0-flash-lite',
120
- contents=[validation_prompt],
121
- config=types.GenerateContentConfig(
122
- temperature=0.0, # Low temperature for deterministic output
123
- safety_settings=SAFETY_SETTINGS
124
- )
125
- )
126
- print(f"Validation response for idea '{idea}': {validation_response.text}") # Debugging
127
- if not validation_response.text or validation_response.text.isspace():
128
- raise ValueError("Empty validation response from API")
129
- cleaned_validation_text = clean_response_text(validation_response.text)
130
- validation_json = json.loads(cleaned_validation_text)
131
- if 'is_related' not in validation_json or not isinstance(validation_json['is_related'], bool):
132
- raise ValueError("Invalid validation JSON format: 'is_related' key missing or not a boolean")
133
-
134
- if not validation_json['is_related']:
135
- print(f"Idea '{idea}' is not related to '{user_input}'. Falling back to default ideas.")
136
- return [
137
- f"A dramatic {user_input} scene with cinematic lighting",
138
- f"A close-up of {user_input} in a futuristic setting",
139
- f"A high-energy {user_input} moment with vibrant colors",
140
- f"A serene {user_input} scene with soft focus",
141
- f"An action-packed {user_input} challenge with dynamic angles"
142
- ]
143
- except Exception as e:
144
- print(f"Error validating idea '{idea}': {e}. Falling back to default ideas.")
145
- return [
146
- f"A dramatic {user_input} scene with cinematic lighting",
147
- f"A close-up of {user_input} in a futuristic setting",
148
- f"A high-energy {user_input} moment with vibrant colors",
149
- f"A serene {user_input} scene with soft focus",
150
- f"An action-packed {user_input} challenge with dynamic angles"
151
- ]
152
-
153
- # All ideas are related, return them
154
  return ideas
155
 
156
  except Exception as e:
@@ -202,7 +153,7 @@ def generate_item(user_input, ideas, generate_video=False, max_retries=3):
202
  """
203
  try:
204
  response = client.models.generate_content(
205
- model='gemini-2.0-flash',
206
  contents=[prompt],
207
  config=types.GenerateContentConfig(
208
  temperature=1.2,
@@ -282,108 +233,94 @@ def generate_item(user_input, ideas, generate_video=False, max_retries=3):
282
  # Otherwise, continue to next cycle
283
  break # Exit inner loop to retry with new idea
284
 
285
- # Step 2: Generate video if enabled (with retries using the same image)
286
  if generate_video and generated_image is not None:
287
- max_video_retries_per_image = 2 # Try video generation twice per image
288
  video_generated = False
289
 
290
- # First, try image-to-video generation
291
- for video_attempt in range(max_video_retries_per_image):
292
- try:
293
- # Base video prompt
294
- video_prompt_base = f"""
295
- The user concept is "{user_input}". Based on this and the scene: {image_prompt}, create a video.
296
- Use a close-up shot with a slow dolly shot circling around the subject,
297
- using shallow focus on the main subject to emphasize details, in a realistic style with cinematic lighting.
298
- """
299
- # Modify the prompt slightly for each retry
300
- if video_attempt == 0:
301
- video_prompt = video_prompt_base
302
- else:
303
- video_prompt = f"""
304
- The user concept is "{user_input}". Based on this and a simplified scene: {image_prompt}, create a video.
305
- Use a static close-up shot of the subject in a realistic style.
306
- """
307
-
308
- print(f"Attempting image-to-video generation (video attempt {video_attempt + 1}, total attempt {total_attempts}): {video_prompt}")
309
- operation = client.models.generate_videos(
310
- model="veo-2.0-generate-001",
311
- prompt=video_prompt,
312
- image=generated_image.image,
313
- config=types.GenerateVideosConfig(
314
- aspect_ratio="9:16",
315
- number_of_videos=1,
316
- duration_seconds=8,
317
- negative_prompt="blurry, low quality, text, letters"
318
- )
319
  )
320
- # Wait for video to generate
321
- while not operation.done:
322
- time.sleep(20)
323
- operation = client.operations.get(operation)
324
-
325
- # Log detailed information about the operation
326
- print(f"Video generation operation completed: {operation}")
327
- print(f"Operation done: {operation.done}")
328
- print(f"Operation error: {operation.error}")
329
- if operation.error:
330
- print(f"Operation error message: {operation.error.message}")
331
- if hasattr(operation.error, 'code'):
332
- print(f"Operation error code: {operation.error.code}")
333
- if hasattr(operation.error, 'details'):
334
- print(f"Operation error details: {operation.error.details}")
335
- print(f"Operation response: {operation.response}")
336
- if operation.response:
337
- print(f"Operation response has generated_videos: {hasattr(operation.response, 'generated_videos')}")
338
- if hasattr(operation.response, 'generated_videos'):
339
- print(f"Generated videos: {operation.response.generated_videos}")
340
- else:
341
- print("No generated_videos attribute in response")
342
-
343
- # Enhanced error handling for video generation response
344
- if operation.error:
345
- raise ValueError(f"Video generation operation failed with error: {operation.error.message}")
346
- if operation.response is None:
347
- raise ValueError("Video generation operation failed: No response")
348
- if not hasattr(operation.response, 'generated_videos') or operation.response.generated_videos is None:
349
- raise ValueError("Video generation operation failed: No generated_videos in response")
350
-
351
- # Process the single generated video
352
- if len(operation.response.generated_videos) > 0:
353
- video = operation.response.generated_videos[0]
354
- if video is None or not hasattr(video, 'video'):
355
- raise ValueError("Video is invalid or missing video data")
356
- fname = 'with_image_input.mp4'
357
- print(f"Generated video: {fname}")
358
- # Download the video and get the raw bytes
359
- video_data = client.files.download(file=video.video)
360
- # Ensure video_data is in bytes
361
- if isinstance(video_data, bytes):
362
- video_bytes = video_data
363
- else:
364
- # If video_data is a file-like object, read the bytes
365
- video_buffer = BytesIO()
366
- for chunk in video_data:
367
- video_buffer.write(chunk)
368
- video_bytes = video_buffer.getvalue()
369
- # Encode the video bytes as base64
370
- video_base64 = base64.b64encode(video_bytes).decode()
371
- video_generated = True
372
- # Successfully generated video, return the result
373
- return {
374
- 'text': text,
375
- 'image_base64': img_str,
376
- 'video_base64': video_base64,
377
- 'ideas': ideas
378
- }
379
  else:
380
- raise ValueError("No video was generated")
381
- except Exception as e:
382
- print(f"Error generating video (image-to-video attempt {video_attempt + 1}, total attempt {total_attempts}): {e}")
383
- if video_attempt == max_video_retries_per_image - 1:
384
- print("Image-to-video generation failed after all attempts. Falling back to text-to-video generation.")
385
- break
386
- continue # Retry image-to-video generation with a modified prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
 
388
  # If image-to-video generation failed, try text-to-video generation
389
  if not video_generated:
@@ -487,7 +424,7 @@ def generate_item(user_input, ideas, generate_video=False, max_retries=3):
487
  'video_base64': video_base64,
488
  'ideas': ideas
489
  }
490
- # Both image-to-video and text-to-video failed, break to outer loop to try a new image
491
  print(f"Text-to-video generation failed after {max_video_retries_per_image} attempts. Selecting a new idea and generating a new image.")
492
  break
493
  continue # Retry text-to-video generation with a modified prompt
 
61
  def generate_ideas(user_input):
62
  """
63
  Generate a diverse set of ideas based on the user's input concept using the LLM.
 
64
 
65
  Args:
66
  user_input (str): The user's input concept or idea (e.g., "blindfolded Rubik's Cube challenge").
 
68
  Returns:
69
  list: A list of ideas as strings.
70
  """
71
+ # Generate ideas using gemini-2.0-flash-lite
72
  prompt = f"""
73
  The user has provided the concept: "{user_input}". You must generate 5 diverse and creative ideas for a TikTok video that are directly and explicitly related to "{user_input}".
74
  Each idea must clearly incorporate and focus on the core theme of "{user_input}" without deviating into unrelated topics.
 
86
  """
87
  try:
88
  response = client.models.generate_content(
89
+ model='gemini-2.0-flash-lite',
90
  contents=[prompt],
91
  config=types.GenerateContentConfig(
92
  temperature=1.2,
 
102
  raise ValueError("Invalid JSON format: 'ideas' key missing, not a list, or incorrect length")
103
 
104
  ideas = response_json['ideas']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  return ideas
106
 
107
  except Exception as e:
 
153
  """
154
  try:
155
  response = client.models.generate_content(
156
+ model='gemini-2.0-flash-lite',
157
  contents=[prompt],
158
  config=types.GenerateContentConfig(
159
  temperature=1.2,
 
233
  # Otherwise, continue to next cycle
234
  break # Exit inner loop to retry with new idea
235
 
236
+ # Step 2: Generate video if enabled (with fallback to text-to-video if image-to-video fails)
237
  if generate_video and generated_image is not None:
238
+ max_video_retries_per_image = 2 # Try text-to-video generation twice if needed
239
  video_generated = False
240
 
241
+ # First, try image-to-video generation (only once)
242
+ try:
243
+ video_prompt = f"""
244
+ The user concept is "{user_input}". Based on this and the scene: {image_prompt}, create a video.
245
+ Use a close-up shot with a slow dolly shot circling around the subject,
246
+ using shallow focus on the main subject to emphasize details, in a realistic style with cinematic lighting.
247
+ """
248
+ print(f"Attempting image-to-video generation (total attempt {total_attempts}): {video_prompt}")
249
+ operation = client.models.generate_videos(
250
+ model="veo-2.0-generate-001",
251
+ prompt=video_prompt,
252
+ image=generated_image.image,
253
+ config=types.GenerateVideosConfig(
254
+ aspect_ratio="9:16",
255
+ number_of_videos=1,
256
+ duration_seconds=8,
257
+ negative_prompt="blurry, low quality, text, letters"
 
 
 
 
 
 
 
 
 
 
 
 
258
  )
259
+ )
260
+ # Wait for video to generate
261
+ while not operation.done:
262
+ time.sleep(20)
263
+ operation = client.operations.get(operation)
264
+
265
+ # Log detailed information about the operation
266
+ print(f"Video generation operation completed: {operation}")
267
+ print(f"Operation done: {operation.done}")
268
+ print(f"Operation error: {operation.error}")
269
+ if operation.error:
270
+ print(f"Operation error message: {operation.error.message}")
271
+ if hasattr(operation.error, 'code'):
272
+ print(f"Operation error code: {operation.error.code}")
273
+ if hasattr(operation.error, 'details'):
274
+ print(f"Operation error details: {operation.error.details}")
275
+ print(f"Operation response: {operation.response}")
276
+ if operation.response:
277
+ print(f"Operation response has generated_videos: {hasattr(operation.response, 'generated_videos')}")
278
+ if hasattr(operation.response, 'generated_videos'):
279
+ print(f"Generated videos: {operation.response.generated_videos}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  else:
281
+ print("No generated_videos attribute in response")
282
+
283
+ # Enhanced error handling for video generation response
284
+ if operation.error:
285
+ raise ValueError(f"Video generation operation failed with error: {operation.error.message}")
286
+ if operation.response is None:
287
+ raise ValueError("Video generation operation failed: No response")
288
+ if not hasattr(operation.response, 'generated_videos') or operation.response.generated_videos is None:
289
+ raise ValueError("Video generation operation failed: No generated_videos in response")
290
+
291
+ # Process the single generated video
292
+ if len(operation.response.generated_videos) > 0:
293
+ video = operation.response.generated_videos[0]
294
+ if video is None or not hasattr(video, 'video'):
295
+ raise ValueError("Video is invalid or missing video data")
296
+ fname = 'with_image_input.mp4'
297
+ print(f"Generated video: {fname}")
298
+ # Download the video and get the raw bytes
299
+ video_data = client.files.download(file=video.video)
300
+ # Ensure video_data is in bytes
301
+ if isinstance(video_data, bytes):
302
+ video_bytes = video_data
303
+ else:
304
+ # If video_data is a file-like object, read the bytes
305
+ video_buffer = BytesIO()
306
+ for chunk in video_data:
307
+ video_buffer.write(chunk)
308
+ video_bytes = video_buffer.getvalue()
309
+ # Encode the video bytes as base64
310
+ video_base64 = base64.b64encode(video_bytes).decode()
311
+ video_generated = True
312
+ # Successfully generated video, return the result
313
+ return {
314
+ 'text': text,
315
+ 'image_base64': img_str,
316
+ 'video_base64': video_base64,
317
+ 'ideas': ideas
318
+ }
319
+ else:
320
+ raise ValueError("No video was generated")
321
+ except Exception as e:
322
+ print(f"Error generating video (image-to-video, total attempt {total_attempts}): {e}")
323
+ print("Image-to-video generation failed. Falling back to text-to-video generation.")
324
 
325
  # If image-to-video generation failed, try text-to-video generation
326
  if not video_generated:
 
424
  'video_base64': video_base64,
425
  'ideas': ideas
426
  }
427
+ # Text-to-video failed, break to outer loop to try a new image
428
  print(f"Text-to-video generation failed after {max_video_retries_per_image} attempts. Selecting a new idea and generating a new image.")
429
  break
430
  continue # Retry text-to-video generation with a modified prompt