akhaliq (HF Staff) committed
Commit 9ed1404 · Parent(s): e40e7dc

use temporary urls and HF upload for media

Files changed (1):
  1. app.py +328 −70

app.py CHANGED
@@ -27,8 +27,12 @@ from tavily import TavilyClient
 from huggingface_hub import HfApi
 import tempfile
 from openai import OpenAI
-from mistralai import Mistral
 import uuid
+import datetime
+from mistralai import Mistral
+import shutil
+import urllib.parse
+import mimetypes
 import threading
 
 # Gradio supported languages for syntax highlighting
@@ -84,8 +88,11 @@ def validate_video_html(video_html: str) -> bool:
     if '<source' not in video_html:
         return False
 
-    # Check for data URI format
-    if 'data:video/mp4;base64,' not in video_html:
+    # Check for valid video source (data URI, HF URL, or file URL)
+    has_data_uri = 'data:video/mp4;base64,' in video_html
+    has_hf_url = 'https://huggingface.co/datasets/' in video_html and '/resolve/main/' in video_html
+    has_file_url = 'file://' in video_html
+    if not (has_data_uri or has_hf_url or has_file_url):
         return False
 
     # Basic HTML structure validation
@@ -1796,8 +1803,217 @@ def compress_audio_for_data_uri(audio_bytes: bytes, max_size_mb: int = 4) -> byt
         print(f"[AudioCompress] Compression failed: {e}, using original audio")
         return audio_bytes
 
-def generate_image_with_qwen(prompt: str, image_index: int = 0) -> str:
-    """Generate image using Qwen image model via Hugging Face InferenceClient with optimized data URL"""
+# Global dictionary to store temporary media files for the session
+temp_media_files = {}
+
+def create_temp_media_url(media_bytes: bytes, filename: str, media_type: str = "image") -> str:
+    """Create a temporary file and return a local URL for preview.
+
+    Args:
+        media_bytes: Raw bytes of the media file
+        filename: Name for the file (will be made unique)
+        media_type: Type of media ('image', 'video', 'audio')
+
+    Returns:
+        Temporary file URL for preview or error message
+    """
+    try:
+        # Create unique filename with timestamp and UUID
+        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+        unique_id = str(uuid.uuid4())[:8]
+        base_name, ext = os.path.splitext(filename)
+        unique_filename = f"{media_type}_{timestamp}_{unique_id}_{base_name}{ext}"
+
+        # Create temporary file in a dedicated directory
+        temp_dir = os.path.join(tempfile.gettempdir(), "anycoder_media")
+        os.makedirs(temp_dir, exist_ok=True)
+        temp_path = os.path.join(temp_dir, unique_filename)
+
+        # Write media bytes to temporary file
+        with open(temp_path, 'wb') as f:
+            f.write(media_bytes)
+
+        # Store the file info for later upload
+        file_id = f"{media_type}_{unique_id}"
+        temp_media_files[file_id] = {
+            'path': temp_path,
+            'filename': filename,
+            'media_type': media_type,
+            'media_bytes': media_bytes
+        }
+
+        # Return file:// URL for preview
+        file_url = f"file://{temp_path}"
+        print(f"[TempMedia] Created temporary {media_type} file: {file_url}")
+        return file_url
+
+    except Exception as e:
+        print(f"[TempMedia] Failed to create temporary file: {str(e)}")
+        return f"Error creating temporary {media_type} file: {str(e)}"
+
+def upload_media_to_hf(media_bytes: bytes, filename: str, media_type: str = "image", token: gr.OAuthToken | None = None, use_temp: bool = True) -> str:
+    """Upload media file to user's Hugging Face account or create temporary file.
+
+    Args:
+        media_bytes: Raw bytes of the media file
+        filename: Name for the file (will be made unique)
+        media_type: Type of media ('image', 'video', 'audio')
+        token: OAuth token from gr.login (takes priority over env var)
+        use_temp: If True, create temporary file for preview; if False, upload to HF
+
+    Returns:
+        Permanent URL to the uploaded file, temporary URL, or error message
+    """
+    try:
+        # If use_temp is True, create temporary file for preview
+        if use_temp:
+            return create_temp_media_url(media_bytes, filename, media_type)
+
+        # Otherwise, upload to Hugging Face for permanent URL
+        # Try to get token from OAuth first, then fall back to environment variable
+        hf_token = None
+        if token and token.token:
+            hf_token = token.token
+        else:
+            hf_token = os.getenv('HF_TOKEN')
+
+        if not hf_token:
+            return "Error: Please log in with your Hugging Face account to upload media, or set HF_TOKEN environment variable."
+
+        # Initialize HF API
+        api = HfApi(token=hf_token)
+
+        # Get current user info to determine username
+        try:
+            user_info = api.whoami()
+            username = user_info.get('name', 'unknown-user')
+        except Exception as e:
+            print(f"[HFUpload] Could not get user info: {e}")
+            username = 'anycoder-user'
+
+        # Create repository name for media storage
+        repo_name = f"{username}/anycoder-media"
+
+        # Try to create the repository if it doesn't exist
+        try:
+            api.create_repo(
+                repo_id=repo_name,
+                repo_type="dataset",
+                private=False,
+                exist_ok=True
+            )
+            print(f"[HFUpload] Repository {repo_name} ready")
+        except Exception as e:
+            print(f"[HFUpload] Repository creation/access issue: {e}")
+            # Continue anyway, repo might already exist
+
+        # Create unique filename with timestamp and UUID
+        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+        unique_id = str(uuid.uuid4())[:8]
+        base_name, ext = os.path.splitext(filename)
+        unique_filename = f"{media_type}/{timestamp}_{unique_id}_{base_name}{ext}"
+
+        # Create temporary file for upload
+        with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
+            temp_file.write(media_bytes)
+            temp_path = temp_file.name
+
+        try:
+            # Upload file to HF repository
+            api.upload_file(
+                path_or_fileobj=temp_path,
+                path_in_repo=unique_filename,
+                repo_id=repo_name,
+                repo_type="dataset",
+                commit_message=f"Upload {media_type} generated by AnyCoder"
+            )
+
+            # Generate permanent URL
+            permanent_url = f"https://huggingface.co/datasets/{repo_name}/resolve/main/{unique_filename}"
+            print(f"[HFUpload] Successfully uploaded {media_type} to {permanent_url}")
+            return permanent_url
+
+        finally:
+            # Clean up temporary file
+            try:
+                os.unlink(temp_path)
+            except Exception:
+                pass
+
+    except Exception as e:
+        print(f"[HFUpload] Upload failed: {str(e)}")
+        return f"Error uploading {media_type} to Hugging Face: {str(e)}"
+
+def upload_temp_files_to_hf_and_replace_urls(html_content: str, token: gr.OAuthToken | None = None) -> str:
+    """Upload all temporary media files to HF and replace their URLs in HTML content.
+
+    Args:
+        html_content: HTML content containing temporary file URLs
+        token: OAuth token for HF authentication
+
+    Returns:
+        Updated HTML content with permanent HF URLs
+    """
+    try:
+        if not temp_media_files:
+            print("[DeployUpload] No temporary media files to upload")
+            return html_content
+
+        print(f"[DeployUpload] Uploading {len(temp_media_files)} temporary media files to HF")
+        updated_content = html_content
+
+        for file_id, file_info in temp_media_files.items():
+            try:
+                # Upload to HF with permanent URL
+                permanent_url = upload_media_to_hf(
+                    file_info['media_bytes'],
+                    file_info['filename'],
+                    file_info['media_type'],
+                    token,
+                    use_temp=False  # Force permanent upload
+                )
+
+                if not permanent_url.startswith("Error"):
+                    # Replace the temporary file URL with permanent URL
+                    temp_url = f"file://{file_info['path']}"
+                    updated_content = updated_content.replace(temp_url, permanent_url)
+                    print(f"[DeployUpload] Replaced {temp_url} with {permanent_url}")
+                else:
+                    print(f"[DeployUpload] Failed to upload {file_id}: {permanent_url}")
+
+            except Exception as e:
+                print(f"[DeployUpload] Error uploading {file_id}: {str(e)}")
+                continue
+
+        # Clean up temporary files after upload
+        cleanup_temp_media_files()
+
+        return updated_content
+
+    except Exception as e:
+        print(f"[DeployUpload] Failed to upload temporary files: {str(e)}")
+        return html_content
+
+def cleanup_temp_media_files():
+    """Clean up temporary media files from disk and memory."""
+    try:
+        for file_id, file_info in temp_media_files.items():
+            try:
+                if os.path.exists(file_info['path']):
+                    os.remove(file_info['path'])
+                    print(f"[TempCleanup] Removed {file_info['path']}")
+            except Exception as e:
+                print(f"[TempCleanup] Failed to remove {file_info['path']}: {str(e)}")
+
+        # Clear the global dictionary
+        temp_media_files.clear()
+        print("[TempCleanup] Cleared temporary media files registry")
+
+    except Exception as e:
+        print(f"[TempCleanup] Error during cleanup: {str(e)}")
+
+def generate_image_with_qwen(prompt: str, image_index: int = 0, token: gr.OAuthToken | None = None) -> str:
+    """Generate image using Qwen image model via Hugging Face InferenceClient and upload to HF for permanent URL"""
     try:
         # Check if HF_TOKEN is available
         if not os.getenv('HF_TOKEN'):
@@ -1817,27 +2033,33 @@ def generate_image_with_qwen(prompt: str, image_index: int = 0) -> str:
         )
 
         # Resize image to reduce size while maintaining quality
-        max_size = 512
+        max_size = 1024  # Increased size since we're not using data URIs
         if image.width > max_size or image.height > max_size:
             image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
 
-        # Convert PIL Image to optimized base64 for HTML embedding
+        # Convert PIL Image to bytes for upload
         import io
-        import base64
-
         buffer = io.BytesIO()
-        # Save as JPEG with compression for smaller file size
-        image.convert('RGB').save(buffer, format='JPEG', quality=85, optimize=True)
-        img_str = base64.b64encode(buffer.getvalue()).decode()
+        # Save as JPEG with good quality since we're not embedding
+        image.convert('RGB').save(buffer, format='JPEG', quality=90, optimize=True)
+        image_bytes = buffer.getvalue()
 
-        # Return HTML img tag with optimized data URL
-        return f'<img src="data:image/jpeg;base64,{img_str}" alt="{prompt}" style="max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;" loading="lazy" />'
+        # Create temporary URL for preview (will be uploaded to HF during deploy)
+        filename = f"generated_image_{image_index}.jpg"
+        temp_url = upload_media_to_hf(image_bytes, filename, "image", token, use_temp=True)
+
+        # Check if creation was successful
+        if temp_url.startswith("Error"):
+            return temp_url
+
+        # Return HTML img tag with temporary URL
+        return f'<img src="{temp_url}" alt="{prompt}" style="max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;" loading="lazy" />'
 
     except Exception as e:
         print(f"Image generation error: {str(e)}")
         return f"Error generating image: {str(e)}"
 
-def generate_image_to_image(input_image_data, prompt: str) -> str:
+def generate_image_to_image(input_image_data, prompt: str, token: gr.OAuthToken | None = None) -> str:
     """Generate an image using image-to-image with Qwen-Image-Edit via Hugging Face InferenceClient.
 
     Returns an HTML <img> tag with optimized base64 JPEG data, similar to text-to-image output.
@@ -1897,22 +2119,29 @@ def generate_image_to_image(input_image_data, prompt: str) -> str:
             model="Qwen/Qwen-Image-Edit",
         )
 
-        # Resize/optimize
-        max_size = 512
+        # Resize/optimize (larger since not using data URIs)
+        max_size = 1024
         if image.width > max_size or image.height > max_size:
             image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
 
         out_buf = io.BytesIO()
-        image.convert('RGB').save(out_buf, format='JPEG', quality=85, optimize=True)
+        image.convert('RGB').save(out_buf, format='JPEG', quality=90, optimize=True)
+        image_bytes = out_buf.getvalue()
+
+        # Create temporary URL for preview (will be uploaded to HF during deploy)
+        filename = "image_to_image_result.jpg"
+        temp_url = upload_media_to_hf(image_bytes, filename, "image", token, use_temp=True)
+
+        # Check if creation was successful
+        if temp_url.startswith("Error"):
+            return temp_url
 
-        import base64
-        img_str = base64.b64encode(out_buf.getvalue()).decode()
-        return f"<img src=\"data:image/jpeg;base64,{img_str}\" alt=\"{prompt}\" style=\"max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;\" loading=\"lazy\" />"
+        return f"<img src=\"{temp_url}\" alt=\"{prompt}\" style=\"max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;\" loading=\"lazy\" />"
     except Exception as e:
         print(f"Image-to-image generation error: {str(e)}")
         return f"Error generating image (image-to-image): {str(e)}"
 
-def generate_video_from_image(input_image_data, prompt: str, session_id: Optional[str] = None) -> str:
+def generate_video_from_image(input_image_data, prompt: str, session_id: Optional[str] = None, token: gr.OAuthToken | None = None) -> str:
     """Generate a video from an input image and prompt using Hugging Face InferenceClient.
 
     Returns an HTML <video> tag whose source points to a local file URL (file://...).
@@ -2006,27 +2235,25 @@ def generate_video_from_image(input_image_data, prompt: str, session_id: Optiona
         )
         print(f"[Image2Video] Received video bytes: {len(video_bytes) if hasattr(video_bytes, '__len__') else 'unknown length'}")
 
-        # Convert video to compressed data URI for deployment compatibility
-        import base64
-
-        # Compress video for data URI embedding
-        compressed_video_bytes = compress_video_for_data_uri(video_bytes, max_size_mb=8)
+        # Create temporary URL for preview (will be uploaded to HF during deploy)
+        filename = "image_to_video_result.mp4"
+        temp_url = upload_media_to_hf(video_bytes, filename, "video", token, use_temp=True)
 
-        # Create data URI
-        video_b64 = base64.b64encode(compressed_video_bytes).decode()
-        data_uri = f"data:video/mp4;base64,{video_b64}"
+        # Check if creation was successful
+        if temp_url.startswith("Error"):
+            return temp_url
 
         video_html = (
             f'<video controls autoplay muted loop playsinline '
             f'style="max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0; display: block;" '
            f'onloadstart="this.style.backgroundColor=\'#f0f0f0\'" '
            f'onerror="this.style.display=\'none\'; console.error(\'Video failed to load\')">'
-            f'<source src="{data_uri}" type="video/mp4" />'
+            f'<source src="{temp_url}" type="video/mp4" />'
            f'<p style="text-align: center; color: #666;">Your browser does not support the video tag.</p>'
            f'</video>'
        )
 
-        print(f"[Image2Video] Successfully generated video HTML tag with data URI ({len(compressed_video_bytes)} bytes)")
+        print(f"[Image2Video] Successfully generated video HTML tag with temporary URL: {temp_url}")
 
         # Validate the generated video HTML
         if not validate_video_html(video_html):
@@ -2041,7 +2268,7 @@ def generate_video_from_image(input_image_data, prompt: str, session_id: Optiona
         print(f"Image-to-video generation error: {str(e)}")
         return f"Error generating video (image-to-video): {str(e)}"
 
-def generate_video_from_text(prompt: str, session_id: Optional[str] = None) -> str:
+def generate_video_from_text(prompt: str, session_id: Optional[str] = None, token: gr.OAuthToken | None = None) -> str:
     """Generate a video from a text prompt using Hugging Face InferenceClient.
 
     Returns an HTML <video> tag with compressed data URI for deployment compatibility.
@@ -2069,7 +2296,7 @@ def generate_video_from_text(prompt: str, session_id: Optional[str] = None) -> s
             "`pip install -U huggingface_hub` and try again."
         )
 
-        model_id = "Wan-AI/Wan2.2-TI2V-5B"
+        model_id = "Wan-AI/Wan2.2-T2V-A14B"
         prompt_str = (prompt or "").strip()
         print(f"[Text2Video] Calling text_to_video with model={model_id}, prompt length={len(prompt_str)}")
         video_bytes = text_to_video_method(
@@ -2078,27 +2305,25 @@ def generate_video_from_text(prompt: str, session_id: Optional[str] = None) -> s
        )
        print(f"[Text2Video] Received video bytes: {len(video_bytes) if hasattr(video_bytes, '__len__') else 'unknown length'}")
 
-        # Convert video to compressed data URI for deployment compatibility
-        import base64
-
-        # Compress video for data URI embedding
-        compressed_video_bytes = compress_video_for_data_uri(video_bytes, max_size_mb=8)
+        # Create temporary URL for preview (will be uploaded to HF during deploy)
+        filename = "text_to_video_result.mp4"
+        temp_url = upload_media_to_hf(video_bytes, filename, "video", token, use_temp=True)
 
-        # Create data URI
-        video_b64 = base64.b64encode(compressed_video_bytes).decode()
-        data_uri = f"data:video/mp4;base64,{video_b64}"
+        # Check if creation was successful
+        if temp_url.startswith("Error"):
+            return temp_url
 
        video_html = (
            f'<video controls autoplay muted loop playsinline '
            f'style="max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0; display: block;" '
            f'onloadstart="this.style.backgroundColor=\'#f0f0f0\'" '
            f'onerror="this.style.display=\'none\'; console.error(\'Video failed to load\')">'
-            f'<source src="{data_uri}" type="video/mp4" />'
+            f'<source src="{temp_url}" type="video/mp4" />'
            f'<p style="text-align: center; color: #666;">Your browser does not support the video tag.</p>'
            f'</video>'
        )
 
-        print(f"[Text2Video] Successfully generated video HTML tag with data URI ({len(compressed_video_bytes)} bytes)")
+        print(f"[Text2Video] Successfully generated video HTML tag with temporary URL: {temp_url}")
 
        # Validate the generated video HTML
        if not validate_video_html(video_html):
@@ -2113,7 +2338,7 @@ def generate_video_from_text(prompt: str, session_id: Optional[str] = None) -> s
         print(f"Text-to-video generation error: {str(e)}")
         return f"Error generating video (text-to-video): {str(e)}"
 
-def generate_music_from_text(prompt: str, music_length_ms: int = 30000, session_id: Optional[str] = None) -> str:
+def generate_music_from_text(prompt: str, music_length_ms: int = 30000, session_id: Optional[str] = None, token: gr.OAuthToken | None = None) -> str:
     """Generate music from a text prompt using ElevenLabs Music API and return an HTML <audio> tag.
 
     Returns compressed data URI for deployment compatibility.
@@ -2139,16 +2364,13 @@ def generate_music_from_text(prompt: str, music_length_ms: int = 30000, session_
         except Exception as e:
             return f"Error generating music: {getattr(e, 'response', resp).text if hasattr(e, 'response') else resp.text}"
 
-        # Convert audio to compressed data URI for deployment compatibility
-        import base64
+        # Create temporary URL for preview (will be uploaded to HF during deploy)
+        filename = "generated_music.mp3"
+        temp_url = upload_media_to_hf(resp.content, filename, "audio", token, use_temp=True)
 
-        # Compress audio for data URI embedding
-        compressed_audio_bytes = compress_audio_for_data_uri(resp.content, max_size_mb=4)
-
-        # Create data URI - use appropriate MIME type based on compression
-        audio_format = "audio/mpeg" if len(compressed_audio_bytes) < len(resp.content) else "audio/wav"
-        audio_b64 = base64.b64encode(compressed_audio_bytes).decode()
-        data_uri = f"data:{audio_format};base64,{audio_b64}"
+        # Check if creation was successful
+        if temp_url.startswith("Error"):
+            return temp_url
 
         audio_html = (
             "<div class=\"anycoder-music\" style=\"max-width:420px;margin:16px auto;padding:12px 16px;border:1px solid #e5e7eb;border-radius:12px;background:linear-gradient(180deg,#fafafa,#f3f4f6);box-shadow:0 2px 8px rgba(0,0,0,0.06)\">"
@@ -2156,13 +2378,13 @@ def generate_music_from_text(prompt: str, music_length_ms: int = 30000, session_
             " <span>🎵 Generated music</span>"
             " </div>"
             f" <audio controls autoplay loop style=\"width:100%;outline:none;\">"
-            f" <source src=\"{data_uri}\" type=\"{audio_format}\" />"
+            f" <source src=\"{temp_url}\" type=\"audio/mpeg\" />"
             " Your browser does not support the audio element."
             " </audio>"
             "</div>"
         )
 
-        print(f"[Music] Successfully generated music HTML tag with data URI ({len(compressed_audio_bytes)} bytes)")
+        print(f"[Music] Successfully generated music HTML tag with temporary URL: {temp_url}")
         return audio_html
     except Exception as e:
         return f"Error generating music: {str(e)}"
@@ -2236,6 +2458,9 @@ def create_image_replacement_blocks(html_content: str, user_prompt: str) -> str:
         matches = re.findall(pattern, html_content, re.IGNORECASE)
         placeholder_images.extend(matches)
 
+    # Filter out HF URLs from placeholders (they are real generated content)
+    placeholder_images = [img for img in placeholder_images if 'huggingface.co/datasets/' not in img]
+
     # If no placeholder images found, look for any img tags
     if not placeholder_images:
         img_pattern = r'<img[^>]*>'
@@ -2264,7 +2489,7 @@ def create_image_replacement_blocks(html_content: str, user_prompt: str) -> str:
     # Generate images for each prompt
     generated_images = []
     for i, prompt in enumerate(image_prompts):
-        image_html = generate_image_with_qwen(prompt, i)
+        image_html = generate_image_with_qwen(prompt, i, token=None)  # TODO: Pass token from parent context
         if not image_html.startswith("Error"):
             generated_images.append((i, image_html))
 
@@ -2341,6 +2566,12 @@ def create_image_replacement_blocks_text_to_image_single(html_content: str, prom
         matches = re.findall(pattern, html_content, re.IGNORECASE)
         if matches:
             placeholder_images.extend(matches)
+
+    # Filter out HF URLs from placeholders (they are real generated content)
+    placeholder_images = [img for img in placeholder_images if 'huggingface.co/datasets/' not in img]
+
+    # Filter out HF URLs from placeholders (they are real generated content)
+    placeholder_images = [img for img in placeholder_images if 'huggingface.co/datasets/' not in img]
 
     # Fallback to any <img> if no placeholders
     if not placeholder_images:
@@ -2348,7 +2579,7 @@ def create_image_replacement_blocks_text_to_image_single(html_content: str, prom
         placeholder_images = re.findall(img_pattern, html_content)
 
     # Generate a single image
-    image_html = generate_image_with_qwen(prompt, 0)
+    image_html = generate_image_with_qwen(prompt, 0, token=None)  # TODO: Pass token from parent context
     if image_html.startswith("Error"):
         return ""
 
@@ -2415,12 +2646,15 @@ def create_video_replacement_blocks_text_to_video(html_content: str, prompt: str
         matches = re.findall(pattern, html_content, re.IGNORECASE)
         if matches:
             placeholder_images.extend(matches)
+
+    # Filter out HF URLs from placeholders (they are real generated content)
+    placeholder_images = [img for img in placeholder_images if 'huggingface.co/datasets/' not in img]
 
     if not placeholder_images:
         img_pattern = r'<img[^>]*>'
         placeholder_images = re.findall(img_pattern, html_content)
 
-    video_html = generate_video_from_text(prompt, session_id=session_id)
+    video_html = generate_video_from_text(prompt, session_id=session_id, token=None)  # TODO: Pass token from parent context
     if video_html.startswith("Error"):
         return ""
 
@@ -2503,7 +2737,7 @@ def create_music_replacement_blocks_text_to_music(html_content: str, prompt: str
     if not prompt or not prompt.strip():
         return ""
 
-    audio_html = generate_music_from_text(prompt, session_id=session_id)
+    audio_html = generate_music_from_text(prompt, session_id=session_id, token=None)  # TODO: Pass token from parent context
     if audio_html.startswith("Error"):
         return ""
 
@@ -2567,10 +2801,15 @@ def create_image_replacement_blocks_from_input_image(html_content: str, user_pro
     for pattern in placeholder_patterns:
         matches = re.findall(pattern, html_content, re.IGNORECASE)
         placeholder_images.extend(matches)
+
+    # Filter out HF URLs from placeholders (they are real generated content)
+    placeholder_images = [img for img in placeholder_images if 'huggingface.co/datasets/' not in img]
 
     if not placeholder_images:
         img_pattern = r'<img[^>]*>'
         placeholder_images = re.findall(img_pattern, html_content)
+        # Filter HF URLs from fallback images too
+        placeholder_images = [img for img in placeholder_images if 'huggingface.co/datasets/' not in img]
 
     div_placeholder_patterns = [
         r'<div[^>]*class=["\'][^"\']*(?:image|img|photo|picture)[^"\']*["\'][^>]*>.*?</div>',
@@ -2589,7 +2828,7 @@ def create_image_replacement_blocks_from_input_image(html_content: str, user_pro
         prompts = extract_image_prompts_from_text(user_prompt, 1)
         if not prompts:
             return ""
-        image_html = generate_image_to_image(input_image_data, prompts[0])
+        image_html = generate_image_to_image(input_image_data, prompts[0], token=None)  # TODO: Pass token from parent context
         if image_html.startswith("Error"):
            return ""
        return f"{SEARCH_START}\n\n{DIVIDER}\n<div class=\"generated-images\">{image_html}</div>\n{REPLACE_END}"
@@ -2600,7 +2839,7 @@ def create_image_replacement_blocks_from_input_image(html_content: str, user_pro
 
     generated_images = []
     for i, prompt in enumerate(image_prompts):
-        image_html = generate_image_to_image(input_image_data, prompt)
+        image_html = generate_image_to_image(input_image_data, prompt, token=None)  # TODO: Pass token from parent context
         if not image_html.startswith("Error"):
             generated_images.append((i, image_html))
 
@@ -2658,13 +2897,16 @@ def create_video_replacement_blocks_from_input_image(html_content: str, user_pro
         matches = re.findall(pattern, html_content, re.IGNORECASE)
         if matches:
             placeholder_images.extend(matches)
+
+    # Filter out HF URLs from placeholders (they are real generated content)
+    placeholder_images = [img for img in placeholder_images if 'huggingface.co/datasets/' not in img]
 
     if not placeholder_images:
         img_pattern = r'<img[^>]*>'
         placeholder_images = re.findall(img_pattern, html_content)
     print(f"[Image2Video] Found {len(placeholder_images)} candidate <img> elements")
 
-    video_html = generate_video_from_image(input_image_data, user_prompt, session_id=session_id)
+    video_html = generate_video_from_image(input_image_data, user_prompt, session_id=session_id, token=None)  # TODO: Pass token from parent context
     try:
         has_file_src = 'src="' in video_html and video_html.count('src="') >= 1 and 'data:video/mp4;base64' not in video_html.split('src="', 1)[1]
         print(f"[Image2Video] Generated video HTML length={len(video_html)}; has_file_src={has_file_src}")
@@ -2712,7 +2954,7 @@ def create_video_replacement_blocks_from_input_image(html_content: str, user_pro
         print("[Image2Video] No <body> tag; appending video via replacement block")
         return f"{SEARCH_START}\n\n{DIVIDER}\n{video_html}\n{REPLACE_END}"
 
-def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_text_to_image: bool, enable_image_to_image: bool, input_image_data, image_to_image_prompt: str | None = None, text_to_image_prompt: str | None = None, enable_image_to_video: bool = False, image_to_video_prompt: str | None = None, session_id: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: Optional[str] = None, enable_text_to_music: bool = False, text_to_music_prompt: Optional[str] = None) -> str:
+def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_text_to_image: bool, enable_image_to_image: bool, input_image_data, image_to_image_prompt: str | None = None, text_to_image_prompt: str | None = None, enable_image_to_video: bool = False, image_to_video_prompt: str | None = None, session_id: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: Optional[str] = None, enable_text_to_music: bool = False, text_to_music_prompt: Optional[str] = None, token: gr.OAuthToken | None = None) -> str:
     """Apply text/image/video/music replacements to HTML content.
 
     - Works with single-document HTML strings
@@ -2749,7 +2991,7 @@ def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_te
         i2v_prompt = (image_to_video_prompt or user_prompt or "").strip()
         print(f"[MediaApply] Running image-to-video with prompt len={len(i2v_prompt)}")
         try:
-            video_html_tag = generate_video_from_image(input_image_data, i2v_prompt, session_id=session_id)
+            video_html_tag = generate_video_from_image(input_image_data, i2v_prompt, session_id=session_id, token=token)
             if not (video_html_tag or "").startswith("Error"):
                 # Validate video HTML before attempting placement
                 if validate_video_html(video_html_tag):
@@ -2792,7 +3034,7 @@ def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_te
         t2v_prompt = (text_to_video_prompt or user_prompt or "").strip()
         print(f"[MediaApply] Running text-to-video with prompt len={len(t2v_prompt)}")
         try:
-            video_html_tag = generate_video_from_text(t2v_prompt, session_id=session_id)
+            video_html_tag = generate_video_from_text(t2v_prompt, session_id=session_id, token=token)
             if not (video_html_tag or "").startswith("Error"):
                 # Validate video HTML before attempting placement
                 if validate_video_html(video_html_tag):
@@ -2823,7 +3065,7 @@ def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_te
         t2m_prompt = (text_to_music_prompt or user_prompt or "").strip()
         print(f"[MediaApply] Running text-to-music with prompt len={len(t2m_prompt)}")
         try:
-            audio_html_tag = generate_music_from_text(t2m_prompt, session_id=session_id)
+            audio_html_tag = generate_music_from_text(t2m_prompt, session_id=session_id, token=token)
             if not (audio_html_tag or "").startswith("Error"):
                 blocks_tm = llm_place_media(result, audio_html_tag, media_kind="audio")
             else:
@@ -2847,7 +3089,7 @@ def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_te
     if enable_image_to_image and input_image_data is not None and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
         i2i_prompt = (image_to_image_prompt or user_prompt or "").strip()
         try:
-            image_html_tag = generate_image_to_image(input_image_data, i2i_prompt)
+            image_html_tag = generate_image_to_image(input_image_data, i2i_prompt, token=token)
             if not (image_html_tag or "").startswith("Error"):
                 blocks2 = llm_place_media(result, image_html_tag, media_kind="image")
             else:
@@ -2868,7 +3110,7 @@ def apply_generated_media_to_html(html_content: str, user_prompt: str, enable_te
         print(f"[MediaApply] Running text-to-image with prompt len={len(t2i_prompt)}")
         # Single-image flow for text-to-image (LLM placement first, fallback deterministic)
         try:
-            image_html_tag = generate_image_with_qwen(t2i_prompt, 0)
+            image_html_tag = generate_image_with_qwen(t2i_prompt, 0, token=token)
             if not (image_html_tag or "").startswith("Error"):
                 blocks = llm_place_media(result, image_html_tag, media_kind="image")
             else:
@@ -4193,6 +4435,7 @@ This will help me create a better design for you."""
                 text_to_video_prompt=text_to_video_prompt,
                 enable_text_to_music=enable_text_to_music,
                 text_to_music_prompt=text_to_music_prompt,
+                token=None,
             )
 
             yield {
@@ -4219,6 +4462,7 @@ This will help me create a better design for you."""
                 text_to_video_prompt=text_to_video_prompt,
                 enable_text_to_music=enable_text_to_music,
                 text_to_music_prompt=text_to_music_prompt,
+                token=None,
             )
 
             preview_val = None
@@ -4645,6 +4889,7 @@ This will help me create a better design for you."""
                 text_to_video_prompt=text_to_video_prompt,
                 enable_text_to_music=enable_text_to_music,
                 text_to_music_prompt=text_to_music_prompt,
+                token=None,
            )
 
            # Update history with the cleaned content
@@ -6294,7 +6539,7 @@ with gr.Blocks(
         show_progress="hidden",
     ).then(
         generation_code,
-        inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt],
+        inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt, text_to_music_toggle, text_to_music_prompt],
         outputs=[code_output, history, sandbox, history_output]
     ).then(
         end_generation_ui,
@@ -7060,7 +7305,13 @@ with gr.Blocks(
             import tempfile
             import os
 
+            # Upload temporary media files to HF and replace URLs (only for Static HTML, not Transformers.js)
+            if sdk == "static" and sdk_name == "Static (HTML)":
+                print("[Deploy] Uploading temporary media files to HF and updating URLs for multi-file static HTML app")
+                # Update the index.html file with permanent media URLs
+                if 'index.html' in files:
+                    files['index.html'] = upload_temp_files_to_hf_and_replace_urls(files['index.html'], token)
+
             try:
                 with tempfile.TemporaryDirectory() as tmpdir:
                     # Write each file preserving subdirectories if any
@@ -7088,6 +7340,12 @@ with gr.Blocks(
 
             # Fallback: single-file static HTML (upload index.html only)
             file_name = "index.html"
+
+            # Upload temporary media files to HF and replace URLs (only for Static HTML, not Transformers.js)
+            if sdk == "static" and sdk_name == "Static (HTML)":
+                print("[Deploy] Uploading temporary media files to HF and updating URLs for single-file static HTML app")
+                code = upload_temp_files_to_hf_and_replace_urls(code, token)
+
             max_attempts = 3
             for attempt in range(max_attempts):
                 import tempfile
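
A minimal usage sketch of the flow this commit introduces, assuming the helpers above are importable from app.py and that HF_TOKEN (or a Gradio OAuth token) is available at deploy time; the bytes, filename, and HTML below are illustrative only, not part of the app:

# Hypothetical usage of the helpers added in this commit.
from app import upload_media_to_hf, upload_temp_files_to_hf_and_replace_urls

# Generation time: stage the bytes on disk and get a file:// URL for the in-app preview.
image_bytes = b"\x89PNG\r\n\x1a\n"  # placeholder bytes, not a real image
preview_url = upload_media_to_hf(image_bytes, "hero.png", media_type="image", use_temp=True)
html = f'<img src="{preview_url}" alt="generated hero image" />'

# Deploy time (Static HTML path): push every staged file to the user's
# <username>/anycoder-media dataset and rewrite the file:// URLs to permanent
# https://huggingface.co/datasets/.../resolve/main/... URLs; the staged files
# are cleaned up by this call.
html = upload_temp_files_to_hf_and_replace_urls(html, token=None)  # token=None falls back to HF_TOKEN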