import gradio as gr
import os
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    pipeline,
    AutoProcessor,
    MusicgenForConditionalGeneration
)
from scipy.io.wavfile import write
import numpy as np
import tempfile
from dotenv import load_dotenv
import spaces # Assumes Hugging Face Spaces library supports `@spaces.GPU`

# Load environment variables (e.g., Hugging Face token)
load_dotenv()
hf_token = os.getenv("HF_TOKEN")

# ---------------------------------------------------------------------
# Load Llama 3 Model with Zero GPU
# ---------------------------------------------------------------------
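# On Zero GPU Spaces, a GPU is attached only while a function decorated with
# @spaces.GPU is running (here for at most 120 s per call); CUDA is not
# available outside such functions.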
@spaces.GPU(duration=120)
def load_llama_pipeline_zero_gpu(model_id: str, token: str):
    try:
        # `token=` replaces the deprecated `use_auth_token=` argument
        tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            token=token,
            torch_dtype=torch.float16,
            device_map="auto",  # automatically handles GPU allocation
            trust_remote_code=True
        )
        return pipeline("text-generation", model=model, tokenizer=tokenizer)
    except Exception as e:
        return str(e)

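# Hypothetical usage (model ID and token are placeholders):
#   pipe = load_llama_pipeline_zero_gpu("meta-llama/Meta-Llama-3-8B-Instruct", hf_token)
#   if not isinstance(pipe, str):
#       print(pipe("Write a one-line radio tag.", max_new_tokens=40)[0]["generated_text"])
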
# ---------------------------------------------------------------------
# Generate Radio Script
# ---------------------------------------------------------------------
@spaces.GPU(duration=120)  # generation itself also needs GPU access on Zero GPU
def generate_script(user_input: str, pipeline_llama):
    try:
        system_prompt = (
            "You are a top-tier radio imaging producer using Llama 3. "
            "Take the user's concept and craft a short, creative promo script."
        )
        combined_prompt = f"{system_prompt}\nUser concept: {user_input}\nRefined script:"
        result = pipeline_llama(combined_prompt, max_new_tokens=200, do_sample=True, temperature=0.9)
        return result[0]["generated_text"].split("Refined script:")[-1].strip()
    except Exception as e:
        return f"Error generating script: {e}"

# ---------------------------------------------------------------------
# Load MusicGen Model
# ---------------------------------------------------------------------
@spaces.GPU(duration=120)
def load_musicgen_model():
    try:
        model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
        processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
        return model, processor
    except Exception as e:
        return None, str(e)

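# The checkpoint is loaded onto the CPU here; generate_audio moves it to the
# GPU only for the duration of each call.
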
# ---------------------------------------------------------------------
# Generate Audio
# ---------------------------------------------------------------------
@spaces.GPU(duration=120)
def generate_audio(prompt: str, audio_length: int, mg_model, mg_processor):
    try:
        mg_model.to("cuda")  # move the model to the GPU
        # The inputs must live on the same device as the model
        inputs = mg_processor(text=[prompt], padding=True, return_tensors="pt").to("cuda")
        outputs = mg_model.generate(**inputs, max_new_tokens=audio_length)
        mg_model.to("cpu")  # return the model to the CPU
        sr = mg_model.config.audio_encoder.sampling_rate
        audio_data = outputs[0, 0].cpu().numpy()
        # Scale to 16-bit PCM, guarding against division by zero on silent output
        peak = np.max(np.abs(audio_data)) or 1.0
        normalized_audio = (audio_data / peak * 32767).astype("int16")
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
            write(temp_wav.name, sr, normalized_audio)
        return temp_wav.name
    except Exception as e:
        return f"Error generating audio: {e}"

# ---------------------------------------------------------------------
# Gradio Interface
# ---------------------------------------------------------------------
def radio_imaging_app(user_prompt, llama_model_id, hf_token, audio_length):
    # Fall back to the HF_TOKEN environment variable when the field is left blank
    token = hf_token or os.getenv("HF_TOKEN")

    # Load Llama 3 pipeline with Zero GPU
    pipeline_llama = load_llama_pipeline_zero_gpu(llama_model_id, token)
    if isinstance(pipeline_llama, str):  # the loader returned an error message
        return pipeline_llama, None

    # Generate the promo script
    script = generate_script(user_prompt, pipeline_llama)

    # Load MusicGen
    mg_model, mg_processor = load_musicgen_model()
    if isinstance(mg_processor, str):  # the loader returned an error message
        return script, mg_processor

    # Generate audio; this returns a WAV file path, or an error string on failure
    audio_data = generate_audio(script, audio_length, mg_model, mg_processor)
    return script, audio_data

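# Note: a generate_audio error string is routed to the gr.Audio output, which
# expects a file path, so the UI will surface it as a load error.
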
# ---------------------------------------------------------------------
# Interface
# ---------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🎧 AI Radio Imaging with Llama 3 + MusicGen (Zero GPU)")
    user_prompt = gr.Textbox(label="Enter your promo idea", placeholder="E.g., a 15-second hype jingle for a morning talk show.")
    # The 70B model is unlikely to fit in a Zero GPU slot; the 8B Instruct variant is a safer default
    llama_model_id = gr.Textbox(label="Llama 3 Model ID", value="meta-llama/Meta-Llama-3-8B-Instruct")
    hf_token = gr.Textbox(label="Hugging Face Token", type="password")
    audio_length = gr.Slider(label="Audio Length (tokens)", minimum=128, maximum=1024, step=64, value=512)
    generate_button = gr.Button("Generate Promo Script and Audio")
    script_output = gr.Textbox(label="Generated Script")
    audio_output = gr.Audio(label="Generated Audio", type="filepath")

    generate_button.click(
        fn=radio_imaging_app,
        inputs=[user_prompt, llama_model_id, hf_token, audio_length],
        outputs=[script_output, audio_output]
    )

demo.launch(debug=True)