Spaces:

Bils
/

AIPromoStudio

Sleeping

App Files Files Community

Bils committed on Jan 9

Commit

3fe530b

verified ·

1 Parent(s): 4a143c9

Update app.py

Browse files

Files changed (1) hide show

app.py +224 -178

app.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import streamlit as st
 import torch
 import scipy.io.wavfile
 from transformers import (
     AutoTokenizer,
     AutoModelForCausalLM,
@@ -8,149 +10,200 @@ from transformers import (
     AutoProcessor,
     MusicgenForConditionalGeneration
 )
 # ---------------------------------------------------------------------
-# Page Configuration
 # ---------------------------------------------------------------------
 st.set_page_config(
     page_icon="🎧",
-    layout="wide",
-    page_title="Radio Imaging Audio Generator - Llama 3",
-    initial_sidebar_state="expanded",
 )
 # ---------------------------------------------------------------------
-# Custom CSS for a Catchy UI
 # ---------------------------------------------------------------------
-CUSTOM_CSS = """
 <style>
 body {
-    background-color: #FAFCFF;
     color: #1F2937;
-    font-family: 'Segoe UI', Tahoma, sans-serif;
 }
 h1, h2, h3, h4, h5, h6 {
     color: #3B82F6;
     margin-bottom: 0.5em;
 }
 .stButton>button {
     background-color: #3B82F6 !important;
     color: #FFFFFF !important;
-    border-radius: 8px !important;
-    font-size: 16px !important;
-    margin: 0.5em 0;
 }
 .sidebar .sidebar-content {
     background: #E0F2FE;
 }
-.material-card {
-    border: 1px solid #D1D5DB;
-    border-radius: 8px;
-    padding: 1rem;
-    margin-bottom: 1rem;
-    background-color: #ffffff;
 }
 .footer-note {
     text-align: center;
-    opacity: 0.6;
     font-size: 14px;
-    margin-top: 30px;
 }
 </style>
 """
-st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
 # ---------------------------------------------------------------------
-# Header Section
 # ---------------------------------------------------------------------
-st.markdown(
     """
-    <h1>🎙 Radio Imaging Audio Generator <span style="font-size: 24px; color: #F59E0B;">(Beta with Llama 3)</span></h1>
-    <p style='font-size:18px;'>
-        Generate custom radio ads, station promos, and jingles in multiple languages
-        using the **hypothetical Llama 3.3** Instruct model & MusicGen!
-    </p>
-    """,
-    unsafe_allow_html=True
-)
 st.markdown("---")
 # ---------------------------------------------------------------------
-# Instructions Section
 # ---------------------------------------------------------------------
-with st.expander("📘 How to Use This Web App"):
     st.markdown(
         """
-        1. **Enter a concept** in any language: Describe the style, mood, length, etc.
-        2. **Choose Language**: If you want a Spanish script, select Spanish below (multi-language).
-        3. **Refine with Llama 3**: Let the model transform your brief into a catchy script.
-        4. **Set Audio Options**: Choose a style (Rock, Pop, Classical...) and max tokens for MusicGen output.
-        5. **Generate Audio**: Listen & optionally download or upload the WAV file.
-        **Future Enhancements**:
-        - **User Authentication**: Restrict access or track usage with logins.
-        - **Advanced Fine-tuning**: Adjust Llama or MusicGen for specialized station branding.
-        - **Cloud Storage**: Upload final WAVs to a server or cloud bucket for easy sharing.
         """
     )
 # ---------------------------------------------------------------------
-# Sidebar: Model Selection & Options
 # ---------------------------------------------------------------------
 with st.sidebar:
-    st.header("🔧 Model & Audio Config")
-    # Llama 3 model ID on Hugging Face (hypothetical)
     llama_model_id = st.text_input(
-        "Llama 3 Instruct Model ID",
-        value="meta-llama/Llama-3.3-70B-Instruct",
-        help="Requires license acceptance on Hugging Face, if/when available."
     )
     device_option = st.selectbox(
         "Hardware Device",
         ["auto", "cpu"],
-        help="If running locally with a GPU, choose 'auto'. CPU-only might be slow for large models."
     )
-    st.markdown("---")
-    # Multi-language prompt
-    language = st.selectbox(
-        "Choose Output Language",
-        ["English", "Spanish", "French", "German", "Other (explain in your prompt)"]
     )
-    st.markdown("---")
-    # Audio style and tokens
     music_style = st.selectbox(
         "Preferred Music Style",
         ["Pop", "Rock", "Electronic", "Classical", "Hip-Hop", "Reggae", "Ambient", "Other"]
     )
-    audio_tokens = st.slider(
-        "MusicGen Max Tokens (Approx. Track Length)",
-        min_value=128, max_value=1024, value=512, step=64
-    )
 # ---------------------------------------------------------------------
-# Prompt Input
 # ---------------------------------------------------------------------
-st.markdown("## ✍🏻 Write Your Concept Brief")
 prompt = st.text_area(
-    "Describe the radio imaging or jingle you want to create.",
-    placeholder="e.g. 'An energetic 15-second pop jingle in Spanish for a morning radio show...'"
 )
 # ---------------------------------------------------------------------
-# Text Generation with Llama 3
 # ---------------------------------------------------------------------
 @st.cache_resource
 def load_llama_pipeline(model_id: str, device: str):
     """
-    Load the Llama or other open-source model as a text-generation pipeline.
-    This is hypothetical for Llama 3.3.
-    Must accept license on HF if the model is restricted.
     """
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     model = AutoModelForCausalLM.from_pretrained(
@@ -158,135 +211,128 @@ def load_llama_pipeline(model_id: str, device: str):
         torch_dtype=torch.float16 if device == "auto" else torch.float32,
         device_map=device
     )
-    gen_pipeline = pipeline(
         "text-generation",
         model=model,
         tokenizer=tokenizer,
         device_map=device
     )
-    return gen_pipeline
-def generate_description(user_prompt: str, pipeline_gen, language_choice: str):
     """
-    Use the pipeline to create a refined description for MusicGen,
-    with multi-language capabilities.
     """
-    # Instruction for Llama (system prompt):
-    system_prompt = (
-        "You are a creative ad copywriter specialized in radio imaging. "
-        "Refine the user's concept into a concise script. "
-        "Incorporate the language choice and creative elements for a promotional audio spot."
     )
-    # Combine user prompt + language + the system instructions
-    combined_prompt = (
-        f"{system_prompt}\n"
-        f"Language to use: {language_choice}\n"
-        f"User Concept: {user_prompt}\n"
-        f"Your refined ad script:"
-    )
-    result = pipeline_gen(
-        combined_prompt,
         max_new_tokens=300,
         do_sample=True,
         temperature=0.8
     )
-    generated_text = result[0]["generated_text"]
-    # Attempt to isolate the script portion
-    if "script:" in generated_text.lower():
-        generated_text = generated_text.split("script:", 1)[-1].strip()
-    # Add a sign-off or brand line
-    generated_text += "\n\n(Generated by Radio Imaging Audio Generator - Powered by Llama 3)"
-    return generated_text
-# Button: Generate Description
-if st.button("📄 Refine Description with Llama 3"):
-    if not prompt.strip():
-        st.error("Please provide a concept before generating a description.")
-    else:
-        with st.spinner("Generating a refined description..."):
-            try:
-                pipeline_llama = load_llama_pipeline(llama_model_id, device_option)
-                refined_text = generate_description(prompt, pipeline_llama, language)
-                st.session_state['refined_prompt'] = refined_text
-                st.success("Description successfully refined!")
-                st.write(refined_text)
-                st.download_button(
-                    "📥 Download Description",
-                    refined_text,
-                    file_name="refined_description.txt"
-                )
-            except Exception as e:
-                st.error(f"Error while generating with Llama 3: {e}")
-st.markdown("---")
 # ---------------------------------------------------------------------
-# MusicGen: Generate Audio
 # ---------------------------------------------------------------------
-@st.cache_resource
-def load_musicgen_model():
-    """Load and cache the MusicGen model and processor."""
-    mg_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
-    mg_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
-    return mg_model, mg_processor
-if st.button("▶ Generate Audio with MusicGen"):
-    if 'refined_prompt' not in st.session_state or not st.session_state['refined_prompt']:
-        st.error("Please generate or have a refined script before creating audio.")
-    else:
-        descriptive_text = st.session_state['refined_prompt']
-        with st.spinner("Generating your audio..."):
-            try:
-                musicgen_model, processor = load_musicgen_model()
-                # Incorporate the style preference into the final text
-                final_text_for_music = f"{descriptive_text}\nStyle preference: {music_style}"
-                # Use the refined prompt + style as input
-                inputs = processor(
-                    text=[final_text_for_music],
-                    padding=True,
-                    return_tensors="pt"
-                )
-                # Adjust max_new_tokens for track length
-                audio_values = musicgen_model.generate(**inputs, max_new_tokens=audio_tokens)
-                sampling_rate = musicgen_model.config.audio_encoder.sampling_rate
-                # Save & display the audio
-                audio_filename = f"radio_imaging_output_{music_style.lower()}.wav"
-                scipy.io.wavfile.write(
-                    audio_filename,
-                    rate=sampling_rate,
-                    data=audio_values[0, 0].numpy()
-                )
-                st.success("Audio successfully generated!")
-                st.audio(audio_filename)
-                # Optionally, prompt to "Upload to Cloud" or "Save to Directory"
-                if st.checkbox("Upload this WAV to cloud storage? (Demo)"):
-                    with st.spinner("Uploading... (This is a placeholder)"):
-                        # Pseudocode for your custom logic, e.g.:
-                        # upload_to_s3(audio_filename, bucket_name="radio-imaging-bucket")
-                        st.success("File uploaded to your cloud storage (placeholder).")
-            except Exception as e:
-                st.error(f"Error while generating audio: {e}")
 # ---------------------------------------------------------------------
-# Footer Section
 # ---------------------------------------------------------------------
 st.markdown("---")
 st.markdown(
-    "<div class='footer-note'>"
-    "✅ Built with a hypothetical Llama 3.3 & MusicGen · "
-    "Multi-language, advanced styles, and a hint of future expansions · "
-    "Happy producing!"
-    "</div>",
     unsafe_allow_html=True
 )
-# Hide Streamlit's default menu and footer if you wish
-st.markdown("<style>#MainMenu {visibility: hidden;} footer {visibility: hidden;}</style>", unsafe_allow_html=True)

 import streamlit as st
 import torch
 import scipy.io.wavfile
+import requests
+from io import BytesIO
 from transformers import (
     AutoTokenizer,
     AutoModelForCausalLM,
     AutoProcessor,
     MusicgenForConditionalGeneration
 )
+from streamlit_lottie import st_lottie  # pip install streamlit-lottie
 # ---------------------------------------------------------------------
+# 1) Page Configuration
 # ---------------------------------------------------------------------
 st.set_page_config(
+    page_title="Modern Radio Imaging Generator - Llama 3 & MusicGen",
     page_icon="🎧",
+    layout="wide"
 )
 # ---------------------------------------------------------------------
+# 2) Custom CSS for a Sleek, Modern Look
 # ---------------------------------------------------------------------
+MODERN_CSS = """
 <style>
+/* Body styling */
 body {
+    background: linear-gradient(to bottom right, #ffffff, #f3f4f6);
+    font-family: 'Helvetica Neue', Arial, sans-serif;
     color: #1F2937;
 }
+/* Make the container narrower for a sleek look */
+.block-container {
+    max-width: 1100px;
+}
+/* Heading style */
 h1, h2, h3, h4, h5, h6 {
     color: #3B82F6;
     margin-bottom: 0.5em;
 }
+/* Buttons */
 .stButton>button {
     background-color: #3B82F6 !important;
     color: #FFFFFF !important;
+    border-radius: 0.8rem !important;
+    font-size: 1rem !important;
+    padding: 0.6rem 1.2rem !important;
 }
+/* Sidebar customization */
 .sidebar .sidebar-content {
     background: #E0F2FE;
 }
+/* Text input areas */
+textarea, input, select {
+    border-radius: 0.5rem !important;
+}
+/* Animate some elements on hover (just an example) */
+.stButton>button:hover {
+    background-color: #2563EB !important;
+    transition: background-color 0.3s ease-in-out;
+}
+/* Lottie container style */
+.lottie-container {
+    display: flex;
+    justify-content: center;
+    margin: 1rem 0;
 }
+/* Footer note */
 .footer-note {
     text-align: center;
+    opacity: 0.7;
     font-size: 14px;
+    margin-top: 2rem;
 }
+/* Hide default Streamlit branding if desired */
+#MainMenu, footer {visibility: hidden;}
 </style>
 """
+st.markdown(MODERN_CSS, unsafe_allow_html=True)
 # ---------------------------------------------------------------------
+# 3) Lottie Animation Loader
 # ---------------------------------------------------------------------
+@st.cache_data
+def load_lottie_url(url: str, timeout: float = 10.0):
     """
+    Fetch a Lottie animation JSON document from ``url``.
+
+    Args:
+        url: Address of the Lottie JSON file.
+        timeout: Seconds to wait for the server before giving up.
+
+    Returns:
+        The decoded JSON payload, or ``None`` when the request fails, the
+        server does not answer with HTTP 200, or the body is not valid JSON.
+    """
+    # The animation is purely decorative, so every failure mode collapses to
+    # ``None`` instead of raising; an unbounded wait is avoided via ``timeout``.
+    try:
+        response = requests.get(url, timeout=timeout)
+    except requests.RequestException:
+        return None
+    if response.status_code != 200:
+        return None
+    try:
+        return response.json()
+    except ValueError:
+        # Covers the JSON decode errors raised for a malformed body.
+        return None
+# Example Lottie animations (feel free to replace with your own):
+LOTTIE_URL_HEADER = "https://assets1.lottiefiles.com/packages/lf20_amhnytsm.json"  # music-themed animation
+lottie_music = load_lottie_url(LOTTIE_URL_HEADER)
+# ---------------------------------------------------------------------
+# 4) Header & Intro with a Lottie Animation
+# ---------------------------------------------------------------------
+col_header1, col_header2 = st.columns([3, 2], gap="medium")
+with col_header1:
+    st.markdown(
+        """
+        <h1>🎙 Radio Imaging Generator (Beta)</h1>
+        <p style='font-size:18px;'>
+            Create catchy radio promos, ads, and station jingles with
+            a modern UI, Llama 3 text generation, and MusicGen audio!
+        </p>
+        """,
+        unsafe_allow_html=True
+    )
+with col_header2:
+    if lottie_music:
+        with st.container():
+            st_lottie(lottie_music, height=180, key="header_lottie")
+    else:
+        # Fallback if Lottie fails to load
+        st.markdown("*(Animation unavailable)*")
 st.markdown("---")
 # ---------------------------------------------------------------------
+# 5) Explanation in an Expander
 # ---------------------------------------------------------------------
+with st.expander("📘 How to Use This App"):
     st.markdown(
         """
+        **Steps**:
+        1. **Model & Language**: In the sidebar, choose the Llama model ID (e.g. a real Llama 2) and the device.
+        2. **Enter Concept**: Provide a short description of the ad or jingle you want.
+        3. **Refine**: Click on "Refine with Llama 3" to get a polished script in your chosen language or style.
+        4. **Generate Audio**: Use MusicGen to create a short audio snippet from that refined script.
+        5. **Listen & Download**: Enjoy or download the result as a WAV file.
+        **Note**:
+        - If "Llama 3.3" doesn't exist, you'll get errors. Use a real model from [Hugging Face](https://huggingface.co/models)
+          like `meta-llama/Llama-2-7b-chat-hf`.
+        - Some large models require GPU (or specialized hardware) for feasible speeds.
+        - This example uses [streamlit-lottie](https://github.com/andfanilo/streamlit-lottie) for animation.
         """
     )
 # ---------------------------------------------------------------------
+# 6) Sidebar Configuration
 # ---------------------------------------------------------------------
 with st.sidebar:
+    st.header("🔧 Llama 3 & Audio Settings")
+    # Model input
     llama_model_id = st.text_input(
+        "Llama Model ID",
+        value="meta-llama/Llama-3.3-70B-Instruct",  # Fictitious, please replace with a real model
+        help="Replace with a real model, e.g. meta-llama/Llama-2-7b-chat-hf"
     )
     device_option = st.selectbox(
         "Hardware Device",
         ["auto", "cpu"],
+        index=0,
+        help="If local GPU is available, choose 'auto'. CPU might be slow for large models."
     )
+    # Multi-language or style
+    language_choice = st.selectbox(
+        "Choose Language",
+        ["English", "Spanish", "French", "German", "Other (describe in prompt)"]
     )
+    # Music style & max tokens
     music_style = st.selectbox(
         "Preferred Music Style",
         ["Pop", "Rock", "Electronic", "Classical", "Hip-Hop", "Reggae", "Ambient", "Other"]
     )
+    audio_tokens = st.slider("MusicGen Max Tokens (Track Length)", 128, 1024, 512, 64)
 # ---------------------------------------------------------------------
+# 7) Prompt for the Radio Imaging Concept
 # ---------------------------------------------------------------------
+st.markdown("## ✍️ Your Radio Concept")
 prompt = st.text_area(
+    "Describe the theme, audience, length, energy level, etc.",
+    placeholder="E.g. 'A high-energy 10-second pop jingle for a morning radio show...'"
 )
 # ---------------------------------------------------------------------
+# 8) Load Llama Pipeline
 # ---------------------------------------------------------------------
 @st.cache_resource
 def load_llama_pipeline(model_id: str, device: str):
     """
+    Build a text-generation pipeline for ``model_id``, cached by Streamlit.
+
+    Args:
+        model_id: Identifier handed to ``AutoTokenizer.from_pretrained``.
+        device: Forwarded as ``device_map`` to both the model loader and the
+            pipeline; ``"auto"`` selects float16 weights, any other value
+            selects float32.
+
+    Returns:
+        The ``pipeline("text-generation", ...)`` object wrapping the loaded
+        model and tokenizer.
     """
     tokenizer = AutoTokenizer.from_pretrained(model_id)
+    # NOTE(review): no positional model argument is visible in the call below;
+    # an unchanged line is presumably elided at the diff hunk boundary -
+    # confirm against the full file.
     model = AutoModelForCausalLM.from_pretrained(
         torch_dtype=torch.float16 if device == "auto" else torch.float32,
         device_map=device
     )
+    pipe = pipeline(
         "text-generation",
         model=model,
         tokenizer=tokenizer,
         device_map=device
     )
+    return pipe
+def refine_description_with_llama(user_prompt: str, pipeline_llama, lang: str):
     """
+    Turn a rough concept into a polished radio script with a text-generation model.
+
+    Args:
+        user_prompt: The concept typed by the user.
+        pipeline_llama: Callable invoked as ``pipeline_llama(prompt, **gen_kwargs)``
+            that returns a list whose first item has a ``"generated_text"`` key.
+        lang: Language preference inserted into the prompt.
+
+    Returns:
+        The generated script followed by a fixed attribution line.
     """
+    system_msg = (
+        "You are an expert radio imaging script writer. "
+        "Refine the user's concept into a concise, compelling piece. "
+        "Ensure to reflect any language or style requests."
     )
+    combined = f"{system_msg}\nLanguage: {lang}\nUser Concept: {user_prompt}\nRefined Script:"
+    result = pipeline_llama(
+        combined,
         max_new_tokens=300,
         do_sample=True,
         temperature=0.8
     )
+    text = result[0]["generated_text"]
+    if text.startswith(combined):
+        # The output echoes the prompt: drop exactly that prefix so a repeated
+        # "Refined Script:" inside the model's answer cannot truncate it.
+        text = text[len(combined):].strip()
+    elif "Refined Script:" in text:
+        # Prompt not echoed verbatim: fall back to the text after the marker.
+        text = text.split("Refined Script:")[-1].strip()
+    text += "\n\n(Generated with Llama 3 - Modern Radio Generator)"
+    return text
 # ---------------------------------------------------------------------
+# 9) Load & Cache MusicGen
+# ---------------------------------------------------------------------
+# Defined before the button handlers: the script executes top to bottom,
+# so the loader must already exist when the audio button branch calls it.
+@st.cache_resource
+def load_musicgen_model():
+    """
+    Load and cache the MusicGen model & processor.
+    Using 'facebook/musicgen-small' as example.
+
+    Returns:
+        A ``(model, processor)`` tuple.
+    """
+    mgm = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
+    mgp = AutoProcessor.from_pretrained("facebook/musicgen-small")
+    return mgm, mgp
+# ---------------------------------------------------------------------
+# 10) Buttons & Outputs
 # ---------------------------------------------------------------------
+col_gen1, col_gen2 = st.columns(2)
+# Left column: refine the user's concept into a script and keep it in
+# session state so the audio step can read it on a later run.
+with col_gen1:
+    if st.button("📄 Refine with Llama 3"):
+        if not prompt.strip():
+            st.error("Please provide a brief concept first.")
+        else:
+            with st.spinner("Refining your script..."):
+                try:
+                    pipeline_llama = load_llama_pipeline(llama_model_id, device_option)
+                    refined_text = refine_description_with_llama(prompt, pipeline_llama, language_choice)
+                    st.session_state['refined_prompt'] = refined_text
+                    st.success("Refined text generated!")
+                    st.write(refined_text)
+                    st.download_button(
+                        "💾 Download Script",
+                        refined_text,
+                        file_name="refined_jingle_script.txt"
+                    )
+                except Exception as e:
+                    st.error(f"Error: {e}")
+# Right column: feed the stored script plus the chosen style to MusicGen
+# and write the result to a WAV file that is then played back.
+with col_gen2:
+    if st.button("▶ Generate Audio with MusicGen"):
+        if 'refined_prompt' not in st.session_state or not st.session_state['refined_prompt']:
+            st.error("No refined prompt found. Please generate/refine your script first.")
+        else:
+            final_text_for_music = st.session_state['refined_prompt']
+            final_text_for_music += f"\nPreferred style: {music_style}"
+            with st.spinner("Generating audio..."):
+                try:
+                    # Load MusicGen model once
+                    mg_model, mg_processor = load_musicgen_model()
+                    inputs = mg_processor(
+                        text=[final_text_for_music],
+                        padding=True,
+                        return_tensors="pt"
+                    )
+                    audio_output = mg_model.generate(**inputs, max_new_tokens=audio_tokens)
+                    sr = mg_model.config.audio_encoder.sampling_rate
+                    audio_filename = f"radio_imaging_{music_style.lower()}.wav"
+                    scipy.io.wavfile.write(
+                        audio_filename,
+                        rate=sr,
+                        data=audio_output[0, 0].numpy()
+                    )
+                    st.success("Audio generated! Listen below:")
+                    st.audio(audio_filename)
+                    # Optional Save/Upload prompt
+                    if st.checkbox("Upload this WAV to a cloud (demo)?"):
+                        with st.spinner("Uploading..."):
+                            # Placeholder for your own S3 or cloud logic
+                            st.success("Uploaded (placeholder).")
+                except Exception as e:
+                    st.error(f"Error generating audio: {e}")
+# ---------------------------------------------------------------------
+# 11) Footer
 # ---------------------------------------------------------------------
 st.markdown("---")
 st.markdown(
+    """
+    <div class='footer-note'>
+        © 2025 Modern Radio Generator - Built with Llama & MusicGen |
+        <a href='https://example.com' target='_blank'>YourCompany</a>
+    </div>
+    """,
     unsafe_allow_html=True
 )