Spaces:

LTH001
/

isom-5240

Sleeping

File size: 2,492 Bytes

0546ecd
893d08c
dfc070a
cb3f72b
 
0546ecd
dfc070a
cb3f72b
0546ecd
cb3f72b
 
 
dfc070a
cb3f72b
0546ecd
 
cb3f72b
 
 
 
0546ecd
 
 
cb3f72b
63497b6
 
 
0546ecd
63497b6
0546ecd
63497b6
6803535
63497b6
6803535
 
0546ecd
63497b6
 
dfc070a
cb3f72b
63497b6
 
dfc070a
cb3f72b
dfc070a
cb3f72b
0546ecd
 
cb3f72b
 
 
 
 
 
 
 
 
 
 
 
63497b6
 
 
 
0546ecd
63497b6
 
 
dfc070a
cb3f72b

# app.py
import io

import numpy as np
import streamlit as st
from PIL import Image
from scipy.io.wavfile import write as write_wav
from transformers import pipeline

def generate_image_caption(image):
    """Return the caption text produced for ``image`` by the BLIP captioning model.

    A fresh ``image-to-text`` pipeline is built on each call; the
    ``generated_text`` field of the first result is returned.
    """
    captioner = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
    )
    predictions = captioner(image)
    first_prediction = predictions[0]
    return first_prediction['generated_text']

def text2story(text):
    """Generate a short children's story about ``text``.

    Args:
        text: Subject inserted into the story prompt.

    Returns:
        The text generated after the prompt, with surrounding whitespace
        stripped. The prompt itself is not part of the result.
    """
    story_prompt = f"Create a funny 100-word story for 8-year-olds about: {text}. Include: 1) A silly character 2) Magical object 3) Sound effects 4) Happy ending"

    pipe = pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")
    outputs = pipe(
        story_prompt,
        max_new_tokens=200,
        # temperature / top_k only influence decoding when sampling is on.
        do_sample=True,
        temperature=0.9,
        top_k=50,
        # Ask the pipeline for the continuation only. Splitting the full text
        # on the prompt's last words ("Happy ending") also cut off any story
        # that happened to repeat that phrase.
        return_full_text=False,
    )
    return outputs[0]['generated_text'].strip()

def story_to_speech(story_text):
    """Synthesize speech for a story and return it as in-memory WAV data.

    Args:
        story_text: Text to read aloud. Only its first 400 characters are
            passed to the text-to-speech pipeline.

    Returns:
        An ``io.BytesIO`` rewound to offset 0 that holds the audio encoded
        as a 16-bit PCM WAV file.
    """
    tts_pipe = pipeline("text-to-speech", model="suno/bark-small")
    audio_output = tts_pipe(story_text[:400])

    # Drop length-1 axes: if the pipeline hands back a (1, n_samples) array,
    # the WAV writer would otherwise treat it as one frame with n_samples
    # channels. atleast_1d guards the degenerate single-sample case.
    waveform = np.atleast_1d(np.squeeze(np.asarray(audio_output["audio"])))

    # Clamp to [-1, 1] before scaling so out-of-range samples saturate
    # instead of wrapping around when cast to int16.
    audio_np = (np.clip(waveform, -1.0, 1.0) * 32767).astype(np.int16)

    audio_bytes = io.BytesIO()
    write_wav(audio_bytes, audio_output["sampling_rate"], audio_np)
    audio_bytes.seek(0)

    return audio_bytes

def main():
    """Render the page: upload an image, caption it, write a story, read it aloud.

    Streamlit re-executes the whole script on every widget interaction
    (including the "Read Story Aloud" button), so the caption and story are
    kept in ``st.session_state`` and reused while the same file stays
    uploaded. Without that, pressing the button would regenerate both before
    any audio was produced.
    """
    st.title("📖 Image Story Generator with Audio")
    st.write("Upload an image to get a magical story read aloud!")

    uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

    if not uploaded_image:
        return

    image = Image.open(uploaded_image).convert("RGB")
    st.image(image, use_column_width=True)

    # Identify the upload by name and size; a different file starts a fresh
    # cache so stale results are never shown for a new image.
    upload_key = (uploaded_image.name, uploaded_image.size)
    cache = st.session_state.get("story_cache")
    if cache is None or cache.get("source") != upload_key:
        cache = {"source": upload_key}
        st.session_state["story_cache"] = cache

    if "caption" not in cache:
        with st.spinner("✨ Analyzing image..."):
            cache["caption"] = generate_image_caption(image)

    st.subheader("Image Understanding")
    st.write(cache["caption"])

    if "story" not in cache:
        with st.spinner("📖 Writing story..."):
            cache["story"] = text2story(cache["caption"])

    st.subheader("Magical Story")
    st.write(cache["story"])

    if st.button("🎧 Read Story Aloud"):
        with st.spinner("🔊 Generating audio..."):
            # UI boundary: report any failure to the user instead of
            # crashing the page.
            try:
                audio_bytes = story_to_speech(cache["story"])
                st.audio(audio_bytes, format="audio/wav")
            except Exception as e:
                st.error(f"Error generating audio: {str(e)}")

# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()