|
|
|
import streamlit as st |
|
from transformers import pipeline |
|
from PIL import Image |
|
import io |
|
from scipy.io.wavfile import write as write_wav |
|
|
|
def generate_image_caption(image): |
|
"""Generates a caption for the given image""" |
|
img2caption = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base") |
|
result = img2caption(image) |
|
return result[0]['generated_text'] |
|
|
|
def text2story(text): |
|
"""Generates a children's story from text input""" |
|
story_prompt = f"Create a funny 100-word story for 8-year-olds about: {text}. Include: 1) A silly character 2) Magical object 3) Sound effects 4) Happy ending" |
|
|
|
pipe = pipeline("text-generation", model="pranavpsv/genre-story-generator-v2") |
|
story_text = pipe( |
|
story_prompt, |
|
max_new_tokens=200, |
|
temperature=0.9, |
|
top_k=50 |
|
)[0]['generated_text'] |
|
return story_text.split("Happy ending")[-1].strip() |
|
|
|
def story_to_speech(story_text): |
|
"""Converts story text to audio using TTS""" |
|
tts_pipe = pipeline("text-to-speech", model="suno/bark-small") |
|
audio_output = tts_pipe(story_text[:400]) |
|
|
|
|
|
audio_bytes = io.BytesIO() |
|
audio_np = (audio_output["audio"] * 32767).astype(np.int16) |
|
write_wav(audio_bytes, audio_output["sampling_rate"], audio_np) |
|
audio_bytes.seek(0) |
|
|
|
return audio_bytes |
|
|
|
def main(): |
|
st.title("π Image Story Generator with Audio") |
|
st.write("Upload an image to get a magical story read aloud!") |
|
|
|
uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"]) |
|
|
|
if uploaded_image: |
|
image = Image.open(uploaded_image).convert("RGB") |
|
st.image(image, use_column_width=True) |
|
|
|
with st.spinner("β¨ Analyzing image..."): |
|
caption = generate_image_caption(image) |
|
|
|
st.subheader("Image Understanding") |
|
st.write(caption) |
|
|
|
with st.spinner("π Writing story..."): |
|
story = text2story(caption) |
|
|
|
st.subheader("Magical Story") |
|
st.write(story) |
|
|
|
if st.button("π§ Read Story Aloud"): |
|
with st.spinner("π Generating audio..."): |
|
try: |
|
audio_bytes = story_to_speech(story) |
|
st.audio(audio_bytes, format="audio/wav") |
|
except Exception as e: |
|
st.error(f"Error generating audio: {str(e)}") |
|
|
|
if __name__ == "__main__": |
|
main() |