File size: 2,492 Bytes
0546ecd 893d08c dfc070a cb3f72b 0546ecd dfc070a cb3f72b 0546ecd cb3f72b dfc070a cb3f72b 0546ecd cb3f72b 0546ecd cb3f72b 63497b6 0546ecd 63497b6 0546ecd 63497b6 6803535 63497b6 6803535 0546ecd 63497b6 dfc070a cb3f72b 63497b6 dfc070a cb3f72b dfc070a cb3f72b 0546ecd cb3f72b 63497b6 0546ecd 63497b6 dfc070a cb3f72b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
# app.py
import io

import numpy as np
import streamlit as st
from PIL import Image
from scipy.io.wavfile import write as write_wav
from transformers import pipeline
def generate_image_caption(image):
    """Produce a text caption for an image.

    Args:
        image: The picture to describe; it is handed unchanged to the
            ``image-to-text`` pipeline.

    Returns:
        The ``generated_text`` entry of the pipeline's first prediction.
    """
    captioner = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
    )
    # The pipeline yields a list of predictions; only the first one is used.
    return captioner(image)[0]["generated_text"]
def text2story(text):
    """Generate a short children's story about ``text``.

    Wraps ``text`` in a fixed instruction prompt and asks the
    ``pranavpsv/genre-story-generator-v2`` text-generation pipeline for a
    sampled continuation of up to 200 new tokens.

    Args:
        text: Subject of the story.

    Returns:
        The generated continuation only (the prompt is not included), with
        leading and trailing whitespace removed.
    """
    story_prompt = (
        f"Create a funny 100-word story for 8-year-olds about: {text}. "
        "Include: 1) A silly character 2) Magical object 3) Sound effects "
        "4) Happy ending"
    )
    pipe = pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")
    outputs = pipe(
        story_prompt,
        max_new_tokens=200,
        # temperature/top_k are sampling settings; they only take effect
        # when sampling is switched on explicitly.
        do_sample=True,
        temperature=0.9,
        top_k=50,
        # Ask for the continuation alone rather than cutting the prompt off
        # by splitting on "Happy ending", which would also discard the start
        # of any story that happens to contain that phrase.
        return_full_text=False,
    )
    return outputs[0]["generated_text"].strip()
def story_to_speech(story_text):
    """Synthesize speech for a story and return it as an in-memory WAV file.

    Only the first 400 characters of ``story_text`` are sent to the
    ``suno/bark-small`` text-to-speech pipeline.

    Args:
        story_text: The story to read aloud.

    Returns:
        An ``io.BytesIO`` rewound to offset 0 that holds the WAV data
        (16-bit integer samples at the pipeline's sampling rate).

    Raises:
        ValueError: If ``story_text`` is empty or contains only whitespace.
    """
    if not story_text or not story_text.strip():
        raise ValueError("Cannot generate audio from an empty story.")

    tts_pipe = pipeline("text-to-speech", model="suno/bark-small")
    audio_output = tts_pipe(story_text[:400])

    # NOTE(review): the audio array may carry a leading singleton axis
    # (e.g. shape (1, n)) depending on the transformers version - confirm.
    # Squeezing makes such a clip 1-D so it is not written as a single frame
    # with n channels.
    samples = np.squeeze(np.asarray(audio_output["audio"], dtype=np.float32))
    # Keep samples inside [-1, 1] so scaling to int16 cannot wrap around.
    samples = np.clip(samples, -1.0, 1.0)
    audio_np = (samples * 32767).astype(np.int16)

    audio_bytes = io.BytesIO()
    write_wav(audio_bytes, audio_output["sampling_rate"], audio_np)
    audio_bytes.seek(0)  # rewind so the consumer reads from the start
    return audio_bytes
def main():
    """Render the Streamlit page: upload, caption, story, optional audio.

    Streamlit re-executes the script on every widget interaction, including
    a click on the read-aloud button. The caption and story are therefore
    kept in ``st.session_state`` and only regenerated when a different file
    is uploaded, so the button does not trigger a fresh caption and story.
    """
    # NOTE(review): the bare "π" prefix in some labels below looks like a
    # mis-decoded emoji whose original cannot be recovered from this text;
    # those strings are left unchanged. The sparkles and headphone emoji
    # could be recovered from their remaining bytes and are restored.
    st.title("π Image Story Generator with Audio")
    st.write("Upload an image to get a magical story read aloud!")

    uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
    if not uploaded_image:
        return

    image = Image.open(uploaded_image).convert("RGB")
    st.image(image, use_column_width=True)

    state = st.session_state
    # Identify the upload so cached results are dropped when the file changes.
    upload_id = getattr(uploaded_image, "file_id", None) or (
        uploaded_image.name,
        uploaded_image.size,
    )
    if state.get("story_app_upload_id") != upload_id:
        state["story_app_upload_id"] = upload_id
        state.pop("story_app_caption", None)
        state.pop("story_app_story", None)

    if "story_app_caption" not in state:
        with st.spinner("✨ Analyzing image..."):
            state["story_app_caption"] = generate_image_caption(image)
    caption = state["story_app_caption"]
    st.subheader("Image Understanding")
    st.write(caption)

    if "story_app_story" not in state:
        with st.spinner("π Writing story..."):
            state["story_app_story"] = text2story(caption)
    story = state["story_app_story"]
    st.subheader("Magical Story")
    st.write(story)

    if st.button("🎧 Read Story Aloud"):
        with st.spinner("π Generating audio..."):
            # UI boundary: surface any synthesis failure in the page rather
            # than letting the whole script run abort.
            try:
                audio_bytes = story_to_speech(story)
            except Exception as e:
                st.error(f"Error generating audio: {e}")
            else:
                st.audio(audio_bytes, format="audio/wav")
# Entry point: build the page when the file is executed as a script.
if __name__ == "__main__":
    main()