# Hugging Face Space app source (Space status when captured: Sleeping; file size: 4,137 bytes).
# The file viewer's per-line commit-hash column and line-number gutter were page chrome, not code.
# import part
import hashlib
import io

import numpy as np
import soundfile as sf  # For handling audio file operations
import streamlit as st
from PIL import Image
from transformers import pipeline
# function part
@st.cache_resource(show_spinner=False)
def _load_caption_pipeline():
    """Build the BLIP image-captioning pipeline once and reuse it.

    Streamlit re-executes the script on every widget interaction; caching the
    pipeline as a resource avoids reloading the model weights on each rerun.
    """
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


def generate_image_caption(image):
    """Generate a caption for the given image using a pre-trained model.

    Args:
        image: PIL Image object.

    Returns:
        str: Generated caption text.
    """
    img2caption = _load_caption_pipeline()
    result = img2caption(image)
    # The pipeline returns a list of candidate dicts; use the first one.
    return result[0]['generated_text']
@st.cache_resource(show_spinner=False)
def _load_story_pipeline():
    """Build the story text-generation pipeline once and reuse it.

    Cached as a Streamlit resource so the model is not reloaded on every
    script rerun.
    """
    return pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")


def text2story(text):
    """Generate a children's story from a text prompt.

    Args:
        text: Input text prompt (e.g. an image caption).

    Returns:
        str: Generated story text, without the instruction prompt.
    """
    # Craft prompt with specific requirements for children's stories
    story_prompt = (
        f"Create a funny 100-word story for 8-year-olds about: {text}. Include: "
        "1) A silly character 2) Magical object 3) Sound effects 4) Happy ending"
    )
    pipe = _load_story_pipeline()
    outputs = pipe(
        story_prompt,
        max_new_tokens=200,  # Limit story length
        do_sample=True,  # temperature/top_k only take effect when sampling
        temperature=0.9,  # Control randomness (higher = more creative)
        top_k=50,  # Limit vocabulary choices
        # Ask for the continuation only. Splitting the full text on the
        # prompt's closing words ("Happy ending") would also cut a story
        # that happens to contain that phrase.
        return_full_text=False,
    )
    return outputs[0]['generated_text'].strip()
# Upper bound on the number of characters sent to the TTS model per call.
_MAX_TTS_CHARS = 400


@st.cache_resource(show_spinner=False)
def _load_tts_pipeline():
    """Build the Bark text-to-speech pipeline once and reuse it.

    Cached as a Streamlit resource so the model is not reloaded on every
    script rerun.
    """
    return pipeline("text-to-speech", model="suno/bark-small")


def story_to_speech(story_text):
    """Convert story text to audio using a text-to-speech model.

    Only the first ``_MAX_TTS_CHARS`` characters are synthesized, to keep
    generation time and model stability under control.

    Args:
        story_text: Story text to convert.

    Returns:
        BytesIO: Audio data in WAV format, positioned at the start.
    """
    tts_pipe = _load_tts_pipeline()

    # Limit the input text itself. The text-to-speech pipeline does not take
    # a bare ``max_length=`` call keyword (generation options are passed via
    # ``forward_params`` / ``generate_kwargs``), so none is passed here.
    audio_output = tts_pipe(story_text[:_MAX_TTS_CHARS])

    # soundfile expects (frames,) or (frames, channels). The model may return
    # the waveform with a leading batch/channel axis, e.g. (1, n_samples);
    # drop singleton axes and put frames first.
    samples = np.squeeze(np.asarray(audio_output["audio"]))
    if samples.ndim == 2 and samples.shape[0] < samples.shape[1]:
        samples = samples.T

    # Encode the samples as WAV into an in-memory buffer
    audio_bytes = io.BytesIO()
    sf.write(
        audio_bytes,
        samples,
        audio_output["sampling_rate"],
        format='WAV',
    )
    audio_bytes.seek(0)  # Reset pointer for Streamlit audio player
    return audio_bytes
def main():
    """Run the Streamlit workflow: upload image -> caption -> story -> audio.

    Streamlit re-executes this function on every widget interaction
    (including the "read aloud" button). The caption and story are therefore
    kept in ``st.session_state`` and only recomputed when a different image
    is uploaded, so the story being read is the one already on screen.
    """
    # NOTE(review): emoji are written as \N{...} escapes so the labels do not
    # depend on the file being read back with the right character encoding.
    st.title("\N{OPEN BOOK} Image Story Generator with Audio")
    st.write("Upload an image to get a magical story read aloud!")

    # Image upload widget (supports JPG/PNG)
    uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
    if not uploaded_image:
        return

    image = Image.open(uploaded_image).convert("RGB")  # Ensure RGB format
    st.image(image, use_column_width=True)  # Display uploaded image

    # Identify the upload by content so a new image invalidates old results.
    state = st.session_state
    image_digest = hashlib.sha256(uploaded_image.getvalue()).hexdigest()
    if state.get("story_app_image_digest") != image_digest:
        state["story_app_image_digest"] = image_digest
        for stale_key in ("story_app_caption", "story_app_story"):
            if stale_key in state:
                del state[stale_key]

    # Image analysis section
    if "story_app_caption" not in state:
        with st.spinner("\N{SPARKLES} Analyzing image..."):
            state["story_app_caption"] = generate_image_caption(image)
    caption = state["story_app_caption"]
    st.subheader("Image Understanding")
    st.write(caption)

    # Story generation section
    if "story_app_story" not in state:
        with st.spinner("\N{MEMO} Writing story..."):
            state["story_app_story"] = text2story(caption)
    story = state["story_app_story"]
    st.subheader("Magical Story")
    st.write(story)

    # Audio generation section
    if st.button("\N{HEADPHONE} Read Story Aloud"):
        with st.spinner("\N{SPEAKER WITH THREE SOUND WAVES} Generating audio..."):
            # UI boundary: report any failure to the user instead of crashing.
            try:
                # Trim to 400 characters for model stability
                audio_bytes = story_to_speech(story[:400])
                st.audio(audio_bytes, format="audio/wav")
            except Exception as e:
                st.error(f"Error generating audio: {e}")
if __name__ == "__main__":
    # Start the Streamlit application
    main()