Spaces:
Sleeping
Sleeping
# ====================================== | |
# Package Import | |
# ====================================== | |
import html
import time
from typing import Tuple

import soundfile as sf
import streamlit as st
import torch
from datasets import load_dataset
from PIL import Image
from transformers import pipeline
# ====================================== | |
# Basic Initialization | |
# ====================================== | |
# Streamlit re-executes this script from top to bottom on every user
# interaction, so plain module-level construction would rebuild every model on
# each rerun. Each heavyweight object is therefore created inside an
# st.cache_resource loader and shared across reruns.
# show_spinner=False keeps the loaders from emitting any UI element before
# st.set_page_config() is called further down the script.


@st.cache_resource(show_spinner=False)
def _load_image_caption_pipeline():
    """Build the image-to-text pipeline used to caption uploaded images."""
    return pipeline(
        task="image-to-text",
        model="cnmoro/tiny-image-captioning",
    )


@st.cache_resource(show_spinner=False)
def _load_text_generation_pipeline():
    """Build the text-generation pipeline used to write the story."""
    return pipeline(
        "text-generation",
        model="Qwen/Qwen1.5-0.5B",
        max_new_tokens=100,
    )


@st.cache_resource(show_spinner=False)
def _load_speech_pipeline():
    """Build the text-to-speech pipeline used to narrate the story."""
    return pipeline("text-to-speech", model="microsoft/speecht5_tts")


@st.cache_resource(show_spinner=False)
def _load_embeddings_dataset():
    """Load the validation split of the speaker x-vector dataset."""
    return load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")


# Image captioning pipeline
_image_caption_pipeline = _load_image_caption_pipeline()
# Story (text generation) pipeline
_text_generation_pipeline = _load_text_generation_pipeline()
# Text-to-speech pipeline and the speaker embedding it is conditioned on
_SPEECH_PIPELINE = _load_speech_pipeline()
_EMBEDDINGS_DATASET = _load_embeddings_dataset()
# Row 7306 of the dataset supplies the default voice; unsqueeze(0) adds a
# leading dimension of size 1 to the x-vector.
_DEFAULT_SPEAKER_EMBEDDING = torch.tensor(_EMBEDDINGS_DATASET[7306]["xvector"]).unsqueeze(0)
# ====================================== | |
# Function settings | |
# ====================================== | |
def generate_image_caption(input_image):
    """
    Describe an image in natural language.

    The image is handed unchanged to the module-level image-to-text
    pipeline, and the text of its first result is returned.

    Args:
        input_image: The image to caption. The app passes a PIL image;
            the original documentation also allows a filesystem path string.

    Returns:
        str: The ``generated_text`` field of the first pipeline result.

    Example:
        >>> from PIL import Image
        >>> img = Image.open("photo.jpg")
        >>> caption = generate_image_caption(img)
        >>> print(f"Caption: {caption}")
    """
    # Only the first entry of the pipeline's result list is used.
    return _image_caption_pipeline(input_image)[0]["generated_text"]
def generate_story_content(system_prompt: str, user_prompt: str) -> str:
    """
    Generate a story from a system prompt and a user prompt.

    The two prompts are sent as a chat conversation to the module-level
    text-generation pipeline, and the content of the reply is returned as-is
    (no further cleaning is applied).

    Args:
        system_prompt: Defines the assistant's role and writing constraints
        user_prompt: Describes the story scenario and specific elements to include

    Returns:
        The ``content`` of the last message in the conversation returned by
        the pipeline.

    Raises:
        RuntimeError: If the pipeline call or the extraction of the reply
            fails; the original exception is chained as the cause.

    Example:
        >>> story = generate_story_content(
        ...     "You are a helpful children's author...",
        ...     "Kids playing with dogs in a sunny meadow..."
        ... )
    """
    # Chat-style input: one system message followed by one user message
    conversation_history = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    try:
        generation_results = _text_generation_pipeline(conversation_history)
        # For chat input the pipeline returns the conversation with the
        # generated reply appended. Taking the last message (rather than a
        # fixed index) stays correct if the prompt ever gains or loses a message.
        reply_message = generation_results[0]["generated_text"][-1]
        return reply_message["content"]
    except Exception as error:
        raise RuntimeError(f"Story generation failed: {error}") from error
def generate_audio_from_story(story_text: str, output_path: str = "output.wav") -> str:
    """
    Synthesize speech for a story and write it to an audio file.

    Args:
        story_text: Story text to narrate; must be a non-blank string
        output_path: Destination of the audio file (default: 'output.wav')

    Returns:
        The ``output_path`` that was written

    Raises:
        ValueError: If ``story_text`` is not a string or is blank
        RuntimeError: If synthesis or writing the file fails; the original
            exception is chained as the cause

    Example:
        >>> generate_audio_from_story("Children playing in the park", "story_audio.wav")
        'story_audio.wav'
    """
    # Reject non-strings and whitespace-only text before touching the model
    has_text = isinstance(story_text, str) and bool(story_text.strip())
    if not has_text:
        raise ValueError("Input story text must be a non-empty string")

    try:
        # Synthesize with the module's default speaker embedding
        synthesis = _SPEECH_PIPELINE(
            story_text,
            forward_params={"speaker_embeddings": _DEFAULT_SPEAKER_EMBEDDING},
        )
        waveform = synthesis["audio"]
        sample_rate = synthesis["sampling_rate"]
        # Persist the waveform at the rate reported by the pipeline
        sf.write(output_path, waveform, samplerate=sample_rate)
    except Exception as error:
        raise RuntimeError(f"Audio synthesis failed: {error!s}") from error
    return output_path
# ====================================== | |
# Page Configuration & Custom Styling | |
# ====================================== | |
# Browser-tab metadata and overall page layout
_page_settings = {
    "page_title": "Magic Story Generator",
    "page_icon": "๐ง",
    "layout": "centered",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_page_settings)

# Stylesheet for the child-friendly look: prompt buttons, the story box, the
# caption box and the progress spinner. It is injected as raw HTML below.
_custom_css = """
<style>
/* Prompt buttons styling */
.prompt-btn {
background: #4CAF50 !important;
border-radius: 15px !important;
padding: 15px 30px !important;
font-size: 1.1rem !important;
margin: 10px;
}
/* Story container styling */
.story-container {
background: #FFF3E0;
border-radius: 20px;
padding: 25px;
margin: 20px 0;
box-shadow: 0 4px 8px rgba(0,0,0,0.1);
}
/* Image caption styling */
.image-caption {
border-left: 4px solid #4CAF50;
padding-left: 1rem;
font-size: 1.1rem;
color: #2c3e50;
margin: 1.5rem 0;
}
/* Progress spinner styling */
.stSpinner > div {
font-size: 1.2rem !important;
color: #9C27B0 !important;
}
</style>
"""
st.markdown(_custom_css, unsafe_allow_html=True)
# ====================================== | |
# Main Application Interface | |
# ====================================== | |
st.title("๐ง Welcome to Magic Story Maker!")

# Step 1: file upload section
with st.container():
    st.subheader("Step 1: Upload Your Picture")
    uploaded_image = st.file_uploader(
        "Choose an image...",
        type=["png", "jpg", "jpeg"],
        label_visibility="collapsed",
    )

# Initialize session state for confirmation status
if 'confirmed' not in st.session_state:
    st.session_state.confirmed = False

# Main processing flow: everything below needs an uploaded image
if uploaded_image is not None:
    # Display uploaded image
    with st.spinner("โจ Magical image processing..."):
        image = Image.open(uploaded_image)
        st.image(image, caption="Your Magical Image", use_column_width=True)

    # Step 2: prompt selection. Picking a style clears any earlier
    # confirmation, so generation waits for the confirm button again.
    with st.container():
        st.subheader("Step 2: Choose Story Style")
        # Create three columns for prompt buttons
        col1, col2, col3 = st.columns(3)
        with col1:
            if st.button("๐ Learning Story",
                         help="Generate educational story with life lessons",
                         key="edu_btn"):
                st.session_state.selected_prompt = "educational"
                st.session_state.confirmed = False
        with col2:
            if st.button("๐ Fantasy Adventure",
                         help="Create magical adventure story",
                         key="fantasy_btn"):
                st.session_state.selected_prompt = "adventure"
                st.session_state.confirmed = False
        with col3:
            if st.button("๐ป Animal Friends",
                         help="Make story about friendly animals",
                         key="animal_btn"):
                st.session_state.selected_prompt = "animal"
                st.session_state.confirmed = False

    # Step 3: confirmation button
    with st.container():
        st.subheader("Step 3: Confirm Selection")
        if st.button("๐ฎ Start Magic Creation!",
                     help="Click to generate story after choosing style",
                     type="primary"):
            st.session_state.confirmed = True

    # Only show generation when confirmed
    if st.session_state.get('confirmed', False):
        # Generate image caption with loading state
        with st.spinner("๐ Analyzing image and generating description..."):
            image_caption = generate_image_caption(image)

        # Display caption results using CSS class. The caption is model
        # output, so it is HTML-escaped before being placed inside markup
        # that is rendered with unsafe_allow_html=True.
        st.subheader("๐ Image Understanding")
        st.markdown(
            f'<div class="story-container image-caption">{html.escape(image_caption)}</div>',
            unsafe_allow_html=True,
        )
        st.write("")  # Add spacing

        # Prompt templates: one system prompt and spinner icon per story style
        PROMPT_TEMPLATES = {
            "educational": {
                "system": "You are a children's educator. Create a simple 100-word story that teaches basic life skills or moral lessons.",
                "icon": "๐"
            },
            "adventure": {
                "system": "You are a fantasy writer. Create a 100-word magical adventure story suitable for children.",
                "icon": "๐ "
            },
            "animal": {
                "system": "You are an animal expert. Create a 100-word story about friendly animals learning together.",
                "icon": "๐ป"
            }
        }

        # Fall back to the educational style when confirming without a choice
        selected_prompt = st.session_state.get("selected_prompt", "educational")
        selected_template = PROMPT_TEMPLATES[selected_prompt]

        # Story generation section: the caption is used as the user prompt
        with st.spinner(f"{selected_template['icon']} Creating your story..."):
            story_text = generate_story_content(
                system_prompt=selected_template["system"],
                user_prompt=image_caption
            )

        # Display formatted story (escaped for the same reason as the caption)
        st.subheader("โจ Your Magical Story")
        st.markdown(
            f'<div class="story-container">{html.escape(story_text)}</div>',
            unsafe_allow_html=True,
        )

        # Audio generation section: narrate the raw (unescaped) story text
        with st.spinner("๐ฎ Preparing story narration..."):
            audio_file = generate_audio_from_story(story_text, "story_audio.wav")
        st.subheader("๐ง Listen to Your Story")
        st.audio(audio_file)
    else:
        # Show waiting message
        st.info("โน๏ธ Please select a story style and click the confirmation button to continue")

# Help section
st.markdown("---")
st.subheader("๐ How to Use:")
st.info("""
1. Upload any picture (animals, nature, or people work best!)
2. Choose your favorite story style
3. Click the confirmation button
4. Wait for image analysis to complete
5. Enjoy your personalized story and audio!
""")