Spaces:
Sleeping
Sleeping
File size: 10,795 Bytes
ac70fac 70fa8c1 243f6f7 70fa8c1 ac70fac 70fa8c1 4204a24 70fa8c1 5bd24ba 70fa8c1 ac70fac 70fa8c1 243f6f7 70fa8c1 5bd24ba 70fa8c1 5bd24ba 70fa8c1 ac70fac 5227f85 ac70fac 70fa8c1 ac70fac 1ccbf4f ac70fac eeb972c ac70fac 70fa8c1 ac70fac 1ccbf4f 70fa8c1 ac70fac 5227f85 ac70fac 70fa8c1 ac70fac 70fa8c1 ac70fac eeb972c ac70fac 5227f85 ac70fac 5227f85 ac70fac 5227f85 eeb972c 5227f85 c1d7ca6 5227f85 c1d7ca6 5227f85 c1d7ca6 5227f85 ac70fac 5227f85 ac70fac 5227f85 eeb972c 5227f85 ac70fac 5227f85 ac70fac 5227f85 eeb972c ac70fac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 |
# ======================================
# Package Import
# ======================================
import html
import time
from typing import Tuple

import soundfile as sf
import streamlit as st
import torch
from datasets import load_dataset
from PIL import Image
from transformers import pipeline
# ======================================
# Basic Initialization
# ======================================
# Streamlit re-executes this script from the top on every user interaction.
# Building the models inside `st.cache_resource` loaders means each one is
# constructed only on the first run and reused on later reruns.
# `show_spinner=False` keeps the loaders from rendering any UI element, since
# they run before `st.set_page_config` is called further down.

# Row of the x-vector dataset used as the default narrator voice.
_DEFAULT_SPEAKER_INDEX = 7306


@st.cache_resource(show_spinner=False)
def _load_image_caption_pipeline():
    """Build the image-to-text pipeline used to caption uploaded pictures."""
    return pipeline(
        task="image-to-text",
        model="cnmoro/tiny-image-captioning",
    )


@st.cache_resource(show_spinner=False)
def _load_text_generation_pipeline():
    """Build the text-generation pipeline used to write the story."""
    return pipeline(
        "text-generation",
        model="Qwen/Qwen1.5-0.5B",
        max_new_tokens=100,
    )


@st.cache_resource(show_spinner=False)
def _load_speech_components():
    """Build the TTS pipeline and load the default speaker embedding.

    Returns:
        A tuple ``(speech_pipeline, embeddings_dataset, speaker_embedding)``
        where ``speaker_embedding`` is the selected x-vector with a leading
        batch dimension added via ``unsqueeze(0)``.
    """
    speech_pipeline = pipeline("text-to-speech", model="microsoft/speecht5_tts")
    embeddings_dataset = load_dataset(
        "Matthijs/cmu-arctic-xvectors", split="validation"
    )
    speaker_embedding = torch.tensor(
        embeddings_dataset[_DEFAULT_SPEAKER_INDEX]["xvector"]
    ).unsqueeze(0)
    return speech_pipeline, embeddings_dataset, speaker_embedding


# Module-level handles used by the generation functions below.
_image_caption_pipeline = _load_image_caption_pipeline()
_text_generation_pipeline = _load_text_generation_pipeline()
(
    _SPEECH_PIPELINE,
    _EMBEDDINGS_DATASET,
    _DEFAULT_SPEAKER_EMBEDDING,
) = _load_speech_components()
# ======================================
# Function settings
# ======================================
def generate_image_caption(input_image):
    """
    Describe an image in natural language with the captioning pipeline.

    Args:
        input_image (Union[PIL.Image.Image, str]): The picture to caption,
            given either as a PIL Image object or as a filesystem path
            pointing at an image file.

    Returns:
        str: The caption text produced for the image.

    Example:
        >>> from PIL import Image
        >>> img = Image.open("photo.jpg")
        >>> caption = generate_image_caption(img)
        >>> print(f"Caption: {caption}")
    """
    # The pipeline answers with a list of result dicts; the caption is stored
    # under 'generated_text' in the first entry.
    predictions = _image_caption_pipeline(input_image)
    return predictions[0]['generated_text']
def generate_story_content(system_prompt: str, user_prompt: str) -> str:
    """
    Generate a children's story from a system prompt and a user prompt.

    The two prompts are sent to the module-level text-generation pipeline as
    a chat conversation, and the content of the reply message is returned.

    Args:
        system_prompt: Defines the assistant's role and writing constraints
        user_prompt: Describes the story scenario and specific elements to include

    Returns:
        Generated story text with surrounding whitespace removed

    Raises:
        RuntimeError: If text generation fails, the pipeline output does not
            have the expected structure, or the generated story is empty

    Example:
        >>> story = generate_story_content(
        ...     "You are a helpful children's author...",
        ...     "Kids playing with dogs in a sunny meadow..."
        ... )
    """
    # Prepare chat message structure
    conversation_history = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    try:
        generation_outputs = _text_generation_pipeline(conversation_history)
        # For chat-style input the result carries the conversation with the
        # model's reply appended, so read the last message instead of relying
        # on a fixed position in the list.
        reply_message = generation_outputs[0]["generated_text"][-1]
        story_text = reply_message["content"].strip()
    except Exception as error:
        raise RuntimeError(f"Story generation failed: {str(error)}") from error

    # An empty story would only fail later, in the text-to-speech step, with
    # a less helpful message; report it here instead.
    if not story_text:
        raise RuntimeError("Story generation failed: model returned an empty story")

    return story_text
def generate_audio_from_story(
    story_text: str,
    output_path: str = "output.wav",
    speaker_embedding: "torch.Tensor | None" = None,
) -> str:
    """
    Convert text story to speech audio file using text-to-speech synthesis.

    Args:
        story_text: Input story text to synthesize
        output_path: Path to save generated audio (default: 'output.wav')
        speaker_embedding: Optional speaker embedding forwarded to the TTS
            pipeline as ``speaker_embeddings``. When omitted, the module-level
            default speaker embedding is used.

    Returns:
        Path to generated audio file

    Raises:
        ValueError: For empty/invalid input text
        RuntimeError: If audio generation fails

    Example:
        >>> generate_audio_from_story("Children playing in the park", "story_audio.wav")
        'story_audio.wav'
    """
    # Validate input text before touching the model
    if not isinstance(story_text, str) or not story_text.strip():
        raise ValueError("Input story text must be a non-empty string")

    try:
        # Resolved inside the try so a missing default is reported through
        # the same RuntimeError path as any other synthesis failure.
        if speaker_embedding is None:
            speaker_embedding = _DEFAULT_SPEAKER_EMBEDDING

        speech_output = _SPEECH_PIPELINE(
            story_text,
            forward_params={"speaker_embeddings": speaker_embedding},
        )

        # Save audio to WAV file using the sampling rate reported by the pipeline
        sf.write(
            output_path,
            speech_output["audio"],
            samplerate=speech_output["sampling_rate"],
        )
    except Exception as error:
        raise RuntimeError(f"Audio synthesis failed: {str(error)}") from error

    return output_path
# ======================================
# Page Configuration & Custom Styling
# ======================================
# Browser-tab title/icon and overall page layout.
# NOTE(review): the icon literal looks mis-encoded in this copy of the file;
# confirm the source file's encoding before changing it.
_page_settings = {
    "page_title": "Magic Story Generator",
    "page_icon": "๐ง",
    "layout": "centered",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_page_settings)

# Custom CSS for the child-friendly interface. The class names defined here
# (.story-container, .image-caption) are referenced by the HTML snippets the
# app renders for the caption and the story.
_CUSTOM_CSS = """
<style>
/* Prompt buttons styling */
.prompt-btn {
background: #4CAF50 !important;
border-radius: 15px !important;
padding: 15px 30px !important;
font-size: 1.1rem !important;
margin: 10px;
}
/* Story container styling */
.story-container {
background: #FFF3E0;
border-radius: 20px;
padding: 25px;
margin: 20px 0;
box-shadow: 0 4px 8px rgba(0,0,0,0.1);
}
/* Image caption styling */
.image-caption {
border-left: 4px solid #4CAF50;
padding-left: 1rem;
font-size: 1.1rem;
color: #2c3e50;
margin: 1.5rem 0;
}
/* Progress spinner styling */
.stSpinner > div {
font-size: 1.2rem !important;
color: #9C27B0 !important;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# ======================================
# Main Application Interface
# ======================================
st.title("๐ง Welcome to Magic Story Maker!")

# Step 1: picture upload widget (label hidden, only common image types accepted)
upload_section = st.container()
with upload_section:
    st.subheader("Step 1: Upload Your Picture")
    uploaded_image = st.file_uploader(
        label="Choose an image...",
        type=["png", "jpg", "jpeg"],
        label_visibility="collapsed",
    )

# Seed the confirmation flag the first time the session runs so later reads
# always find it.
if 'confirmed' not in st.session_state:
    st.session_state['confirmed'] = False
# Main processing flow: runs only once a picture has been uploaded.
if uploaded_image is not None:
    # Display uploaded image
    with st.spinner("โจ Magical image processing..."):
        image = Image.open(uploaded_image)
        # NOTE(review): newer Streamlit releases deprecate `use_column_width`
        # in favour of `use_container_width` — confirm against the pinned version.
        st.image(image, caption="Your Magical Image", use_column_width=True)

    # Prompt selection section
    with st.container():
        st.subheader("Step 2: Choose Story Style")

        # (button label, tooltip, widget key, prompt template name)
        style_buttons = (
            ("๐ Learning Story",
             "Generate educational story with life lessons",
             "edu_btn",
             "educational"),
            ("๐ Fantasy Adventure",
             "Create magical adventure story",
             "fantasy_btn",
             "adventure"),
            ("๐ป Animal Friends",
             "Make story about friendly animals",
             "animal_btn",
             "animal"),
        )

        # One column per style button
        for column, (label, help_text, button_key, prompt_name) in zip(
            st.columns(3), style_buttons
        ):
            with column:
                if st.button(label, help=help_text, key=button_key):
                    st.session_state.selected_prompt = prompt_name
                    # Picking a style requires confirming again before generating.
                    st.session_state.confirmed = False

    # Add confirmation button
    with st.container():
        st.subheader("Step 3: Confirm Selection")
        if st.button("๐ฎ Start Magic Creation!",
                     help="Click to generate story after choosing style",
                     type="primary"):
            st.session_state.confirmed = True

    # Only show generation when confirmed
    if st.session_state.get('confirmed', False):
        # Generate image caption with loading state
        with st.spinner("๐ Analyzing image and generating description..."):
            image_caption = generate_image_caption(image)

        # Display caption results using CSS class. The caption is model output
        # placed into raw HTML, so it is escaped before rendering.
        st.subheader("๐ Image Understanding")
        st.markdown(
            f'<div class="story-container image-caption">{html.escape(image_caption)}</div>',
            unsafe_allow_html=True,
        )
        st.write("")  # Add spacing

        # Define prompt templates
        PROMPT_TEMPLATES = {
            "educational": {
                "system": "You are a children's educator. Create a simple 100-word story that teaches basic life skills or moral lessons.",
                "icon": "๐"
            },
            "adventure": {
                "system": "You are a fantasy writer. Create a 100-word magical adventure story suitable for children.",
                "icon": "๐ "
            },
            "animal": {
                "system": "You are an animal expert. Create a 100-word story about friendly animals learning together.",
                "icon": "๐ป"
            }
        }

        # Fall back to the educational style when no style button was clicked
        selected_prompt = st.session_state.get("selected_prompt", "educational")
        selected_template = PROMPT_TEMPLATES[selected_prompt]

        # Story generation section: a failure is shown as an error message
        # instead of crashing the page with a traceback.
        story_text = None
        try:
            with st.spinner(f"{selected_template['icon']} Creating your story..."):
                story_text = generate_story_content(
                    system_prompt=selected_template["system"],
                    user_prompt=image_caption,
                )
        except RuntimeError as error:
            st.error(f"Sorry, the story could not be created: {error}")

        if story_text is not None:
            # Display formatted story (escaped for the same reason as the caption)
            st.subheader("โจ Your Magical Story")
            st.markdown(
                f'<div class="story-container">{html.escape(story_text)}</div>',
                unsafe_allow_html=True,
            )

            # Audio generation section
            try:
                with st.spinner("๐ฎ Preparing story narration..."):
                    audio_file = generate_audio_from_story(story_text, "story_audio.wav")
            except (ValueError, RuntimeError) as error:
                st.error(f"Sorry, the narration could not be created: {error}")
            else:
                st.subheader("๐ง Listen to Your Story")
                st.audio(audio_file)
    else:
        # Show waiting message
        st.info("โน๏ธ Please select a story style and click the confirmation button to continue")
# Help section: a divider followed by numbered usage instructions.
st.markdown("---")
st.subheader("๐ How to Use:")

usage_steps = (
    "Upload any picture (animals, nature, or people work best!)",
    "Choose your favorite story style",
    "Click the confirmation button",
    "Wait for image analysis to complete",
    "Enjoy your personalized story and audio!",
)
# Number the steps from 1 and wrap the list in leading/trailing newlines.
numbered_steps = "\n".join(
    f"{position}. {step}" for position, step in enumerate(usage_steps, start=1)
)
st.info("\n" + numbered_steps + "\n")