ccclllwww committed on
Commit
ac70fac
·
verified ·
1 Parent(s): d5a4509

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +142 -27
app.py CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  import streamlit as st
2
  from PIL import Image
3
  import time
@@ -7,6 +11,10 @@ from datasets import load_dataset
7
  import soundfile as sf
8
  import torch
9
 
 
 
 
 
10
  # Initialize image captioning pipeline with pretrained model
11
  # Model source: Hugging Face Model Hub
12
  _image_caption_pipeline = pipeline(
@@ -31,6 +39,10 @@ _SPEECH_PIPELINE = pipeline("text-to-speech", model="microsoft/speecht5_tts")
31
  _EMBEDDINGS_DATASET = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
32
  _DEFAULT_SPEAKER_EMBEDDING = torch.tensor(_EMBEDDINGS_DATASET[7306]["xvector"]).unsqueeze(0)
33
 
 
 
 
 
34
  def generate_image_caption(input_image):
35
  """
36
  Generate a textual description for an input image using a pretrained model.
@@ -196,35 +208,138 @@ def generate_audio_from_story(story_text: str, output_path: str = "output.wav")
196
  raise RuntimeError(f"Audio synthesis failed: {str(error)}") from error
197
 
198
 
199
- # App title
200
- st.title("Best Story Teller")
 
 
 
 
 
 
 
201
 
202
- # Write some text
203
- st.write("Upload a picture and start your journey of creativeness and imagination")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
 
205
- # File uploader for image and audio
206
- uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
 
 
207
 
208
- # Display image with spinner
 
 
 
 
 
209
  if uploaded_image is not None:
210
- with st.spinner("Loading image..."):
 
211
  image = Image.open(uploaded_image)
212
- st.image(image, caption="Uploaded Image", use_column_width=True)
213
- with st.spinner("Captioning image..."):
214
- caption_from_file = generate_image_caption(image)
215
- with st.spinner("Adding some magics and imagination..."):
216
- system_prompt = """"Create magical children's stories (ages 4-8) from image captions. Include:
217
- Friendly animal/human characters
218
- Simple plot with happy resolution
219
- Sensory details ("glowing firefly wings") and sounds ("Splash!")
220
- Gentle lessons about friendship/courage/nature
221
- Choices ("Should the fox knock or sing?")
222
- Key image elements woven into the story
223
- Diverse characters, no stereotypes
224
- End with happy twist + rhyming moral ("Brave small paws, when they try/Can touch the stars in the sky"). Keep language warm and playful."""
225
- user_prompt = caption_from_file
226
- story = generate_story_content(system_prompt, user_prompt)
227
- st.write(story)
228
- with st.spinner("Finding the best voice actor"):
229
- generated_audio = generate_audio_from_story(story,"childrens_story.wav")
230
- st.audio(generated_audio)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ======================================
2
+ # Package Import
3
+ # ======================================
4
+
5
  import streamlit as st
6
  from PIL import Image
7
  import time
 
11
  import soundfile as sf
12
  import torch
13
 
14
+ # ======================================
15
+ # Basic Initialization
16
+ # ======================================
17
+
18
  # Initialize image captioning pipeline with pretrained model
19
  # Model source: Hugging Face Model Hub
20
  _image_caption_pipeline = pipeline(
 
39
  _EMBEDDINGS_DATASET = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
40
  _DEFAULT_SPEAKER_EMBEDDING = torch.tensor(_EMBEDDINGS_DATASET[7306]["xvector"]).unsqueeze(0)
41
 
42
+ # ======================================
43
+ # Function settings
44
+ # ======================================
45
+
46
  def generate_image_caption(input_image):
47
  """
48
  Generate a textual description for an input image using a pretrained model.
 
208
  raise RuntimeError(f"Audio synthesis failed: {str(error)}") from error
209
 
210
 
211
+ # ======================================
212
+ # Page Configuration & Custom Styling
213
+ # ======================================
214
+ st.set_page_config(
215
+ page_title="Magic Story Generator",
216
+ page_icon="🧚",
217
+ layout="wide",
218
+ initial_sidebar_state="collapsed"
219
+ )
220
 
221
+ # Custom CSS styling for child-friendly interface
222
+ st.markdown("""
223
+ <style>
224
+ /* Primary title styling */
225
+ .main-title {
226
+ color: #E91E63;
227
+ font-size: 2.8rem;
228
+ text-align: center;
229
+ padding: 20px;
230
+ text-shadow: 2px 2px #FFC107;
231
+ }
232
+
233
+ /* Prompt buttons styling */
234
+ .prompt-btn {
235
+ background: #4CAF50 !important;
236
+ border-radius: 15px !important;
237
+ padding: 15px 30px !important;
238
+ font-size: 1.1rem !important;
239
+ margin: 10px;
240
+ }
241
+
242
+ /* Story container styling */
243
+ .story-container {
244
+ background: #FFF3E0;
245
+ border-radius: 20px;
246
+ padding: 25px;
247
+ margin: 20px 0;
248
+ box-shadow: 0 4px 8px rgba(0,0,0,0.1);
249
+ }
250
+
251
+ /* Progress spinner styling */
252
+ .stSpinner > div {
253
+ font-size: 1.2rem !important;
254
+ color: #9C27B0 !important;
255
+ }
256
+ </style>
257
+ """, unsafe_allow_html=True)
258
 
259
+ # ======================================
260
+ # Main Application Interface
261
+ # ======================================
262
+ st.markdown('<p class="main-title">🧚 Welcome to Magic Story Maker!</p>', unsafe_allow_html=True)
263
 
264
+ # File upload section
265
+ with st.container():
266
+ st.subheader("Step 1: Upload Your Picture")
267
+ uploaded_image = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"],label_visibility="collapsed")
268
+
269
+ # Main processing flow
270
  if uploaded_image is not None:
271
+ # Display uploaded image
272
+ with st.spinner("✨ Magical image processing..."):
273
  image = Image.open(uploaded_image)
274
+ st.image(image, caption="Your Magical Image", use_column_width=True)
275
+
276
+ # Prompt selection section
277
+ with st.container():
278
+ st.subheader("Step 2: Choose Story Style")
279
+
280
+ # Create three columns for prompt buttons
281
+ col1, col2, col3 = st.columns(3)
282
+ with col1:
283
+ if st.button("📚 Learning Story",
284
+ help="Generate educational story with life lessons",
285
+ key="edu_btn"):
286
+ st.session_state.selected_prompt = "educational"
287
+ with col2:
288
+ if st.button("🌠 Fantasy Adventure",
289
+ help="Create magical adventure story",
290
+ key="fantasy_btn"):
291
+ st.session_state.selected_prompt = "adventure"
292
+ with col3:
293
+ if st.button("🐻 Animal Friends",
294
+ help="Make story about friendly animals",
295
+ key="animal_btn"):
296
+ st.session_state.selected_prompt = "animal"
297
+
298
+ # Define prompt templates
299
+ PROMPT_TEMPLATES = {
300
+ "educational": {
301
+ "system": "You are a children's educator. Create a simple 150-word story that teaches basic life skills or moral lessons.",
302
+ "icon": "📚"
303
+ },
304
+ "adventure": {
305
+ "system": "You are a fantasy writer. Create a 150-word magical adventure story suitable for children.",
306
+ "icon": "🌠"
307
+ },
308
+ "animal": {
309
+ "system": "You are an animal expert. Create a 150-word story about friendly animals learning together.",
310
+ "icon": "🐻"
311
+ }
312
+ }
313
+
314
+ # Story generation section
315
+ with st.spinner(f"{PROMPT_TEMPLATES[st.session_state.selected_prompt]['icon']} Creating your story..."):
316
+ # Generate image caption
317
+ image_caption = generate_image_caption(image)
318
+
319
+ # Generate story content
320
+ selected_template = PROMPT_TEMPLATES[st.session_state.selected_prompt]
321
+ story_text = generate_story_content(
322
+ system_prompt=selected_template["system"],
323
+ user_prompt=image_caption
324
+ )
325
+
326
+ # Display formatted story
327
+ st.subheader("Step 3: Your Magical Story")
328
+ st.markdown(f'<div class="story-container">{story_text}</div>',
329
+ unsafe_allow_html=True)
330
+
331
+ # Audio generation section
332
+ with st.spinner("🔮 Preparing story narration..."):
333
+ audio_file = generate_audio_from_story(story_text, "story_audio.wav")
334
+ st.subheader("🎧 Listen to Your Story")
335
+ st.audio(audio_file)
336
+
337
+ # Help section
338
+ st.markdown("---")
339
+ st.subheader("🌟 How to Use:")
340
+ st.info("""
341
+ 1. Upload any picture (animals, nature, or people work best!)
342
+ 2. Choose your favorite story style
343
+ 3. Wait for magic to happen!
344
+ 4. Listen to your personalized story
345
+ """)