LTH001 committed on
Commit
63497b6
·
verified ·
1 Parent(s): 21fe50e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -11
app.py CHANGED
@@ -3,50 +3,114 @@ import streamlit as st
3
  from transformers import pipeline
4
  from PIL import Image
5
  import io
 
 
6
 
7
-
8
  # function part
9
  def generate_image_caption(image):
10
- """Generates a caption for the given image using a pre-trained model."""
 
 
 
 
 
 
11
  img2caption = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
12
  result = img2caption(image)
13
  return result[0]['generated_text']
14
 
15
  def text2story(text):
16
- """Generates a children's story from text input with genre adaptation"""
 
 
 
 
 
 
17
  story_prompt = f"Create a funny 100-word story for 8-year-olds about: {text}. Include: "
18
  story_prompt += "1) A silly character 2) Magical object 3) Sound effects 4) Happy ending"
19
 
 
20
  pipe = pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")
 
 
21
  story_text = pipe(
22
  story_prompt,
23
- max_new_tokens=200,
24
- temperature=0.9,
25
- top_k=50
26
  )[0]['generated_text']
27
- return story_text.split("Happy ending")[-1].strip() # Clean output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  def main():
30
- st.title("📖 Image Story Generator")
31
- st.write("Upload an image and get a magical children's story!")
 
 
32
 
 
33
  uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
34
 
35
  if uploaded_image:
36
- image = Image.open(uploaded_image).convert("RGB")
37
- st.image(image, use_column_width=True)
 
38
 
 
39
  with st.spinner("✨ Analyzing image..."):
40
  caption = generate_image_caption(image)
41
 
 
42
  st.subheader("Image Understanding")
43
  st.write(caption)
44
 
 
45
  with st.spinner("📖 Writing story..."):
46
  story = text2story(caption)
47
 
 
48
  st.subheader("Magical Story")
49
  st.write(story)
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  if __name__ == "__main__":
 
52
  main()
 
3
  from transformers import pipeline
4
  from PIL import Image
5
  import io
6
+ import numpy as np
7
+ import soundfile as sf # For handling audio file operations
8
 
 
9
  # function part
10
  def generate_image_caption(image):
11
+ """Generates a caption for the given image using a pre-trained model.
12
+ Args:
13
+ image: PIL Image object
14
+ Returns:
15
+ str: Generated caption text
16
+ """
17
+ # Initialize image-to-text pipeline with BLIP model
18
  img2caption = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
19
  result = img2caption(image)
20
  return result[0]['generated_text']
21
 
22
  def text2story(text):
23
+ """Generates a children's story from text input using story generation model.
24
+ Args:
25
+ text: Input text prompt
26
+ Returns:
27
+ str: Generated story text
28
+ """
29
+ # Craft prompt with specific requirements for children's stories
30
  story_prompt = f"Create a funny 100-word story for 8-year-olds about: {text}. Include: "
31
  story_prompt += "1) A silly character 2) Magical object 3) Sound effects 4) Happy ending"
32
 
33
+ # Initialize text generation pipeline
34
  pipe = pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")
35
+
36
+ # Generate story with controlled randomness parameters
37
  story_text = pipe(
38
  story_prompt,
39
+ max_new_tokens=200, # Limit story length
40
+ temperature=0.9, # Control randomness (higher = more creative)
41
+ top_k=50 # Limit vocabulary choices
42
  )[0]['generated_text']
43
+
44
+ # Clean output by splitting at the required ending marker
45
+ return story_text.split("Happy ending")[-1].strip()
46
+
47
@st.cache_resource(show_spinner=False)
def _load_tts_pipeline():
    """Build the Bark text-to-speech pipeline once and reuse it across reruns."""
    return pipeline("text-to-speech", model="suno/bark-small")


def story_to_speech(story_text):
    """Convert story text to WAV audio with the Bark text-to-speech model.

    Args:
        story_text: Story text to read aloud. Callers are expected to keep it
            short; this function does not truncate it.

    Returns:
        io.BytesIO: In-memory WAV file, positioned at the start so it can be
        handed straight to an audio player.
    """
    # No bare generation keyword (such as max_length) is passed here: the
    # text-to-speech pipeline takes generation options through
    # forward_params / generate_kwargs, and input length is bounded by the
    # caller instead.
    audio_output = _load_tts_pipeline()(story_text)

    # The waveform may come back with a leading batch axis, e.g. (1, samples).
    # soundfile expects (frames,) or (frames, channels), so drop singleton
    # axes and, if two axes remain in channels-first order, transpose.
    samples = np.squeeze(np.asarray(audio_output["audio"]))
    if samples.ndim == 2 and samples.shape[0] < samples.shape[1]:
        samples = samples.T

    audio_bytes = io.BytesIO()
    sf.write(audio_bytes, samples, audio_output["sampling_rate"], format='WAV')
    audio_bytes.seek(0)  # Rewind so the reader starts at the WAV header
    return audio_bytes
71
 
72
  def main():
73
+ """Main function for Streamlit application workflow"""
74
+ # Configure page header
75
+ st.title("πŸ“– Image Story Generator with Audio")
76
+ st.write("Upload an image to get a magical story read aloud!")
77
 
78
+ # Image upload widget (supports JPG/PNG)
79
  uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
80
 
81
  if uploaded_image:
82
+ # Process image
83
+ image = Image.open(uploaded_image).convert("RGB") # Ensure RGB format
84
+ st.image(image, use_column_width=True) # Display uploaded image
85
 
86
+ # Image analysis section
87
  with st.spinner("✨ Analyzing image..."):
88
  caption = generate_image_caption(image)
89
 
90
+ # Display image understanding
91
  st.subheader("Image Understanding")
92
  st.write(caption)
93
 
94
+ # Story generation section
95
  with st.spinner("πŸ“– Writing story..."):
96
  story = text2story(caption)
97
 
98
+ # Display generated story
99
  st.subheader("Magical Story")
100
  st.write(story)
101
+
102
+ # Audio generation section
103
+ if st.button("🎧 Read Story Aloud"):
104
+ with st.spinner("πŸ”Š Generating audio..."):
105
+ try:
106
+ # Convert story to audio (trim to 400 characters for model stability)
107
+ audio_bytes = story_to_speech(story[:400])
108
+
109
+ # Display audio player
110
+ st.audio(audio_bytes, format="audio/wav")
111
+ except Exception as e:
112
+ st.error(f"Error generating audio: {str(e)}")
113
 
114
  if __name__ == "__main__":
115
+ # Start the Streamlit application
116
  main()