LTH001 committed on
Commit
0546ecd
·
verified ·
1 Parent(s): ffd9308

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -63
app.py CHANGED
@@ -1,117 +1,70 @@
1
- # import part
2
- !pip install streamlit pyngrok soundfile # soundfile for audio conversion
3
  import streamlit as st
4
  from transformers import pipeline
5
  from PIL import Image
6
  import io
7
- import numpy as np
8
- import soundfile as sf # For handling audio file operations
9
 
10
- # function part
11
  def generate_image_caption(image):
12
- """Generates a caption for the given image using a pre-trained model.
13
- Args:
14
- image: PIL Image object
15
- Returns:
16
- str: Generated caption text
17
- """
18
- # Initialize image-to-text pipeline with BLIP model
19
  img2caption = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
20
  result = img2caption(image)
21
  return result[0]['generated_text']
22
 
23
  def text2story(text):
24
- """Generates a children's story from text input using story generation model.
25
- Args:
26
- text: Input text prompt
27
- Returns:
28
- str: Generated story text
29
- """
30
- # Craft prompt with specific requirements for children's stories
31
- story_prompt = f"Create a funny 100-word story for 8-year-olds about: {text}. Include: "
32
- story_prompt += "1) A silly character 2) Magical object 3) Sound effects 4) Happy ending"
33
 
34
- # Initialize text generation pipeline
35
  pipe = pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")
36
-
37
- # Generate story with controlled randomness parameters
38
  story_text = pipe(
39
  story_prompt,
40
- max_new_tokens=200, # Limit story length
41
- temperature=0.9, # Control randomness (higher = more creative)
42
- top_k=50 # Limit vocabulary choices
43
  )[0]['generated_text']
44
-
45
- # Clean output by splitting at the required ending marker
46
  return story_text.split("Happy ending")[-1].strip()
47
 
48
  def story_to_speech(story_text):
49
- """Converts story text to audio using text-to-speech model.
50
- Args:
51
- story_text: Story text to convert
52
- Returns:
53
- BytesIO: Audio data in WAV format
54
- """
55
- # Initialize Bark text-to-speech pipeline
56
  tts_pipe = pipeline("text-to-speech", model="suno/bark-small")
 
57
 
58
- # Generate audio array (numpy array of sound samples)
59
- audio_output = tts_pipe(story_text, max_length=400) # Limit text length for stability
60
-
61
- # Convert numpy array to playable audio bytes
62
  audio_bytes = io.BytesIO()
63
- sf.write(
64
- audio_bytes,
65
- audio_output["audio"],
66
- audio_output["sampling_rate"],
67
- format='WAV'
68
- )
69
- audio_bytes.seek(0) # Reset pointer for Streamlit audio player
70
 
71
  return audio_bytes
72
 
73
  def main():
74
- """Main function for Streamlit application workflow"""
75
- # Configure page header
76
  st.title("📖 Image Story Generator with Audio")
77
  st.write("Upload an image to get a magical story read aloud!")
78
 
79
- # Image upload widget (supports JPG/PNG)
80
  uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
81
 
82
  if uploaded_image:
83
- # Process image
84
- image = Image.open(uploaded_image).convert("RGB") # Ensure RGB format
85
- st.image(image, use_column_width=True) # Display uploaded image
86
 
87
- # Image analysis section
88
  with st.spinner("✨ Analyzing image..."):
89
  caption = generate_image_caption(image)
90
 
91
- # Display image understanding
92
  st.subheader("Image Understanding")
93
  st.write(caption)
94
 
95
- # Story generation section
96
  with st.spinner("📖 Writing story..."):
97
  story = text2story(caption)
98
 
99
- # Display generated story
100
  st.subheader("Magical Story")
101
  st.write(story)
102
 
103
- # Audio generation section
104
  if st.button("🎧 Read Story Aloud"):
105
  with st.spinner("🔊 Generating audio..."):
106
  try:
107
- # Convert story to audio (trim to 400 characters for model stability)
108
- audio_bytes = story_to_speech(story[:400])
109
-
110
- # Display audio player
111
  st.audio(audio_bytes, format="audio/wav")
112
  except Exception as e:
113
  st.error(f"Error generating audio: {str(e)}")
114
 
115
  if __name__ == "__main__":
116
- # Start the Streamlit application
117
  main()
 
1
+ # app.py
 
2
  import streamlit as st
3
  from transformers import pipeline
4
  from PIL import Image
5
  import io
6
+ from scipy.io.wavfile import write as write_wav
 
7
 
 
8
  def generate_image_caption(image):
9
+ """Generates a caption for the given image"""
 
 
 
 
 
 
10
  img2caption = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
11
  result = img2caption(image)
12
  return result[0]['generated_text']
13
 
14
  def text2story(text):
15
+ """Generates a children's story from text input"""
16
+ story_prompt = f"Create a funny 100-word story for 8-year-olds about: {text}. Include: 1) A silly character 2) Magical object 3) Sound effects 4) Happy ending"
 
 
 
 
 
 
 
17
 
 
18
  pipe = pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")
 
 
19
  story_text = pipe(
20
  story_prompt,
21
+ max_new_tokens=200,
22
+ temperature=0.9,
23
+ top_k=50
24
  )[0]['generated_text']
 
 
25
  return story_text.split("Happy ending")[-1].strip()
26
 
27
  def story_to_speech(story_text):
28
+ """Converts story text to audio using TTS"""
 
 
 
 
 
 
29
  tts_pipe = pipeline("text-to-speech", model="suno/bark-small")
30
+ audio_output = tts_pipe(story_text[:400])
31
 
32
+ # Convert numpy array to bytes using scipy
 
 
 
33
  audio_bytes = io.BytesIO()
34
+ write_wav(audio_bytes, audio_output["sampling_rate"], audio_output["audio"])
35
+ audio_bytes.seek(0)
 
 
 
 
 
36
 
37
  return audio_bytes
38
 
39
  def main():
 
 
40
  st.title("📖 Image Story Generator with Audio")
41
  st.write("Upload an image to get a magical story read aloud!")
42
 
 
43
  uploaded_image = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
44
 
45
  if uploaded_image:
46
+ image = Image.open(uploaded_image).convert("RGB")
47
+ st.image(image, use_column_width=True)
 
48
 
 
49
  with st.spinner("✨ Analyzing image..."):
50
  caption = generate_image_caption(image)
51
 
 
52
  st.subheader("Image Understanding")
53
  st.write(caption)
54
 
 
55
  with st.spinner("📖 Writing story..."):
56
  story = text2story(caption)
57
 
 
58
  st.subheader("Magical Story")
59
  st.write(story)
60
 
 
61
  if st.button("🎧 Read Story Aloud"):
62
  with st.spinner("🔊 Generating audio..."):
63
  try:
64
+ audio_bytes = story_to_speech(story)
 
 
 
65
  st.audio(audio_bytes, format="audio/wav")
66
  except Exception as e:
67
  st.error(f"Error generating audio: {str(e)}")
68
 
69
  if __name__ == "__main__":
 
70
  main()