ccclllwww committed on
Commit
1351e1c
·
verified ·
1 Parent(s): 38c8a94

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -25
app.py CHANGED
@@ -6,10 +6,6 @@ import streamlit as st
6
  from PIL import Image
7
  import time
8
  from transformers import pipeline
9
- from typing import Tuple
10
- from datasets import load_dataset
11
- import soundfile as sf
12
- import torch
13
 
14
  # ======================================
15
  # Basic Initialization
@@ -25,9 +21,7 @@ _image_caption_pipeline = pipeline(
25
  _text_generation_pipeline = pipeline("text-generation", model="Qwen/Qwen3-1.7B")
26
 
27
  # Initialize TTS components once to avoid reloading
28
- _SPEECH_PIPELINE = pipeline("text-to-speech", model="microsoft/speecht5_tts")
29
- _EMBEDDINGS_DATASET = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
30
- _DEFAULT_SPEAKER_EMBEDDING = torch.tensor(_EMBEDDINGS_DATASET[7306]["xvector"]).unsqueeze(0)
31
 
32
  # ======================================
33
  # Function settings
@@ -97,13 +91,12 @@ def generate_story_content(system_prompt: str, user_prompt: str) -> str:
97
  except Exception as error:
98
  raise RuntimeError(f"Story generation failed: {str(error)}") from error
99
 
100
- def generate_audio_from_story(story_text: str, output_path: str = "output.wav") -> str:
101
  """
102
  Convert text story to speech audio file using text-to-speech synthesis.
103
 
104
  Args:
105
  story_text: Input story text to synthesize
106
- output_path: Path to save generated audio (default: 'output.wav')
107
 
108
  Returns:
109
  Path to generated audio file
@@ -121,20 +114,10 @@ def generate_audio_from_story(story_text: str, output_path: str = "output.wav")
121
  raise ValueError("Input story text must be a non-empty string")
122
 
123
  try:
124
- # Generate speech with default speaker profile
125
- speech_output = _SPEECH_PIPELINE(
126
- story_text,
127
- forward_params={"speaker_embeddings": _DEFAULT_SPEAKER_EMBEDDING}
128
- )
129
-
130
- # Save audio to WAV file
131
- sf.write(
132
- output_path,
133
- speech_output["audio"],
134
- samplerate=speech_output["sampling_rate"]
135
- )
136
-
137
- return output_path
138
 
139
  except Exception as error:
140
  raise RuntimeError(f"Audio synthesis failed: {str(error)}") from error
@@ -290,9 +273,9 @@ if uploaded_image is not None:
290
 
291
  # Audio generation section
292
  with st.spinner("🔮 Preparing story narration..."):
293
- audio_file = generate_audio_from_story(story_text, "story_audio.wav")
294
  st.subheader("🎧 Listen to Your Story")
295
- st.audio(audio_file)
296
  else:
297
  # Show waiting message
298
  st.info("ℹ️ Please select a story style and click the confirmation button to continue")
 
6
  from PIL import Image
7
  import time
8
  from transformers import pipeline
 
 
 
 
9
 
10
  # ======================================
11
  # Basic Initialization
 
21
  _text_generation_pipeline = pipeline("text-generation", model="Qwen/Qwen3-1.7B")
22
 
23
  # Initialize TTS components once to avoid reloading
24
+ _SPEECH_PIPELINE = pipeline("text-to-speech", model="facebook/mms-tts-eng")
 
 
25
 
26
  # ======================================
27
  # Function settings
 
91
  except Exception as error:
92
  raise RuntimeError(f"Story generation failed: {str(error)}") from error
93
 
94
+ def generate_audio_from_story(story_text: str) -> str:
95
  """
96
  Convert text story to speech audio file using text-to-speech synthesis.
97
 
98
  Args:
99
  story_text: Input story text to synthesize
 
100
 
101
  Returns:
102
  Path to generated audio file
 
114
  raise ValueError("Input story text must be a non-empty string")
115
 
116
  try:
117
+ # Generate speech
118
+ speech_output = _SPEECH_PIPELINE( story_text )
119
+
120
+ return speech_output
 
 
 
 
 
 
 
 
 
 
121
 
122
  except Exception as error:
123
  raise RuntimeError(f"Audio synthesis failed: {str(error)}") from error
 
273
 
274
  # Audio generation section
275
  with st.spinner("🔮 Preparing story narration..."):
276
+ audio_file = generate_audio_from_story(story_text)
277
  st.subheader("🎧 Listen to Your Story")
278
+ st.audio(data=audio_file["audio"],sample_rate=audio_file["sampling_rate"])
279
  else:
280
  # Show waiting message
281
  st.info("ℹ️ Please select a story style and click the confirmation button to continue")