Spaces:

ccclllwww
/

Assignment_V1

Sleeping

App Files Files Community

ccclllwww committed on May 2

Commit

1351e1c

verified ·

1 Parent(s): 38c8a94

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -25

app.py CHANGED Viewed

@@ -6,10 +6,6 @@ import streamlit as st
 from PIL import Image
 import time
 from transformers import pipeline
-from typing import Tuple
-from datasets import load_dataset
-import soundfile as sf
-import torch
 # ======================================
 # Basic Initialization
@@ -25,9 +21,7 @@ _image_caption_pipeline = pipeline(
 _text_generation_pipeline = pipeline("text-generation", model="Qwen/Qwen3-1.7B")
 # Initialize TTS components once to avoid reloading
-_SPEECH_PIPELINE = pipeline("text-to-speech", model="microsoft/speecht5_tts")
-_EMBEDDINGS_DATASET = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
-_DEFAULT_SPEAKER_EMBEDDING = torch.tensor(_EMBEDDINGS_DATASET[7306]["xvector"]).unsqueeze(0)
 # ======================================
 # Function settings
@@ -97,13 +91,12 @@ def generate_story_content(system_prompt: str, user_prompt: str) -> str:
     except Exception as error:
         raise RuntimeError(f"Story generation failed: {str(error)}") from error
-def generate_audio_from_story(story_text: str, output_path: str = "output.wav") -> str:
     """
     Convert text story to speech audio file using text-to-speech synthesis.
     Args:
         story_text: Input story text to synthesize
-        output_path: Path to save generated audio (default: 'output.wav')
     Returns:
         Path to generated audio file
@@ -121,20 +114,10 @@ def generate_audio_from_story(story_text: str, output_path: str = "output.wav")
         raise ValueError("Input story text must be a non-empty string")
     try:
-        # Generate speech with default speaker profile
-        speech_output = _SPEECH_PIPELINE(
-            story_text,
-            forward_params={"speaker_embeddings": _DEFAULT_SPEAKER_EMBEDDING}
-        )
-        # Save audio to WAV file
-        sf.write(
-            output_path,
-            speech_output["audio"],
-            samplerate=speech_output["sampling_rate"]
-        )
-        return output_path
     except Exception as error:
         raise RuntimeError(f"Audio synthesis failed: {str(error)}") from error
@@ -290,9 +273,9 @@ if uploaded_image is not None:
         # Audio generation section
         with st.spinner("🔮 Preparing story narration..."):
-            audio_file = generate_audio_from_story(story_text, "story_audio.wav")
             st.subheader("🎧 Listen to Your Story")
-            st.audio(audio_file)
     else:
         # Show waiting message
         st.info("ℹ️ Please select a story style and click the confirmation button to continue")

 from PIL import Image
 import time
 from transformers import pipeline
 # ======================================
 # Basic Initialization
 _text_generation_pipeline = pipeline("text-generation", model="Qwen/Qwen3-1.7B")
 # Initialize TTS components once to avoid reloading
+_SPEECH_PIPELINE = pipeline("text-to-speech", model="facebook/mms-tts-eng")
 # ======================================
 # Function settings
     except Exception as error:
         raise RuntimeError(f"Story generation failed: {str(error)}") from error
+def generate_audio_from_story(story_text: str) -> str:
     """
     Convert text story to speech audio file using text-to-speech synthesis.
     Args:
         story_text: Input story text to synthesize
     Returns:
         Text-to-speech pipeline output, indexed by "audio" and "sampling_rate"
         raise ValueError("Input story text must be a non-empty string")
     try:
+        # Generate speech
+        speech_output = _SPEECH_PIPELINE( story_text )
+        return speech_output
     except Exception as error:
         raise RuntimeError(f"Audio synthesis failed: {str(error)}") from error
         # Audio generation section
         with st.spinner("🔮 Preparing story narration..."):
+            audio_file = generate_audio_from_story(story_text)
             st.subheader("🎧 Listen to Your Story")
+            st.audio(data=audio_file["audio"],sample_rate=audio_file["sampling_rate"])
     else:
         # Show waiting message
         st.info("ℹ️ Please select a story style and click the confirmation button to continue")