NihalGazi committed on
Commit
b0938c1
·
verified ·
1 Parent(s): e3b7f9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -65
app.py CHANGED
@@ -6,45 +6,45 @@ import urllib.parse
6
  import tempfile
7
  import os
8
 
9
- # --- Constants ---
 
 
 
 
 
 
 
 
10
  VOICES = [
11
  "alloy", "echo", "fable", "onyx", "nova", "shimmer", # Standard OpenAI Voices
12
- "coral", "verse", "ballad", "ash", "sage", "amuch", "dan" # Additional Pollinations Voices? (Assuming based on list)
13
  ]
14
 
15
- NSFW_URL_TEMPLATE = "https://text.pollinations.ai/Is this an inappropriate text-to-speech prompt \"{prompt}\". If yes then write \"YES\" only otherwise \"NO\" only"
16
- TTS_URL_TEMPLATE = "https://text.pollinations.ai/only repeat what i say now say with proper emphasis in a \"{emotion}\" emotion this statement - \"{prompt}\"?model=openai-audio&voice={voice}&seed={seed}"
17
 
18
- # --- Helper Functions ---
19
 
20
  def check_nsfw(prompt: str) -> bool:
21
- """Checks if the prompt is NSFW using the Pollinations API."""
22
  try:
23
- # URL encode the prompt for safety
24
  encoded_prompt = urllib.parse.quote(prompt)
25
  url = NSFW_URL_TEMPLATE.format(prompt=encoded_prompt)
26
- print(f"DEBUG: Checking NSFW URL: {url}") # Optional: for debugging
27
 
28
- response = requests.get(url, timeout=20) # Added timeout
29
- response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
30
 
31
  result = response.text.strip().upper()
32
- print(f"DEBUG: NSFW Check Response: '{result}'") # Optional: for debugging
33
 
34
  if result == "YES":
35
  return True
36
  elif result == "NO":
37
  return False
38
  else:
39
- # Handle unexpected responses from the NSFW checker
40
  print(f"Warning: Unexpected response from NSFW checker: {response.text}")
41
- # Defaulting to safe might be risky, maybe default to NSFW? Or raise error?
42
- # Let's default to considering it potentially unsafe if unsure.
43
- return True # Treat unexpected responses as potentially NSFW
44
 
45
  except requests.exceptions.RequestException as e:
46
  print(f"Error during NSFW check: {e}")
47
- # If the check fails, maybe treat as unsafe to be cautious
48
  raise gr.Error(f"Failed to check prompt safety: {e}")
49
  except Exception as e:
50
  print(f"Unexpected error during NSFW check: {e}")
@@ -52,9 +52,9 @@ def check_nsfw(prompt: str) -> bool:
52
 
53
 
54
  def generate_audio(prompt: str, voice: str, emotion: str, seed: int) -> bytes:
55
- """Generates audio using the Pollinations Text-to-Speech API."""
 
56
  try:
57
- # URL encode the prompt and emotion
58
  encoded_prompt = urllib.parse.quote(prompt)
59
  encoded_emotion = urllib.parse.quote(emotion)
60
 
@@ -64,118 +64,105 @@ def generate_audio(prompt: str, voice: str, emotion: str, seed: int) -> bytes:
64
  voice=voice,
65
  seed=seed
66
  )
67
- print(f"DEBUG: Generating Audio URL: {url}") # Optional: for debugging
68
 
69
- response = requests.get(url, timeout=60) # Increased timeout for audio generation
70
- response.raise_for_status() # Raise an exception for bad status codes
71
 
72
- # Check if response content type suggests audio
73
  content_type = response.headers.get('content-type', '').lower()
74
  if 'audio' not in content_type:
75
  print(f"Warning: Unexpected content type received: {content_type}")
76
- print(f"Response Text: {response.text[:500]}") # Log beginning of text response
77
  raise gr.Error(f"API did not return audio. Response: {response.text[:200]}")
78
 
79
- return response.content # Return raw audio bytes
80
 
81
  except requests.exceptions.RequestException as e:
82
  print(f"Error during audio generation: {e}")
83
- # Try to get more info from response if available
84
  error_details = ""
85
  if hasattr(e, 'response') and e.response is not None:
86
- error_details = e.response.text[:200] # Get first 200 chars of error response
87
  raise gr.Error(f"Failed to generate audio: {e}. Details: {error_details}")
88
  except Exception as e:
89
  print(f"Unexpected error during audio generation: {e}")
90
  raise gr.Error(f"An unexpected error occurred during audio generation: {e}")
91
 
92
- # --- Main Gradio Function ---
93
 
94
  def text_to_speech_app(prompt: str, voice: str, emotion: str, use_random_seed: bool, specific_seed: int):
95
- """
96
- Main function for the Gradio app. Checks NSFW, then generates audio.
97
- Returns the path to a temporary audio file or an error message.
98
- """
99
  if not prompt:
100
  raise gr.Error("Prompt cannot be empty.")
101
  if not emotion:
102
- # Default emotion if none provided, or raise error? Let's default.
103
  emotion = "neutral"
104
  print("Warning: No emotion provided, defaulting to 'neutral'.")
105
- # raise gr.Error("Emotion cannot be empty.") # Alternative: require emotion
106
  if not voice:
107
  raise gr.Error("Please select a voice.")
108
 
109
- # 1. Determine Seed
110
  seed = random.randint(0, 2**32 - 1) if use_random_seed else int(specific_seed)
111
  print(f"Using Seed: {seed}")
112
 
113
- # 2. Check NSFW
114
  print("Checking prompt safety...")
115
  try:
116
  is_nsfw = check_nsfw(prompt)
117
  except gr.Error as e:
118
- # Propagate errors raised by check_nsfw
119
- return None, str(e) # Return None for audio, error message for text
120
 
121
  if is_nsfw:
122
  print("Prompt flagged as inappropriate.")
123
- # Return None for audio output, and a message for a text output
124
  return None, "Error: The prompt was flagged as inappropriate and cannot be processed."
125
 
126
- # 3. Generate Audio (only if not NSFW)
127
  print("Prompt is safe. Generating audio...")
128
  try:
129
  audio_bytes = generate_audio(prompt, voice, emotion, seed)
130
 
131
- # 4. Save audio to a temporary file for Gradio
132
- # Suffix is important for Gradio to recognize the format. Assuming MP3 based on common web usage.
133
- # If the API returns WAV, change suffix to ".wav"
134
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
135
  temp_audio_file.write(audio_bytes)
136
  temp_file_path = temp_audio_file.name
137
  print(f"Audio saved temporarily to: {temp_file_path}")
138
 
139
- # Return the path to the temp file for the Audio component, and success message for Text
140
  return temp_file_path, f"Audio generated successfully with voice '{voice}', emotion '{emotion}', and seed {seed}."
141
 
142
  except gr.Error as e:
143
- # Handle errors raised by generate_audio
144
- return None, str(e) # Return None for audio, error message for text
145
  except Exception as e:
146
  print(f"Unexpected error in main function: {e}")
147
  return None, f"An unexpected error occurred: {e}"
148
 
149
 
150
- # --- Gradio Interface ---
151
 
152
  def toggle_seed_input(use_random_seed):
153
- """Updates the visibility of the specific seed input field."""
154
- return gr.update(visible=not use_random_seed, value=12345) # Reset to default when shown
155
 
156
  with gr.Blocks() as app:
157
- gr.Markdown("# Text-to-Speech with NSFW Check")
158
  gr.Markdown(
159
  "Enter text, choose a voice and emotion, and generate audio. "
160
- "The text will be checked for appropriateness before generation."
 
161
  )
162
 
163
  with gr.Row():
164
  with gr.Column(scale=2):
165
  prompt_input = gr.Textbox(label="Prompt", placeholder="Enter the text you want to convert to speech...")
166
  emotion_input = gr.Textbox(label="Emotion Style", placeholder="e.g., happy, sad, excited, calm...")
167
- voice_dropdown = gr.Dropdown(label="Voice", choices=VOICES, value="alloy") # Default voice
168
  with gr.Column(scale=1):
169
  random_seed_checkbox = gr.Checkbox(label="Use Random Seed", value=True)
170
- seed_input = gr.Number(label="Specific Seed", value=12345, visible=False, precision=0) # Integer seed
171
 
172
  submit_button = gr.Button("Generate Audio", variant="primary")
173
 
174
  with gr.Row():
175
- audio_output = gr.Audio(label="Generated Audio", type="filepath") # Use filepath as we save temp file
176
- status_output = gr.Textbox(label="Status") # To display errors or success messages
 
177
 
178
- # --- Event Listeners ---
179
  random_seed_checkbox.change(
180
  fn=toggle_seed_input,
181
  inputs=[random_seed_checkbox],
@@ -191,23 +178,28 @@ with gr.Blocks() as app:
191
  random_seed_checkbox,
192
  seed_input
193
  ],
194
- outputs=[audio_output, status_output] # Output to both components
195
  )
196
 
 
197
  gr.Examples(
198
  examples=[
199
  ["Hello there! This is a test of the text-to-speech system.", "alloy", "neutral", True, 12345],
200
- ["What a beautiful day to build Gradio apps.", "shimmer", "happy", True, 12345],
201
- ["I am feeling a bit down today.", "fable", "sad", False, 9876],
202
- ["This technology is absolutely amazing!", "nova", "excited", True, 12345],
203
  ],
204
  inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input],
205
- outputs=[audio_output, status_output], # Outputs match the click function
206
- fn=text_to_speech_app, # The function to call for examples
207
- cache_examples=False, # Might be good to disable caching if APIs change or have quotas
208
  )
209
 
210
- # --- Launch the App ---
211
  if __name__ == "__main__":
212
- app.launch()
213
-
 
 
 
 
 
6
  import tempfile
7
  import os
8
 
9
+ NSFW_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")
10
+ TTS_URL_TEMPLATE = os.getenv("TTS_API_URL_TEMPLATE")
11
+
12
+
13
+ if not NSFW_URL_TEMPLATE:
14
+ raise ValueError("Missing Secret: NSFW_API_URL_TEMPLATE is not set in Hugging Face Space secrets.")
15
+ if not TTS_URL_TEMPLATE:
16
+ raise ValueError("Missing Secret: TTS_API_URL_TEMPLATE is not set in Hugging Face Space secrets.")
17
+ # VOICES
18
  VOICES = [
19
  "alloy", "echo", "fable", "onyx", "nova", "shimmer", # Standard OpenAI Voices
20
+ "coral", "verse", "ballad", "ash", "sage", "amuch", "dan" # Some additional pre-trained
21
  ]
22
 
 
 
23
 
 
24
 
25
  def check_nsfw(prompt: str) -> bool:
26
+ global NSFW_URL_TEMPLATE
27
  try:
 
28
  encoded_prompt = urllib.parse.quote(prompt)
29
  url = NSFW_URL_TEMPLATE.format(prompt=encoded_prompt)
30
+ print(f"DEBUG: Checking NSFW URL: {url.split('?')[0]}... (query params hidden)")
31
 
32
+ response = requests.get(url, timeout=20)
33
+ response.raise_for_status()
34
 
35
  result = response.text.strip().upper()
36
+ print(f"DEBUG: NSFW Check Response: '{result}'")
37
 
38
  if result == "YES":
39
  return True
40
  elif result == "NO":
41
  return False
42
  else:
 
43
  print(f"Warning: Unexpected response from NSFW checker: {response.text}")
44
+ return True # unexpected responses = potentially NSFW
 
 
45
 
46
  except requests.exceptions.RequestException as e:
47
  print(f"Error during NSFW check: {e}")
 
48
  raise gr.Error(f"Failed to check prompt safety: {e}")
49
  except Exception as e:
50
  print(f"Unexpected error during NSFW check: {e}")
 
52
 
53
 
54
  def generate_audio(prompt: str, voice: str, emotion: str, seed: int) -> bytes:
55
+ # Generates audio using the API from server
56
+ global TTS_URL_TEMPLATE
57
  try:
 
58
  encoded_prompt = urllib.parse.quote(prompt)
59
  encoded_emotion = urllib.parse.quote(emotion)
60
 
 
64
  voice=voice,
65
  seed=seed
66
  )
67
+ print(f"DEBUG: Generating Audio URL: {url.split('?')[0]}... (query params hidden)")
68
 
69
+ response = requests.get(url, timeout=60)
70
+ response.raise_for_status()
71
 
 
72
  content_type = response.headers.get('content-type', '').lower()
73
  if 'audio' not in content_type:
74
  print(f"Warning: Unexpected content type received: {content_type}")
75
+ print(f"Response Text: {response.text[:500]}")
76
  raise gr.Error(f"API did not return audio. Response: {response.text[:200]}")
77
 
78
+ return response.content
79
 
80
  except requests.exceptions.RequestException as e:
81
  print(f"Error during audio generation: {e}")
 
82
  error_details = ""
83
  if hasattr(e, 'response') and e.response is not None:
84
+ error_details = e.response.text[:200]
85
  raise gr.Error(f"Failed to generate audio: {e}. Details: {error_details}")
86
  except Exception as e:
87
  print(f"Unexpected error during audio generation: {e}")
88
  raise gr.Error(f"An unexpected error occurred during audio generation: {e}")
89
 
90
+
91
 
92
  def text_to_speech_app(prompt: str, voice: str, emotion: str, use_random_seed: bool, specific_seed: int):
93
+
 
 
 
94
  if not prompt:
95
  raise gr.Error("Prompt cannot be empty.")
96
  if not emotion:
 
97
  emotion = "neutral"
98
  print("Warning: No emotion provided, defaulting to 'neutral'.")
 
99
  if not voice:
100
  raise gr.Error("Please select a voice.")
101
 
 
102
  seed = random.randint(0, 2**32 - 1) if use_random_seed else int(specific_seed)
103
  print(f"Using Seed: {seed}")
104
 
105
+ # check NSFW
106
  print("Checking prompt safety...")
107
  try:
108
  is_nsfw = check_nsfw(prompt)
109
  except gr.Error as e:
110
+ return None, str(e)
 
111
 
112
  if is_nsfw:
113
  print("Prompt flagged as inappropriate.")
 
114
  return None, "Error: The prompt was flagged as inappropriate and cannot be processed."
115
 
116
+ # if not nsfw
117
  print("Prompt is safe. Generating audio...")
118
  try:
119
  audio_bytes = generate_audio(prompt, voice, emotion, seed)
120
 
121
+ # audio save to a temporary file
 
 
122
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
123
  temp_audio_file.write(audio_bytes)
124
  temp_file_path = temp_audio_file.name
125
  print(f"Audio saved temporarily to: {temp_file_path}")
126
 
 
127
  return temp_file_path, f"Audio generated successfully with voice '{voice}', emotion '{emotion}', and seed {seed}."
128
 
129
  except gr.Error as e:
130
+ return None, str(e)
 
131
  except Exception as e:
132
  print(f"Unexpected error in main function: {e}")
133
  return None, f"An unexpected error occurred: {e}"
134
 
135
 
136
+
137
 
138
  def toggle_seed_input(use_random_seed):
139
+
140
+ return gr.update(visible=not use_random_seed, value=12345)
141
 
142
  with gr.Blocks() as app:
143
+ gr.Markdown("# Advanced OpenAI Text-To-Speech Unlimited")
144
  gr.Markdown(
145
  "Enter text, choose a voice and emotion, and generate audio. "
146
+ "The text will be checked for appropriateness before generation. "
147
+ "Use it as much as you want."
148
  )
149
 
150
  with gr.Row():
151
  with gr.Column(scale=2):
152
  prompt_input = gr.Textbox(label="Prompt", placeholder="Enter the text you want to convert to speech...")
153
  emotion_input = gr.Textbox(label="Emotion Style", placeholder="e.g., happy, sad, excited, calm...")
154
+ voice_dropdown = gr.Dropdown(label="Voice", choices=VOICES, value="alloy")
155
  with gr.Column(scale=1):
156
  random_seed_checkbox = gr.Checkbox(label="Use Random Seed", value=True)
157
+ seed_input = gr.Number(label="Specific Seed", value=12345, visible=False, precision=0)
158
 
159
  submit_button = gr.Button("Generate Audio", variant="primary")
160
 
161
  with gr.Row():
162
+ audio_output = gr.Audio(label="Generated Audio", type="filepath")
163
+ status_output = gr.Textbox(label="Status")
164
+
165
 
 
166
  random_seed_checkbox.change(
167
  fn=toggle_seed_input,
168
  inputs=[random_seed_checkbox],
 
178
  random_seed_checkbox,
179
  seed_input
180
  ],
181
+ outputs=[audio_output, status_output]
182
  )
183
 
184
+
185
  gr.Examples(
186
  examples=[
187
  ["Hello there! This is a test of the text-to-speech system.", "alloy", "neutral", True, 12345],
188
+ ["Surely you wouldn't want *that*. [laughs]", "shimmer", "sarcastic and mocking", True, 12345],
189
+ ["I am feeling...a bit down today. [sighs]", "fable", "sad and depressed, with stammering", False, 9876],
190
+ ["This technology is absolutely amazing!", "nova", "excited and joyful", True, 12345],
191
  ],
192
  inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input],
193
+ outputs=[audio_output, status_output],
194
+ fn=text_to_speech_app,
195
+ cache_examples=False,
196
  )
197
 
198
+
199
  if __name__ == "__main__":
200
+
201
+ if NSFW_URL_TEMPLATE and TTS_URL_TEMPLATE:
202
+ app.launch()
203
+ else:
204
+ print("ERROR: Cannot launch app. Required API URL secrets are missing.")
205
+