|
|
|
import gradio as gr |
|
import requests |
|
import random |
|
import urllib.parse |
|
import tempfile |
|
import os |
|
|
|
NSFW_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE") |
|
TTS_URL_TEMPLATE = os.getenv("TTS_API_URL_TEMPLATE") |
|
|
|
|
|
if not NSFW_URL_TEMPLATE: |
|
raise ValueError("Missing Secret: NSFW_API_URL_TEMPLATE is not set in Hugging Face Space secrets.") |
|
if not TTS_URL_TEMPLATE: |
|
raise ValueError("Missing Secret: TTS_API_URL_TEMPLATE is not set in Hugging Face Space secrets.") |
|
|
|
VOICES = [ |
|
"alloy", "echo", "fable", "onyx", "nova", "shimmer", |
|
"coral", "verse", "ballad", "ash", "sage", "amuch", "dan" |
|
] |
|
|
|
|
|
|
|
def check_nsfw(prompt: str) -> bool: |
|
global NSFW_URL_TEMPLATE |
|
try: |
|
encoded_prompt = urllib.parse.quote(prompt) |
|
url = NSFW_URL_TEMPLATE.format(prompt=encoded_prompt) |
|
print(f"DEBUG: Checking NSFW URL: {url.split('?')[0]}... (query params hidden)") |
|
|
|
response = requests.get(url, timeout=20) |
|
response.raise_for_status() |
|
|
|
result = response.text.strip().upper() |
|
print(f"DEBUG: NSFW Check Response: '{result}'") |
|
|
|
if result == "YES": |
|
return True |
|
elif result == "NO": |
|
return False |
|
else: |
|
print(f"Warning: Unexpected response from NSFW checker: {response.text}") |
|
return True |
|
|
|
except requests.exceptions.RequestException as e: |
|
print(f"Error during NSFW check: {e}") |
|
raise gr.Error(f"Failed to check prompt safety.") |
|
except Exception as e: |
|
print(f"Unexpected error during NSFW check: {e}") |
|
raise gr.Error(f"An unexpected error occurred during safety check. Please wait for a second and try again.") |
|
|
|
|
|
def generate_audio(prompt: str, voice: str, emotion: str, seed: int) -> bytes: |
|
|
|
global TTS_URL_TEMPLATE |
|
try: |
|
encoded_prompt = urllib.parse.quote(prompt) |
|
encoded_emotion = urllib.parse.quote(emotion) |
|
|
|
url = TTS_URL_TEMPLATE.format( |
|
prompt=encoded_prompt, |
|
emotion=encoded_emotion, |
|
voice=voice, |
|
seed=seed |
|
) |
|
print(f"DEBUG: Generating Audio URL: {url.split('?')[0]}... (query params hidden)") |
|
|
|
response = requests.get(url, timeout=60) |
|
response.raise_for_status() |
|
|
|
content_type = response.headers.get('content-type', '').lower() |
|
if 'audio' not in content_type: |
|
print(f"Warning: Unexpected content type received: {content_type}") |
|
print(f"Response Text: {response.text[:500]}") |
|
raise gr.Error(f"API did not return audio.") |
|
|
|
return response.content |
|
|
|
except requests.exceptions.RequestException as e: |
|
print(f"Error during audio generation: {e}") |
|
error_details = "" |
|
if hasattr(e, 'response') and e.response is not None: |
|
error_details = e.response.text[:200] |
|
raise gr.Error(f"Failed to generate audio. Please wait for a second and try again.") |
|
except Exception as e: |
|
print(f"Unexpected error during audio generation: {e}") |
|
raise gr.Error(f"An unexpected error occurred during audio generation. Please wait for a second and try again.") |
|
|
|
|
|
|
|
def text_to_speech_app(prompt: str, voice: str, emotion: str, use_random_seed: bool, specific_seed: int): |
|
|
|
print("\n\n\n"+prompt+"\n\n\n") |
|
if not prompt: |
|
raise gr.Error("Prompt cannot be empty.") |
|
if not emotion: |
|
emotion = "neutral" |
|
print("Warning: No emotion provided, defaulting to 'neutral'.") |
|
if not voice: |
|
raise gr.Error("Please select a voice.") |
|
|
|
seed = random.randint(0, 2**32 - 1) if use_random_seed else int(specific_seed) |
|
print(f"Using Seed: {seed}") |
|
|
|
|
|
print("Checking prompt safety...") |
|
try: |
|
|
|
is_nsfw = False |
|
except gr.Error as e: |
|
return None, f"There was an error. Please wait for a second and try again." |
|
|
|
if is_nsfw: |
|
print("Prompt flagged as inappropriate.") |
|
return None, "Error: The prompt was flagged as inappropriate and cannot be processed." |
|
|
|
|
|
print("Prompt is safe. Generating audio...") |
|
try: |
|
audio_bytes = generate_audio(prompt, voice, emotion, seed) |
|
|
|
|
|
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file: |
|
temp_audio_file.write(audio_bytes) |
|
temp_file_path = temp_audio_file.name |
|
print(f"Audio saved temporarily to: {temp_file_path}") |
|
|
|
return temp_file_path, f"Audio generated successfully with voice '{voice}', emotion '{emotion}', and seed {seed}." |
|
|
|
except gr.Error as e: |
|
return None, str(e) |
|
except Exception as e: |
|
print(f"Unexpected error in main function: {e}") |
|
return None, f"An unexpected error occurred: {e}" |
|
|
|
|
|
|
|
|
|
def toggle_seed_input(use_random_seed): |
|
|
|
return gr.update(visible=not use_random_seed, value=12345) |
|
|
|
with gr.Blocks() as app: |
|
gr.Markdown("# Advanced OpenAI Text-To-Speech Unlimited") |
|
gr.Markdown( |
|
"""Enter text, choose a voice and emotion, and generate audio. |
|
The text will be checked for appropriateness before generation. |
|
Use it as much as you want. |
|
|
|
|
|
**Like & follow** for more AI projects: |
|
|
|
|
|
• Instagram: [@nihal_gazi_io](https://www.instagram.com/nihal_gazi_io/) |
|
• Discord: nihal_gazi_io""" |
|
) |
|
|
|
with gr.Row(): |
|
with gr.Column(scale=2): |
|
prompt_input = gr.Textbox(label="Prompt", placeholder="Enter the text you want to convert to speech...") |
|
emotion_input = gr.Textbox(label="Emotion Style", placeholder="e.g., happy, sad, excited, calm...") |
|
voice_dropdown = gr.Dropdown(label="Voice", choices=VOICES, value="alloy") |
|
with gr.Column(scale=1): |
|
random_seed_checkbox = gr.Checkbox(label="Use Random Seed", value=True) |
|
seed_input = gr.Number(label="Specific Seed", value=12345, visible=False, precision=0) |
|
|
|
submit_button = gr.Button("Generate Audio", variant="primary") |
|
|
|
with gr.Row(): |
|
audio_output = gr.Audio(label="Generated Audio", type="filepath") |
|
status_output = gr.Textbox(label="Status") |
|
|
|
|
|
random_seed_checkbox.change( |
|
fn=toggle_seed_input, |
|
inputs=[random_seed_checkbox], |
|
outputs=[seed_input] |
|
) |
|
|
|
submit_button.click( |
|
fn=text_to_speech_app, |
|
inputs=[ |
|
prompt_input, |
|
voice_dropdown, |
|
emotion_input, |
|
random_seed_checkbox, |
|
seed_input |
|
], |
|
outputs=[audio_output, status_output], |
|
concurrency_limit=30 |
|
) |
|
|
|
|
|
gr.Examples( |
|
examples=[ |
|
["Hello there! This is a test of the text-to-speech system.", "alloy", "neutral", False, 12345], |
|
["Surely *you* wouldn't want *that*. [laughs]", "shimmer", "sarcastic and mocking", True, 12345], |
|
["[sobbing] I am feeling... [sighs] a bit down today [cry]", "ballad", "sad and depressed, with stammering", True, 662437], |
|
["This technology is absolutely amazing!", "nova", "excited and joyful", True, 12345], |
|
], |
|
inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input], |
|
outputs=[audio_output, status_output], |
|
fn=text_to_speech_app, |
|
cache_examples=False, |
|
) |
|
|
|
|
|
if __name__ == "__main__": |
|
|
|
if NSFW_URL_TEMPLATE and TTS_URL_TEMPLATE: |
|
app.launch() |
|
else: |
|
print("ERROR: Cannot launch app. Required API URL secrets are missing.") |
|
|