Spaces:

stevenhillis
/

intone_mvp

Sleeping

stevenhillis committed on Sep 20, 2023

Commit

f3e678b

1 Parent(s): b94d1ec

update endpoint; use files object to avoid binary

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,17 +9,15 @@ import numpy as np
 from scipy.io import wavfile
-base_url = "https://api.sandbox.deepgram.com/nlu"
 token_str = os.environ['DG_TOKEN']
 def tts_fn(text, prompt_audio, prompt_seconds, inference_steps, inference_temperature, pitch_steps):
     texts = [text]
     sr = prompt_audio[0]
     prompt_audio = prompt_audio[1].astype(np.float32, order='C') / 32768.0
-    byte_io = io.BytesIO(bytes())
-    wavfile.write(byte_io, sr, prompt_audio)
-    prompt_audio_bytes = byte_io.read()
-    params={'synthesize': 'true', 'text': urllib.parse.quote(text), 'pitch_steps': int(pitch_steps), 'soundstorm_steps': inference_steps, 'temperature': inference_temperature, 'prompt_seconds': prompt_seconds}
-    response = requests.post(base_url, data=prompt_audio_bytes, params=params, headers={'Authorization': f'Token {token_str}'}).json()
     try:
         sample_rate = int(response['results'][0]['sample_rate'])
         audio = (np.array(response['results'][0]['audio']).transpose() / 1.414 * 32767).astype(np.int16)

 from scipy.io import wavfile
+base_url = "https://api.sandbox.deepgram.com/tts"
 token_str = os.environ['DG_TOKEN']
 def tts_fn(text, prompt_audio, prompt_seconds, inference_steps, inference_temperature, pitch_steps):
     texts = [text]
     sr = prompt_audio[0]
     prompt_audio = prompt_audio[1].astype(np.float32, order='C') / 32768.0
+    params={'synthesize': 'true', 'pitch_steps': int(pitch_steps), 'soundstorm_steps': inference_steps, 'temperature': inference_temperature, 'prompt_seconds': prompt_seconds}
+    files=[('texts', ('texts', json.dumps(texts), 'application/json')), ('prompt_audio', ('prompt_audio', json.dumps(prompt_audio), 'application/json'))]
+    response = requests.post(base_url, files=files, params=params, headers={'Authorization': f'Token {token_str}'}).json()
     try:
         sample_rate = int(response['results'][0]['sample_rate'])
         audio = (np.array(response['results'][0]['audio']).transpose() / 1.414 * 32767).astype(np.int16)