Spaces: Running on T4
Commit
·
e147434
1
Parent(s):
dbb8f6f
Persist changes
Browse files
- app.py +13 -7
- aya_vision_utils.py +11 -5
app.py
CHANGED
@@ -68,7 +68,8 @@ eleven_labs_client = ElevenLabs(
|
|
68 |
BUCKET_NAME = os.getenv("BUCKET_NAME")
|
69 |
AUDIO_BUCKET = os.getenv("AUDIO_BUCKET")
|
70 |
IMAGE_STORAGE_PATH = os.getenv("IMAGE_STORAGE_PATH")
|
71 |
-
|
|
|
72 |
SAVING_ENABLED = True
|
73 |
|
74 |
storage_client = storage.Client()
|
@@ -274,7 +275,7 @@ def clean_text(text, remove_bullets=False, remove_newline=False):
|
|
274 |
|
275 |
return cleaned_text
|
276 |
|
277 |
-
def convert_text_to_speech(transcript, text, language="english"):
|
278 |
|
279 |
# do language detection to determine voice of speech response
|
280 |
if text:
|
@@ -298,13 +299,18 @@ def convert_text_to_speech(transcript, text, language="english"):
|
|
298 |
audio_path = elevenlabs_generate_audio(text)
|
299 |
|
300 |
if SAVING_ENABLED:
|
301 |
-
|
|
|
|
|
|
|
|
|
|
|
302 |
|
303 |
-
blob = audio_bucket.blob(
|
304 |
blob.upload_from_filename(audio_path)
|
305 |
-
|
306 |
|
307 |
-
insert_aya_audio(connection, transcript, text,
|
308 |
|
309 |
return audio_path
|
310 |
else:
|
@@ -595,7 +601,7 @@ with demo:
|
|
595 |
show_progress="full",
|
596 |
).then(
|
597 |
convert_text_to_speech,
|
598 |
-
inputs=[e2e_audio_file_trans, e2e_audio_file_aya_response],
|
599 |
outputs=[e2e_aya_audio_response],
|
600 |
show_progress="full",
|
601 |
)
|
|
|
68 |
BUCKET_NAME = os.getenv("BUCKET_NAME")
|
69 |
AUDIO_BUCKET = os.getenv("AUDIO_BUCKET")
|
70 |
IMAGE_STORAGE_PATH = os.getenv("IMAGE_STORAGE_PATH")
|
71 |
+
AUDIO_INPUT_STORAGE_PATH = os.getenv("AUDIO_INPUT_STORAGE_PATH")
|
72 |
+
AUDIO_RESPONSE_STORAGE_PATH = os.getenv("AUDIO_RESPONSE_STORAGE_PATH")
|
73 |
SAVING_ENABLED = True
|
74 |
|
75 |
storage_client = storage.Client()
|
|
|
275 |
|
276 |
return cleaned_text
|
277 |
|
278 |
+
def convert_text_to_speech(input_audio_file, transcript, text, language="english"):
|
279 |
|
280 |
# do language detection to determine voice of speech response
|
281 |
if text:
|
|
|
299 |
audio_path = elevenlabs_generate_audio(text)
|
300 |
|
301 |
if SAVING_ENABLED:
|
302 |
+
input_audio_file_unique_id = str(uuid.uuid4())
|
303 |
+
response_audio_file_unique_id = str(uuid.uuid4())
|
304 |
+
|
305 |
+
blob = audio_bucket.blob(AUDIO_INPUT_STORAGE_PATH + input_audio_file_unique_id + "_" + input_audio_file)
|
306 |
+
blob.upload_from_filename(input_audio_file)
|
307 |
+
gcp_input_audio_file_path = f"gs://{BUCKET_NAME}/{AUDIO_INPUT_STORAGE_PATH}{input_audio_file_unique_id}_{input_audio_file}"
|
308 |
|
309 |
+
blob = audio_bucket.blob(AUDIO_RESPONSE_STORAGE_PATH + response_audio_file_unique_id + "_" + audio_path)
|
310 |
blob.upload_from_filename(audio_path)
|
311 |
+
gcp_response_audio_file_path = f"gs://{BUCKET_NAME}/{AUDIO_RESPONSE_STORAGE_PATH}{response_audio_file_unique_id}_{audio_path}"
|
312 |
|
313 |
+
insert_aya_audio(connection, transcript, text, gcp_response_audio_file_path, gcp_input_audio_file_path)
|
314 |
|
315 |
return audio_path
|
316 |
else:
|
|
|
601 |
show_progress="full",
|
602 |
).then(
|
603 |
convert_text_to_speech,
|
604 |
+
inputs=[e2e_audio_file, e2e_audio_file_trans, e2e_audio_file_aya_response],
|
605 |
outputs=[e2e_aya_audio_response],
|
606 |
show_progress="full",
|
607 |
)
|
aya_vision_utils.py
CHANGED
@@ -107,15 +107,21 @@ def get_base64_image_size(base64_string):
|
|
107 |
return size_bytes
|
108 |
|
109 |
|
110 |
-
def insert_aya_audio(connection, user_prompt, text_response, audio_response_file_path):
|
111 |
with connection.begin():
|
112 |
connection.execute(
|
113 |
sqlalchemy.text("""
|
114 |
-
INSERT INTO aya_audio (user_prompt, text_response, audio_response_file_path, timestamp)
|
115 |
-
VALUES (:user_prompt, :text_response, :audio_response_file_path, :timestamp)
|
116 |
"""),
|
117 |
-
{
|
118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
|
120 |
def insert_aya_image(connection, user_prompt, generated_img_desc, image_response_file_path):
|
121 |
with connection.begin():
|
|
|
107 |
return size_bytes
|
108 |
|
109 |
|
110 |
+
def insert_aya_audio(connection, user_prompt, text_response, audio_response_file_path, input_audio_file_path):
|
111 |
with connection.begin():
|
112 |
connection.execute(
|
113 |
sqlalchemy.text("""
|
114 |
+
INSERT INTO aya_audio (user_prompt, text_response, input_audio_file_path, audio_response_file_path, timestamp)
|
115 |
+
VALUES (:user_prompt, :text_response, :input_audio_file_path, :audio_response_file_path, :timestamp)
|
116 |
"""),
|
117 |
+
{
|
118 |
+
"user_prompt": user_prompt,
|
119 |
+
"text_response": text_response,
|
120 |
+
"input_audio_file_path": input_audio_file_path,
|
121 |
+
"audio_response_file_path": audio_response_file_path,
|
122 |
+
"timestamp": datetime.now()
|
123 |
+
}
|
124 |
+
)
|
125 |
|
126 |
def insert_aya_image(connection, user_prompt, generated_img_desc, image_response_file_path):
|
127 |
with connection.begin():
|