Spaces:

CohereLabs
/

aya_expanse

Running on T4

App Files Files Community

shivalikasingh committed 14 days ago

Commit

e147434

1 Parent(s): dbb8f6f

Persist changes

Browse files

Files changed (2) hide show

app.py +13 -7
aya_vision_utils.py +11 -5

app.py CHANGED Viewed

@@ -68,7 +68,8 @@ eleven_labs_client = ElevenLabs(
 BUCKET_NAME = os.getenv("BUCKET_NAME")
 AUDIO_BUCKET = os.getenv("AUDIO_BUCKET")
 IMAGE_STORAGE_PATH = os.getenv("IMAGE_STORAGE_PATH")
-AUDIO_STORAGE_PATH = os.getenv("AUDIO_STORAGE_PATH")
 SAVING_ENABLED = True
 storage_client = storage.Client()
@@ -274,7 +275,7 @@ def clean_text(text, remove_bullets=False, remove_newline=False):
     return cleaned_text
-def convert_text_to_speech(transcript, text, language="english"):
     # do language detection to determine voice of speech response
     if text:
@@ -298,13 +299,18 @@ def convert_text_to_speech(transcript, text, language="english"):
             audio_path = elevenlabs_generate_audio(text)
         if SAVING_ENABLED:
-            unique_id = str(uuid.uuid4())
-            blob = audio_bucket.blob(AUDIO_STORAGE_PATH + unique_id + "_" + audio_path)
             blob.upload_from_filename(audio_path)
-            gcp_audio_path = f"gs://{BUCKET_NAME}/{AUDIO_STORAGE_PATH}{unique_id}_{audio_path}"
-            insert_aya_audio(connection, transcript, text, gcp_audio_path)
         return audio_path
     else:
@@ -595,7 +601,7 @@ with demo:
         show_progress="full",
     ).then(
         convert_text_to_speech,
-        inputs=[e2e_audio_file_trans, e2e_audio_file_aya_response],
         outputs=[e2e_aya_audio_response],
         show_progress="full",
     )

 BUCKET_NAME = os.getenv("BUCKET_NAME")
 AUDIO_BUCKET = os.getenv("AUDIO_BUCKET")
 IMAGE_STORAGE_PATH = os.getenv("IMAGE_STORAGE_PATH")
+AUDIO_INPUT_STORAGE_PATH = os.getenv("AUDIO_INPUT_STORAGE_PATH")
+AUDIO_RESPONSE_STORAGE_PATH = os.getenv("AUDIO_RESPONSE_STORAGE_PATH")
 SAVING_ENABLED = True
 storage_client = storage.Client()
     return cleaned_text
+def convert_text_to_speech(input_audio_file, transcript, text, language="english"):
     # do language detection to determine voice of speech response
     if text:
             audio_path = elevenlabs_generate_audio(text)
         if SAVING_ENABLED:
+            input_audio_file_unique_id = str(uuid.uuid4())
+            response_audio_file_unique_id = str(uuid.uuid4())
+            blob = audio_bucket.blob(AUDIO_INPUT_STORAGE_PATH + input_audio_file_unique_id + "_" + input_audio_file)
+            blob.upload_from_filename(input_audio_file)
+            gcp_input_audio_file_path = f"gs://{BUCKET_NAME}/{AUDIO_INPUT_STORAGE_PATH}{input_audio_file_unique_id}_{input_audio_file}"
+            blob = audio_bucket.blob(AUDIO_RESPONSE_STORAGE_PATH + response_audio_file_unique_id + "_" + audio_path)
             blob.upload_from_filename(audio_path)
+            gcp_response_audio_file_path = f"gs://{BUCKET_NAME}/{AUDIO_RESPONSE_STORAGE_PATH}{response_audio_file_unique_id}_{audio_path}"
+            insert_aya_audio(connection, transcript, text, gcp_response_audio_file_path, gcp_input_audio_file_path)
         return audio_path
     else:
         show_progress="full",
     ).then(
         convert_text_to_speech,
+        inputs=[e2e_audio_file, e2e_audio_file_trans, e2e_audio_file_aya_response],
         outputs=[e2e_aya_audio_response],
         show_progress="full",
     )

aya_vision_utils.py CHANGED Viewed

@@ -107,15 +107,21 @@ def get_base64_image_size(base64_string):
     return size_bytes
-def insert_aya_audio(connection, user_prompt, text_response, audio_response_file_path):
     with connection.begin():
         connection.execute(
             sqlalchemy.text("""
-                INSERT INTO aya_audio (user_prompt, text_response, audio_response_file_path, timestamp)
-                VALUES (:user_prompt, :text_response, :audio_response_file_path, :timestamp)
             """),
-            {"user_prompt": user_prompt, "text_response": text_response, "audio_response_file_path": audio_response_file_path, "timestamp": datetime.now()}
-        )
 def insert_aya_image(connection, user_prompt, generated_img_desc, image_response_file_path):
     with connection.begin():

     return size_bytes
+def insert_aya_audio(connection, user_prompt, text_response, audio_response_file_path, input_audio_file_path):
     with connection.begin():
         connection.execute(
             sqlalchemy.text("""
+                INSERT INTO aya_audio (user_prompt, text_response, input_audio_file_path, audio_response_file_path, timestamp)
+                VALUES (:user_prompt, :text_response, :input_audio_file_path, :audio_response_file_path, :timestamp)
             """),
+            {
+                "user_prompt": user_prompt,
+                "text_response": text_response,
+                "input_audio_file_path": input_audio_file_path,
+                "audio_response_file_path": audio_response_file_path,
+                "timestamp": datetime.now()
+            }
+        )
 def insert_aya_image(connection, user_prompt, generated_img_desc, image_response_file_path):
     with connection.begin():