shivalikasingh committed on
Commit
e147434
·
1 Parent(s): dbb8f6f

Persist changes

Browse files
Files changed (2) hide show
  1. app.py +13 -7
  2. aya_vision_utils.py +11 -5
app.py CHANGED
@@ -68,7 +68,8 @@ eleven_labs_client = ElevenLabs(
68
  BUCKET_NAME = os.getenv("BUCKET_NAME")
69
  AUDIO_BUCKET = os.getenv("AUDIO_BUCKET")
70
  IMAGE_STORAGE_PATH = os.getenv("IMAGE_STORAGE_PATH")
71
- AUDIO_STORAGE_PATH = os.getenv("AUDIO_STORAGE_PATH")
 
72
  SAVING_ENABLED = True
73
 
74
  storage_client = storage.Client()
@@ -274,7 +275,7 @@ def clean_text(text, remove_bullets=False, remove_newline=False):
274
 
275
  return cleaned_text
276
 
277
- def convert_text_to_speech(transcript, text, language="english"):
278
 
279
  # do language detection to determine voice of speech response
280
  if text:
@@ -298,13 +299,18 @@ def convert_text_to_speech(transcript, text, language="english"):
298
  audio_path = elevenlabs_generate_audio(text)
299
 
300
  if SAVING_ENABLED:
301
- unique_id = str(uuid.uuid4())
 
 
 
 
 
302
 
303
- blob = audio_bucket.blob(AUDIO_STORAGE_PATH + unique_id + "_" + audio_path)
304
  blob.upload_from_filename(audio_path)
305
- gcp_audio_path = f"gs://{BUCKET_NAME}/{AUDIO_STORAGE_PATH}{unique_id}_{audio_path}"
306
 
307
- insert_aya_audio(connection, transcript, text, gcp_audio_path)
308
 
309
  return audio_path
310
  else:
@@ -595,7 +601,7 @@ with demo:
595
  show_progress="full",
596
  ).then(
597
  convert_text_to_speech,
598
- inputs=[e2e_audio_file_trans, e2e_audio_file_aya_response],
599
  outputs=[e2e_aya_audio_response],
600
  show_progress="full",
601
  )
 
68
  BUCKET_NAME = os.getenv("BUCKET_NAME")
69
  AUDIO_BUCKET = os.getenv("AUDIO_BUCKET")
70
  IMAGE_STORAGE_PATH = os.getenv("IMAGE_STORAGE_PATH")
71
+ AUDIO_INPUT_STORAGE_PATH = os.getenv("AUDIO_INPUT_STORAGE_PATH")
72
+ AUDIO_RESPONSE_STORAGE_PATH = os.getenv("AUDIO_RESPONSE_STORAGE_PATH")
73
  SAVING_ENABLED = True
74
 
75
  storage_client = storage.Client()
 
275
 
276
  return cleaned_text
277
 
278
+ def convert_text_to_speech(input_audio_file, transcript, text, language="english"):
279
 
280
  # do language detection to determine voice of speech response
281
  if text:
 
299
  audio_path = elevenlabs_generate_audio(text)
300
 
301
  if SAVING_ENABLED:
302
+ input_audio_file_unique_id = str(uuid.uuid4())
303
+ response_audio_file_unique_id = str(uuid.uuid4())
304
+
305
+ blob = audio_bucket.blob(AUDIO_INPUT_STORAGE_PATH + input_audio_file_unique_id + "_" + input_audio_file)
306
+ blob.upload_from_filename(input_audio_file)
307
+ gcp_input_audio_file_path = f"gs://{BUCKET_NAME}/{AUDIO_INPUT_STORAGE_PATH}{input_audio_file_unique_id}_{input_audio_file}"
308
 
309
+ blob = audio_bucket.blob(AUDIO_RESPONSE_STORAGE_PATH + response_audio_file_unique_id + "_" + audio_path)
310
  blob.upload_from_filename(audio_path)
311
+ gcp_response_audio_file_path = f"gs://{BUCKET_NAME}/{AUDIO_RESPONSE_STORAGE_PATH}{response_audio_file_unique_id}_{audio_path}"
312
 
313
+ insert_aya_audio(connection, transcript, text, gcp_response_audio_file_path, gcp_input_audio_file_path)
314
 
315
  return audio_path
316
  else:
 
601
  show_progress="full",
602
  ).then(
603
  convert_text_to_speech,
604
+ inputs=[e2e_audio_file, e2e_audio_file_trans, e2e_audio_file_aya_response],
605
  outputs=[e2e_aya_audio_response],
606
  show_progress="full",
607
  )
aya_vision_utils.py CHANGED
@@ -107,15 +107,21 @@ def get_base64_image_size(base64_string):
107
  return size_bytes
108
 
109
 
110
- def insert_aya_audio(connection, user_prompt, text_response, audio_response_file_path):
111
  with connection.begin():
112
  connection.execute(
113
  sqlalchemy.text("""
114
- INSERT INTO aya_audio (user_prompt, text_response, audio_response_file_path, timestamp)
115
- VALUES (:user_prompt, :text_response, :audio_response_file_path, :timestamp)
116
  """),
117
- {"user_prompt": user_prompt, "text_response": text_response, "audio_response_file_path": audio_response_file_path, "timestamp": datetime.now()}
118
- )
 
 
 
 
 
 
119
 
120
  def insert_aya_image(connection, user_prompt, generated_img_desc, image_response_file_path):
121
  with connection.begin():
 
107
  return size_bytes
108
 
109
 
110
+ def insert_aya_audio(connection, user_prompt, text_response, audio_response_file_path, input_audio_file_path):
111
  with connection.begin():
112
  connection.execute(
113
  sqlalchemy.text("""
114
+ INSERT INTO aya_audio (user_prompt, text_response, input_audio_file_path, audio_response_file_path, timestamp)
115
+ VALUES (:user_prompt, :text_response, :input_audio_file_path, :audio_response_file_path, :timestamp)
116
  """),
117
+ {
118
+ "user_prompt": user_prompt,
119
+ "text_response": text_response,
120
+ "input_audio_file_path": input_audio_file_path,
121
+ "audio_response_file_path": audio_response_file_path,
122
+ "timestamp": datetime.now()
123
+ }
124
+ )
125
 
126
  def insert_aya_image(connection, user_prompt, generated_img_desc, image_response_file_path):
127
  with connection.begin():