helvekami committed on
Commit
400fc00
·
1 Parent(s): 86fab4a

Updated Gradio App

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -20,7 +20,7 @@ def process_audio(audio):
20
  return "No audio provided. Please upload or record an audio file."
21
 
22
  try:
23
- # Gradio returns a tuple: (sample_rate, numpy_array)
24
  sample_rate, audio_data = audio
25
  except Exception as e:
26
  return f"Error processing audio input: {e}"
@@ -28,7 +28,14 @@ def process_audio(audio):
28
  if audio_data is None or len(audio_data) == 0:
29
  return "Audio data is empty. Please try again with a valid audio file."
30
 
31
- # Convert audio data to float if not already floating-point.
 
 
 
 
 
 
 
32
  if not np.issubdtype(audio_data.dtype, np.floating):
33
  audio_data = audio_data.astype(np.float32)
34
 
@@ -62,12 +69,12 @@ def process_audio(audio):
62
  # Create the Gradio interface.
63
  iface = gr.Interface(
64
  fn=process_audio,
65
- inputs=gr.Audio(type="numpy"), # File upload for audio.
66
  outputs="text",
67
  title="Sarvam AI Shuka Voice Demo",
68
  description="Upload an audio file and get a response using Sarvam AI's Shuka model."
69
  )
70
 
71
  if __name__ == "__main__":
72
- # Set share=True to create a public link, and specify a server port.
73
  iface.launch(share=True, server_port=7861)
 
20
  return "No audio provided. Please upload or record an audio file."
21
 
22
  try:
23
+ # Gradio returns a tuple: (sample_rate, audio_data)
24
  sample_rate, audio_data = audio
25
  except Exception as e:
26
  return f"Error processing audio input: {e}"
 
28
  if audio_data is None or len(audio_data) == 0:
29
  return "Audio data is empty. Please try again with a valid audio file."
30
 
31
+ # Ensure audio_data is a numpy array.
32
+ audio_data = np.asarray(audio_data)
33
+
34
+ # If audio data is multi-dimensional, squeeze to 1D.
35
+ if audio_data.ndim > 1:
36
+ audio_data = np.squeeze(audio_data)
37
+
38
+ # Convert audio data to floating-point if it's not already.
39
  if not np.issubdtype(audio_data.dtype, np.floating):
40
  audio_data = audio_data.astype(np.float32)
41
 
 
69
  # Create the Gradio interface.
70
  iface = gr.Interface(
71
  fn=process_audio,
72
+ inputs=gr.Audio(type="numpy"), # Using file upload for audio input.
73
  outputs="text",
74
  title="Sarvam AI Shuka Voice Demo",
75
  description="Upload an audio file and get a response using Sarvam AI's Shuka model."
76
  )
77
 
78
  if __name__ == "__main__":
79
+ # Set share=True to create a public link and use a non-default port.
80
  iface.launch(share=True, server_port=7861)