Spaces:

helvekami
/

ShukaNote

Running

App Files Files Community

ShukaNote / app.py

helvekami

Updated Gradio App

fbc6758 4 months ago

raw

history blame

2.35 kB

	import gradio as gr
	import transformers
	import librosa
	import torch

	# Build the Shuka inference pipeline once, at import time.
	# When CUDA is available the model runs on GPU 0 in bfloat16; otherwise it
	# stays on the CPU (device -1) and the dtype is left at the library default.
	_cuda_ready = torch.cuda.is_available()
	pipe = transformers.pipeline(
	    model="sarvamai/shuka_v1",
	    trust_remote_code=True,  # the model repository ships its own pipeline code
	    device=0 if _cuda_ready else -1,
	    torch_dtype=torch.bfloat16 if _cuda_ready else None,
	)

	def _to_mono_float32(audio_data):
	    """Return *audio_data* as a 1-D float32 waveform.

	    Integer PCM samples are scaled by the full-scale value of their dtype so
	    the result lies in [-1.0, 1.0]; other dtypes are cast to float32 as-is.
	    Arrays with more than one dimension are assumed to be laid out as
	    (samples, channels) and are averaged down to a single channel.
	    """
	    import numpy as np  # local import: the file header does not import numpy

	    samples = np.asarray(audio_data)
	    if np.issubdtype(samples.dtype, np.integer):
	        limits = np.iinfo(samples.dtype)
	        full_scale = float(max(abs(limits.min), limits.max))
	        samples = samples.astype(np.float32) / full_scale
	    else:
	        samples = samples.astype(np.float32)

	    if samples.ndim > 1:
	        # Down-mix to mono so resampling runs along the time axis.
	        samples = samples.mean(axis=1)
	    return samples


	def process_audio(audio):
	    """
	    Processes the input audio and returns a text response generated by the Shuka model.

	    Args:
	        audio: Either None or a ``(sample_rate, samples)`` pair as delivered by
	            the Gradio audio input.

	    Returns:
	        The generated text, or a human-readable error message when the input
	        is missing or malformed, or when resampling or inference fails.
	        Failures are reported as strings rather than raised so the UI can
	        display them.
	    """
	    target_rate = 16000

	    if audio is None:
	        return "No audio provided. Please upload or record an audio file."

	    try:
	        # Gradio returns a tuple: (sample_rate, numpy_array)
	        sample_rate, audio_data = audio
	    except (TypeError, ValueError) as e:
	        return f"Error processing audio input: {e}"

	    if audio_data is None or len(audio_data) == 0:
	        return "Audio data is empty. Please try again with a valid audio file."

	    # librosa.resample only accepts floating-point input, so convert
	    # integer PCM (and any multi-channel layout) to a mono float waveform first.
	    try:
	        audio_data = _to_mono_float32(audio_data)
	    except (TypeError, ValueError) as e:
	        return f"Error processing audio input: {e}"

	    # Resample to 16000 Hz if necessary
	    if sample_rate != target_rate:
	        try:
	            audio_data = librosa.resample(
	                audio_data, orig_sr=sample_rate, target_sr=target_rate
	            )
	            sample_rate = target_rate
	        except Exception as e:
	            return f"Error during resampling: {e}"

	    # Define conversation turns for the model. The user turn carries the
	    # audio placeholder token; it must be written without backslashes.
	    turns = [
	        {'role': 'system', 'content': 'Respond naturally and informatively.'},
	        {'role': 'user', 'content': '<|audio|>'},
	    ]

	    try:
	        result = pipe(
	            {'audio': audio_data, 'turns': turns, 'sampling_rate': sample_rate},
	            max_new_tokens=512,
	        )
	    except Exception as e:
	        return f"Error during model processing: {e}"

	    # Extract generated text; fall back to str() for any unexpected shape.
	    if isinstance(result, str):
	        return result
	    if isinstance(result, list) and result and isinstance(result[0], dict):
	        return result[0].get('generated_text', '')
	    return str(result)

	# Assemble the Gradio UI: a single audio input wired to process_audio, with
	# the reply shown as plain text.
	# Recording straight from the microphone may require a Gradio version whose
	# Audio component supports the "source" option.
	_audio_input = gr.Audio(type="numpy")  # callback receives (sample_rate, numpy_array)
	iface = gr.Interface(
	    fn=process_audio,
	    inputs=_audio_input,
	    outputs="text",
	    title="Sarvam AI Shuka Voice Demo",
	    description="Upload an audio file and get a response using Sarvam AI's Shuka model.",
	)

	if __name__ == "__main__":
	    import os  # local import: only the script entry point needs it

	    # Port 7861 remains the default (7860 may already be in use locally), but
	    # GRADIO_SERVER_PORT, when set, takes precedence so a hosting environment
	    # can choose the port without editing this file.
	    # NOTE(review): confirm which port the deployment target expects.
	    iface.launch(server_port=int(os.environ.get("GRADIO_SERVER_PORT", "7861")))