Spaces:

archit11
/

shuka_demo

Sleeping

shuka_demo / app.py

Update app.py

ab07d9e verified 11 months ago

1.51 kB

	import transformers
	import librosa
	import gradio as gr
	import spaces

	# Build the shared inference pipeline once at import time so every request
	# reuses the same loaded model. device=0 selects the first GPU; the weights
	# are requested in bfloat16.
	pipe = transformers.pipeline(
	    model="sarvamai/shuka_v1",
	    device=0,
	    torch_dtype="bfloat16",
	    trust_remote_code=True,  # the model repo supplies its own pipeline code
	)

	@spaces.GPU(duration=120)
	def transcribe_and_respond(audio_file):
	    """Run the speech model on a recorded audio file and return its reply.

	    Args:
	        audio_file: Path to the recording on disk (the Gradio audio input
	            in this file is configured with ``type="filepath"``).

	    Returns:
	        The result of calling the module-level ``pipe`` on the audio, or a
	        string starting with ``"Error processing audio: "`` when anything
	        fails. Errors are reported as text rather than raised so they show
	        up in the text output of the interface.
	    """
	    try:
	        # Reject a missing or non-path input up front.
	        if audio_file is None or not isinstance(audio_file, str):
	            raise ValueError("Invalid audio file input.")

	        # Decode the file at a 16 kHz sampling rate.
	        audio, sr = librosa.load(audio_file, sr=16000)

	        # The user turn must carry the <|audio|> placeholder: it marks where
	        # the recording is inserted into the conversation. An empty user
	        # turn leaves the audio unreferenced by the prompt.
	        turns = [
	            {'role': 'system', 'content': 'Respond naturally and informatively.'},
	            {'role': 'user', 'content': '<|audio|>'},
	        ]

	        # Run inference with the pipeline.
	        return pipe(
	            {'audio': audio, 'turns': turns, 'sampling_rate': sr},
	            max_new_tokens=512,
	        )
	    except Exception as e:
	        # UI boundary: surface any failure as text instead of crashing.
	        return f"Error processing audio: {e}"

	# Audio is captured from the microphone and handed to the callback as a
	# path to the recorded file.
	mic_input = gr.Audio(sources="microphone", type="filepath")

	# Wire the microphone input to the model callback; the reply is shown as text.
	iface = gr.Interface(
	    title="Voice Input for Transcription and Response",
	    description="Record your voice, and the model will respond naturally and informatively.",
	    fn=transcribe_and_respond,
	    inputs=mic_input,
	    outputs="text",
	)

	# Start serving the app.
	iface.launch()