Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,897 Bytes
2ed7223 011a958 2ed7223 c621812 8b70c99 011a958 039f770 011a958 62dda31 dc03737 011a958 dc03737 8b70c99 c621812 dc03737 2ed7223 011a958 ab07d9e 8b70c99 2ed7223 011a958 2ed7223 011a958 c621812 011a958 2ed7223 011a958 c621812 011a958 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
from typing import Dict, List, Optional, Tuple, Union

import gradio as gr
import numpy as np
import spaces
import torch
import transformers
# Constants
# Model identifier handed to transformers.AutoModel.from_pretrained().
MODEL_NAME = 'sarvamai/shuka_v1'
# Sampling rate (samples per second) that the pipeline is told the audio uses.
SAMPLE_RATE = 16000
# Cap on generated tokens per response (forwarded as max_new_tokens).
MAX_NEW_TOKENS = 256
# Load the ShukaPipeline
def load_pipeline():
    """Load the MODEL_NAME checkpoint and wrap it in a "shuka-pipeline".

    The model is fetched with ``trust_remote_code=True``, so the checkpoint's
    own Python code is executed while loading.

    Returns:
        The object produced by ``transformers.pipeline`` for the
        "shuka-pipeline" task, requested in float16 and placed on CUDA
        device 0 when CUDA is available, otherwise on the CPU (device -1).
    """
    shuka_model = transformers.AutoModel.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
    )
    # transformers uses -1 as the "run on CPU" device index.
    target_device = 0 if torch.cuda.is_available() else -1
    return transformers.pipeline(
        "shuka-pipeline",
        model=shuka_model,
        torch_dtype=torch.float16,
        device=target_device,
    )
# Build the pipeline once at import time; transcribe_and_respond() reuses this
# module-level instance for every request.
pipe = load_pipeline()
def create_conversation_turns(
    prompt: str,
    system_prompt: str = 'Respond naturally and informatively.',
) -> List[Dict[str, str]]:
    """Build the two-turn chat history (system + user) sent to the pipeline.

    Args:
        prompt: Content of the user turn, used verbatim.
        system_prompt: Content of the system turn. Defaults to the
            instruction this app has always used, so existing one-argument
            callers get the same result as before.

    Returns:
        A new list ``[system_turn, user_turn]`` where each turn is a dict
        with ``'role'`` and ``'content'`` keys.
    """
    return [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': prompt},
    ]
def _prepare_audio(
    audio: Union[np.ndarray, Tuple[int, np.ndarray]],
    target_rate: int,
) -> np.ndarray:
    """Turn a Gradio audio payload into mono float32 samples at target_rate.

    Args:
        audio: Either the ``(sample_rate, samples)`` pair that
            ``gr.Audio(type="numpy")`` delivers, or a bare sample array,
            which is taken to be at ``target_rate`` already.
        target_rate: Sampling rate (samples per second) of the returned array.

    Returns:
        A one-dimensional float32 array.

    Raises:
        ValueError: If the payload has no samples or a non-positive rate.
    """
    if isinstance(audio, (tuple, list)):
        source_rate, samples = audio
    else:
        source_rate, samples = target_rate, audio

    samples = np.asarray(samples)
    if samples.size == 0:
        raise ValueError("audio contains no samples")
    if source_rate <= 0:
        raise ValueError(f"invalid sampling rate: {source_rate}")

    # Signed-integer PCM is rescaled into [-1.0, 1.0); a plain astype() would
    # keep the raw integer magnitudes.
    if np.issubdtype(samples.dtype, np.signedinteger):
        full_scale = float(np.iinfo(samples.dtype).max) + 1.0
        samples = samples.astype(np.float32) / np.float32(full_scale)
    elif samples.dtype != np.float32:
        samples = samples.astype(np.float32)

    # Multi-channel recordings are averaged down to mono; channels are
    # assumed to be on the last axis, i.e. shape (samples, channels).
    if samples.ndim > 1:
        samples = samples.mean(axis=-1)

    if source_rate != target_rate:
        # Dependency-free linear-interpolation resample. NOTE(review): this
        # applies no anti-aliasing filter; swap in a proper resampler if one
        # becomes available to the project.
        out_len = max(1, int(round(samples.shape[0] * target_rate / source_rate)))
        src_times = np.arange(samples.shape[0]) / source_rate
        dst_times = np.arange(out_len) / target_rate
        samples = np.interp(dst_times, src_times, samples).astype(np.float32)

    return samples


@spaces.GPU(duration=120)
def transcribe_and_respond(
    audio: Optional[Union[np.ndarray, Tuple[int, np.ndarray]]],
) -> str:
    """Run the recorded audio through the pipeline and return its response.

    Args:
        audio: The value produced by the Gradio audio input: a
            ``(sample_rate, samples)`` pair, a bare sample array, or ``None``
            when nothing has been recorded.

    Returns:
        Whatever ``pipe`` returns for the request (presumably the generated
        text; verify against the pipeline implementation), or a
        human-readable message when there is no audio or processing fails.
    """
    if audio is None:
        return "No audio received. Please record something and try again."

    try:
        samples = _prepare_audio(audio, SAMPLE_RATE)
        # Create input for the pipeline; the "<|audio|>" user turn is the
        # placeholder the original request used for the audio content.
        inputs = {
            'audio': samples,
            'turns': create_conversation_turns("<|audio|>"),
            'sampling_rate': SAMPLE_RATE,
        }
        # Generate response
        response = pipe(
            inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            temperature=0.7,
            repetition_penalty=1.1,
        )
        return response
    except Exception as e:
        # UI boundary: report the failure in the output box instead of
        # letting the exception escape to Gradio.
        return f"Error processing audio: {str(e)}"
# Create the Gradio interface
iface = gr.Interface(
    fn=transcribe_and_respond,
    # gr.Audio takes no `sampling_rate` argument (passing one raises
    # TypeError at construction). With type="numpy" the recording's actual
    # rate is delivered to the handler as part of a (sample_rate, samples)
    # pair instead.
    inputs=gr.Audio(sources=["microphone"], type="numpy"),
    outputs="text",
    title="Live Voice Input for Transcription and Response",
    description="Speak into your microphone, and the model will respond naturally and informatively.",
    # live=True re-runs the handler whenever the audio input changes.
    live=True,
)
# Launch the app
if __name__ == "__main__":
    # Start the Gradio server only when this file is run as a script.
    iface.launch()