# Captured from a Hugging Face Space file view (Space status: Running; file size: 3,981 bytes).
# The per-line commit-hash and line-number gutters of the web view are not part of the program.
import gradio as gr
import requests
import json
import os
# Key sent as the X-API-Key header on every inference request.
# Checked at import time so a missing or empty value fails immediately.
API_KEY = os.environ.get("API_KEY")
if API_KEY is None or API_KEY == "":
    raise ValueError("API_KEY environment variable must be set")
# Remote inference endpoint that receives the uploaded audio.
_PROCESS_AUDIO_URL = 'https://nexa-omni.nexa4ai.com/process-audio/'
# (connect, read) timeouts in seconds. With stream=True the read timeout bounds
# the wait between consecutive chunks, not the total generation time.
_REQUEST_TIMEOUT = (10, 120)
# Prefix of the lines that carry a JSON event in the streamed response.
_EVENT_PREFIX = 'data: '
# Whitespace / chat-template tokens that may precede the actual answer; at most
# _MAX_SKIPPED_LEADING_TOKENS of them are dropped from the displayed text.
_LEADING_NOISE_TOKENS = (" ", " \n", "\n", "<|im_start|>", "assistant")
_MAX_SKIPPED_LEADING_TOKENS = 3


def _parse_stream_event(raw_line):
    """Decode one streamed line into an event dict, or return None if it carries no usable event."""
    if not raw_line:
        # iter_lines() yields empty keep-alive lines between events.
        return None
    text = raw_line.decode('utf-8')
    if not text.startswith(_EVENT_PREFIX):
        return None
    try:
        event = json.loads(text[len(_EVENT_PREFIX):])
    except json.JSONDecodeError:
        # A malformed line is skipped so one bad chunk does not abort the stream.
        return None
    # Only JSON objects can carry the "status" field the caller dispatches on.
    return event if isinstance(event, dict) else None


def process_audio_stream(audio_path, max_tokens):
    """
    Send an audio file to the inference service and stream the generated text.

    This is a generator: each yielded value is the full response text
    accumulated so far. Failures are reported as yielded "Error: ..." strings
    rather than raised, so the caller always receives something displayable.

    Args:
        audio_path: Path of the audio file to upload; a falsy value yields a
            prompt asking the user to provide audio.
        max_tokens: Generation limit forwarded to the service as a form field.

    Yields:
        str: The response text so far, or a single error/help message.
    """
    if not audio_path:
        yield "Please upload or record an audio file first."
        return

    try:
        # The multipart body is sent before requests.post() returns, so the
        # file handle is only needed for the duration of the call itself.
        with open(audio_path, 'rb') as audio_file:
            # NOTE(review): the upload is always labelled audio.wav / audio/wav
            # regardless of the real format; kept as-is because the server's
            # expectations are not visible from this file.
            response = requests.post(
                _PROCESS_AUDIO_URL,
                files={'audio_file': ('audio.wav', audio_file, 'audio/wav')},
                data={'prompt': "", 'max_tokens': max_tokens},
                headers={'X-API-Key': API_KEY},
                stream=True,
                timeout=_REQUEST_TIMEOUT,
            )

        # The context manager releases the connection even when the loop is
        # left early (break/return) or the generator is closed by the caller.
        with response:
            if response.status_code != 200:
                yield f"Error: Server returned status code {response.status_code}"
                return

            response_text = ""
            skipped_leading = 0
            for raw_line in response.iter_lines():
                event = _parse_stream_event(raw_line)
                if event is None:
                    continue

                status = event.get("status")
                if status == "generating":
                    token = event.get("token", "")
                    if (skipped_leading < _MAX_SKIPPED_LEADING_TOKENS
                            and token in _LEADING_NOISE_TOKENS):
                        skipped_leading += 1
                        continue
                    response_text += token
                    yield response_text
                elif status == "complete":
                    break
                elif status == "error":
                    yield f"Error: {event.get('error', 'unknown error')}"
                    break
    except Exception as e:
        # UI boundary: surface any failure (file access, network, timeout) as text.
        yield f"Error processing request: {str(e)}"
# Build the Gradio UI around process_audio_stream: an audio input, a token-limit
# slider, and a read-only textbox that shows the values the handler yields.
_audio_input = gr.Audio(
    type="filepath",
    label="Upload or Record Audio",
    sources=["upload", "microphone"],
)
_max_tokens_input = gr.Slider(
    minimum=50,
    maximum=200,
    value=50,
    step=1,
    label="Max Tokens",
)
_response_output = gr.Textbox(label="Response", interactive=False)

# HTML blurb shown under the title.
_DESCRIPTION = """
OmniAudio-2.6B is a compact audio-language model optimized for edge deployment.
Model Repo: <a href="https://huggingface.co/NexaAIDev/OmniAudio-2.6B">NexaAIDev/OmniAudio-2.6B</a>
Blog: <a href="https://nexa.ai/blogs/omniaudio-2.6b">OmniAudio-2.6B Blog</a>
Upload an audio file and optionally provide a prompt to analyze the audio content."""

# Bundled sample clips; every example row pairs a clip with a 200-token limit.
_EXAMPLE_CLIPS = (
    "voice_qa",
    "voice_in_conversation",
    "creative_content_generation",
    "record_summary",
    "change_tone",
)
_EXAMPLES = [[f"example_audios/{clip}.mp3", 200] for clip in _EXAMPLE_CLIPS]

demo = gr.Interface(
    fn=process_audio_stream,
    inputs=[_audio_input, _max_tokens_input],
    outputs=_response_output,
    title="NEXA OmniAudio-2.6B",
    description=_DESCRIPTION,
    examples=_EXAMPLES,
)
if __name__ == "__main__":
    # Enable the request queue with room for at most 20 entries, then serve
    # on all network interfaces at port 7860.
    demo.queue(max_size=20)
    demo.launch(server_name="0.0.0.0", server_port=7860)
# (trailing table delimiter from the web capture removed)