|
import gradio as gr |
|
import aiohttp |
|
|
|
async def fetch(session, url, context, question):
    """POST a context/question pair to *url* and decode the JSON reply.

    Args:
        session: an open aiohttp.ClientSession used for the request.
        url: inference endpoint to POST to.
        context: background text sent to the model.
        question: the question to answer.

    Returns:
        A ``(result_text, time_text)`` tuple, where ``time_text`` is the
        server-reported inference time formatted as ``"X.XX seconds"``.

    Raises:
        aiohttp.ClientResponseError: on non-2xx HTTP status
        (via ``raise_for_status``).
    """
    body = {"context": context, "question": question}
    async with session.post(url, json=body) as resp:
        resp.raise_for_status()
        payload = await resp.json()
    answer = payload.get("result", "No result")
    elapsed = payload.get("inference_time", 0)
    return answer, f"{elapsed:.2f} seconds"
|
|
|
async def send_message(context, question, url):
    """Query *url* with a context/question pair over a short-lived session.

    Args:
        context: background text sent to the model.
        question: the question to answer.
        url: inference endpoint to POST to.

    Returns:
        A ``(result_text, time_text)`` tuple for the two Gradio output
        boxes. Network and HTTP errors are returned as readable text
        instead of propagating, so the UI shows a message rather than
        an unhandled-exception traceback.
    """
    try:
        async with aiohttp.ClientSession() as session:
            # Renamed the second local from `time` to avoid shadowing the
            # stdlib module name.
            result, elapsed = await fetch(session, url, context, question)
            return result, elapsed
    except aiohttp.ClientError as err:
        # ClientError covers connection failures and the HTTP-status
        # errors raised by fetch()'s raise_for_status().
        return f"Error: {err}", "N/A"
|
|
|
async def send_to_qwen(context, question):
    """Forward the context/question pair to the Qwen inference endpoint."""
    return await send_message(context, question, "https://qwen.nexaai.com/inference")
|
|
|
async def send_to_dolphin(context, question):
    """Forward the context/question pair to the Dolphin inference endpoint."""
    return await send_message(context, question, "https://dolphin.nexaai.com/inference")
|
|
|
# Canned questions offered in the dropdown; selecting one copies it into the
# custom-question box (see the dropdown's change handler below).
predefined_questions = [

    "Tell me where the Nexa AI office is?",

    "Tell me who founded Nexa AI?",

    "Tell me what Nexa AI is famous for?",

    "Tell me what is the mission of Nexa AI?"

]



# Default background context pre-filled into the context textbox; both models
# answer questions against this text.
default_context = """Nexa AI is a Cupertino-based company founded in May 2023 that researches and develops models and tools for on-device AI applications. The company is founded by Alex and Zack. The company is known for its Octopus-series models, which rival large-scale language models in capabilities such as function-calling, multimodality, and action-planning, while remaining efficient and compact for edge device deployment. Nexa AI's mission is to advance on-device AI in collaboration with the global developer community. To this end, the company has created an on-device model hub for users to find, share, and collaborate on open-source AI models optimized for edge devices, as well as an SDK for developers to run and deploy AI models locally."""
|
|
|
# Build the Gradio UI: one shared context/question input area, plus
# side-by-side outputs and timings for the Qwen and Dolphin endpoints.
with gr.Blocks() as demo:

    gr.Markdown("# AI Inference Comparison")



    # Shared inputs fed to both models.
    context = gr.Textbox(value=default_context, label="Background Context", lines=5)

    question_dropdown = gr.Dropdown(choices=predefined_questions, label="Select a question")

    custom_question = gr.Textbox(placeholder="Or enter your custom question here...", label="Custom Question")



    # Model answers, one column per endpoint; read-only.
    with gr.Row():

        output1 = gr.Textbox(label="Output from Qwen Model", interactive=False)

        output2 = gr.Textbox(label="Output from Dolphin Model", interactive=False)

    # Server-reported inference times, matching the outputs above.
    with gr.Row():

        total_time1 = gr.Textbox(label="Total Time for Qwen Model", interactive=False)

        total_time2 = gr.Textbox(label="Total Time for Dolphin Model", interactive=False)



    def update_custom_question(choice):
        """Copy the selected predefined question into the custom-question box."""

        return choice



    question_dropdown.change(update_custom_question, inputs=question_dropdown, outputs=custom_question)



    with gr.Row():

        send_qwen_button = gr.Button("Send to Qwen")

        send_dolphin_button = gr.Button("Send to Dolphin")



    # Each button queries only its own endpoint; both read the same
    # (context, custom_question) inputs. Gradio runs these async handlers
    # on its own event loop.
    send_qwen_button.click(

        send_to_qwen,

        inputs=[context, custom_question],

        outputs=[output1, total_time1]

    )



    send_dolphin_button.click(

        send_to_dolphin,

        inputs=[context, custom_question],

        outputs=[output2, total_time2]

    )



# NOTE(review): launches at import time; consider an
# `if __name__ == "__main__":` guard if this module is ever imported.
demo.launch()