Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,720 Bytes
5956319 74995d7 5956319 71b7f64 3bc8972 74b7d0e f4995ab 9c1d271 76789b2 5956319 76789b2 5956319 9c1d271 5956319 9c1d271 434dec3 3bc8972 89bcd26 3d71752 3bc8972 89bcd26 3bc8972 9c1d271 3bc8972 9c1d271 f4c5213 89bcd26 5c1dadc 89bcd26 5c1dadc 89bcd26 5c1dadc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
import spaces
import gradio as gr
import torch
import subprocess
import numpy as np
import requests
# Function to start the ochat server
@spaces.GPU
def start_ochat_server():
    """Launch the ochat OpenAI-compatible API server as a background process.

    Returns:
        A human-readable status string. The server itself keeps running in
        the child process; it is intentionally not waited on here.
    """
    print(f"Is CUDA available: {torch.cuda.is_available()}")
    # Bug fix: only query the device name when CUDA is actually present —
    # torch.cuda.get_device_name() raises RuntimeError on CPU-only hosts,
    # which would crash before the server was ever launched.
    if torch.cuda.is_available():
        print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
    command = [
        "python", "-m", "ochat.serving.openai_api_server",
        "--model", "openchat/openchat_3.5"
    ]
    # Start the server in a separate process
    try:
        subprocess.Popen(command)
        return "ochat server started successfully"
    except Exception as e:
        return f"Failed to start ochat server: {e}"
# Start the server at import time so it is (hopefully) up before the UI loads.
# NOTE(review): Popen returns immediately — the server may still be warming up
# when the first request arrives; chat_with_ochat surfaces that as an error.
start_ochat_server()
# Function to send a message to the ochat server and get a response
def chat_with_ochat(message):
    """Send a single user message to the local ochat server and return the reply.

    Args:
        message: The user's message text.

    Returns:
        The assistant's reply text on success, otherwise an ``"Error: ..."``
        string describing the HTTP status or the request failure.
    """
    url = "http://0.0.0.0:18888/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    data = {
        "model": "openchat_3.5",
        "messages": [{"role": "user", "content": message}]
    }
    try:
        # Bug fix: requests has no default timeout — without one, a hung or
        # still-starting server would block this call (and the UI) forever.
        response = requests.post(url, json=data, headers=headers, timeout=120)
        if response.status_code == 200:
            return response.json()['choices'][0]['message']['content']
        else:
            return f"Error: Server responded with status code {response.status_code}"
    except requests.RequestException as e:
        return f"Error: {e}"
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")
    history = gr.State([])  # accumulated (user, bot) message pairs

    def _respond(message, chat_history):
        """Adapter between Gradio's (message, history) event signature and
        chat_with_ochat, which takes only the message text."""
        reply = chat_with_ochat(message)
        chat_history = chat_history + [(message, reply)]
        # Two outputs: the Chatbot display and the State, kept in sync.
        return chat_history, chat_history

    # Bug fix: the original wired chat_with_ochat directly with two inputs
    # and two outputs, but it accepts one argument and returns one string —
    # every submit raised a TypeError and the history was never updated.
    msg.submit(_respond, inputs=[msg, history], outputs=[chatbot, history])
    # Bug fix: a cleared Chatbot is an empty list of pairs, not "".
    clear.click(lambda: ([], []), inputs=None, outputs=[chatbot, history])
demo.launch()
|