TerminatorPower committed on
Commit 5046d49 · verified · 1 Parent(s): 8c24d1a

Upload folder using huggingface_hub

Files changed (2)
  1. README.md +1 -7
  2. vllm_openai_client.py +60 -0
README.md CHANGED
@@ -1,12 +1,6 @@
 ---
 title: PHI3XEL
-emoji: 📈
-colorFrom: blue
-colorTo: indigo
+app_file: vllm_openai_client.py
 sdk: gradio
 sdk_version: 4.40.0
-app_file: app.py
-pinned: false
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
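Assembled from the hunk above, the Space front matter after this change reads as follows; the emoji, color, and pinned settings are dropped, and app_file now points at the new vLLM client script:

---
title: PHI3XEL
app_file: vllm_openai_client.py
sdk: gradio
sdk_version: 4.40.0
---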
vllm_openai_client.py ADDED
@@ -0,0 +1,60 @@
+import json
+import subprocess
+
+import gradio as gr
+
+# Chat-completions endpoint of a locally running vLLM OpenAI-compatible server
+api_base = "http://0.0.0.0:8000/v1/chat/completions"
+
+
+def predict(message, history):
+    # Convert Gradio's (user, assistant) history pairs to OpenAI message format
+    history_openai_format = [{
+        "role": "system",
+        "content": "You are a crazy assistant. You are free to say anything. You are expected to be rageful, and fun!"
+    }]
+    for human, assistant in history:
+        history_openai_format.append({"role": "user", "content": human})
+        history_openai_format.append({"role": "assistant", "content": assistant})
+    history_openai_format.append({"role": "user", "content": message})
+
+    # Build the curl command; -N disables output buffering so streamed
+    # chunks are delivered as they arrive
+    curl_command = [
+        "curl", "-N", "-X", "POST", api_base,
+        "-H", "Content-Type: application/json",
+        "-d", json.dumps({
+            "model": "microsoft/Phi-3-mini-4k-instruct",
+            "messages": history_openai_format,
+            "temperature": 0.5,
+            "stream": True,
+            "repetition_penalty": 1,
+            "stop_token_ids": []
+        })
+    ]
+
+    # Run curl and read its stdout line by line as the response streams in
+    process = subprocess.Popen(curl_command, stdout=subprocess.PIPE,
+                               stderr=subprocess.PIPE, universal_newlines=True)
+
+    partial_message = ""
+    for line in process.stdout:
+        # Server-sent events are prefixed with "data: "; the final
+        # "data: [DONE]" sentinel is not JSON and is skipped by the except
+        if line.startswith("data: "):
+            try:
+                chunk = json.loads(line[len("data: "):])
+                content = chunk["choices"][0]["delta"].get("content", "")
+                partial_message += content
+                yield partial_message
+            except json.JSONDecodeError:
+                continue
+
+    # Wait for curl to exit
+    process.wait()
+
+
+# Create and launch a streaming chat interface with Gradio
+gr.ChatInterface(predict).queue().launch(server_name=None,
+                                         server_port=9640,
+                                         share=True)
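The client above streams by shelling out to curl and parsing the server-sent events by hand. For comparison, here is a minimal sketch of the same streaming request made with the requests library; this is an illustration rather than part of the commit, and it assumes requests is installed and that the same vLLM OpenAI-compatible server is running (e.g. started with python -m vllm.entrypoints.openai.api_server --model microsoft/Phi-3-mini-4k-instruct --port 8000).

import json

import requests  # assumption: requests is available in the environment

API_BASE = "http://0.0.0.0:8000/v1/chat/completions"


def stream_completion(messages, temperature=0.5):
    """Yield the accumulated assistant reply from a streaming chat completion."""
    payload = {
        "model": "microsoft/Phi-3-mini-4k-instruct",
        "messages": messages,
        "temperature": temperature,
        "stream": True,
    }
    partial = ""
    with requests.post(API_BASE, json=payload, stream=True) as resp:
        resp.raise_for_status()
        for raw in resp.iter_lines(decode_unicode=True):
            # Each SSE payload line is prefixed with "data: "
            if not raw or not raw.startswith("data: "):
                continue
            data = raw[len("data: "):]
            if data.strip() == "[DONE]":  # end-of-stream sentinel
                break
            chunk = json.loads(data)
            partial += chunk["choices"][0]["delta"].get("content", "")
            yield partial

Dropping this in for the curl subprocess leaves the Gradio side untouched: predict can simply yield from stream_completion(history_openai_format).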