File size: 3,452 Bytes
7cfa5bf
8072750
 
 
 
 
 
 
 
 
 
 
dbdc900
8072750
 
 
dbdc900
7cfa5bf
 
 
 
 
 
 
b9c7bd3
7cfa5bf
 
 
 
 
 
 
8072750
 
 
 
7cfa5bf
 
8072750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7cfa5bf
8072750
 
7cfa5bf
8072750
 
0c41d6c
8072750
 
0c41d6c
8072750
 
0c41d6c
8072750
dbdc900
 
0c41d6c
 
 
8072750
0c41d6c
 
 
 
8072750
 
 
 
7cfa5bf
 
 
 
 
 
 
 
8072750
 
 
7cfa5bf
 
 
 
 
 
 
 
dbdc900
7cfa5bf
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import gradio as gr
import requests
import json
import os
from dotenv import load_dotenv

def _mask_secret(secret: str, visible: int = 4) -> str:
    """Return a log-safe rendering of *secret*.

    Only the last ``visible`` characters are revealed, and only when the
    secret is long enough that this suffix is a small fraction of it.
    Shorter secrets are masked completely.
    """
    if len(secret) < visible * 4:
        return "*" * 8
    return f"{'*' * 8}{secret[-visible:]}"


# Load variables from a local .env file (if one exists) before reading the
# environment below.
load_dotenv()

API_URL = os.getenv("API_URL")
API_TOKEN = os.getenv("API_TOKEN")

# Fail fast at import time: both values are required to call the backend.
if not API_URL or not API_TOKEN:
    raise ValueError("invalid API_URL || API_TOKEN")

print("[INFO] starting:")
print(f"[INFO] API_URL: {API_URL[:6]}...{API_URL[-12:]}")
# Never echo token material beyond a short suffix: prefix+suffix slicing used
# to expose 20 characters, and the entire value for short tokens.
print(f"[INFO] API_TOKEN: {_mask_secret(API_TOKEN)}")

"""
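NOTE: this app does not call `huggingface_hub`; it sends chat requests to the endpoint configured in API_URL using `requests`.
For background on `huggingface_hub` Inference API support, see the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference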
"""

# (connect, read) timeouts in seconds for the upstream call. The read timeout
# is generous because the request is non-streaming, so the whole completion
# must be generated before any bytes come back.
_REQUEST_TIMEOUT = (10, 600)


def _extract_final_answer(content: str) -> str:
    """Strip reasoning text from raw model output, keeping the final answer."""
    # Drop the reasoning section only when a complete <think>...</think> pair
    # is present; keep whatever follows the last closing tag.
    if '<think>' in content and '</think>' in content:
        content = content.split('</think>')[-1].strip()
    # Keep only the text after the last "**Final Answer**" marker, if any.
    if '**Final Answer**' in content:
        content = content.split('**Final Answer**')[-1].strip()
    return content


def respond(
    message,
    history: list[dict],
    system_message,
    max_tokens,
    temperature,
    top_p,
) -> str:
    """Send the conversation to the backend and return the assistant's reply.

    Args:
        message: The new user message.
        history: Earlier turns as role/content dicts; ``None`` is treated as
            an empty history.
        system_message: Text sent as the leading ``system`` message.
        max_tokens: Upper bound on generated tokens; forwarded as an integer.
        temperature: Sampling temperature, forwarded unchanged.
        top_p: Nucleus-sampling threshold, forwarded unchanged.

    Returns:
        The cleaned reply text on success. ``"Service temporarily
        unavailable"`` when the backend answers with a non-200 status, no
        choices, or empty content. ``"Service error occurred"`` when the
        request or response parsing raises (including a timeout).
    """
    # Conversation order: system prompt, prior turns, then the new user turn.
    messages = [
        {"role": "system", "content": system_message},
        *(history or []),
        {"role": "user", "content": message},
    ]

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_TOKEN}"
    }

    data = {
        "model": "/data/DMind-1",
        "stream": False,
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": 20,
        "min_p": 0.1,
        # Honor the caller's limit (previously hard-coded to 32768); cast so a
        # float such as 16384.0 is serialized as a JSON integer.
        "max_tokens": int(max_tokens)
    }

    print("[INFO] process user msg...")
    print(f"[INFO] userMsg: {message}")

    try:
        # The timeout keeps a stalled backend from blocking this handler forever.
        with requests.post(
            API_URL, headers=headers, json=data, timeout=_REQUEST_TIMEOUT
        ) as r:
            print(f"[INFO] response status: {r.status_code}")
            if r.status_code != 200:
                print(f"[ERROR] Bad status code: {r.status_code}, response: {r.text}")
                return "Service temporarily unavailable"

            json_response = r.json()
            print(f"[INFO] response json: {json_response}")
            if 'choices' not in json_response or not json_response['choices']:
                print(f"[ERROR] No choices in response: {json_response}")
                return "Service temporarily unavailable"

            content = json_response['choices'][0].get('message', {}).get('content', '')
            print(f"[INFO] response content: {content}")
            if not content:
                print("[ERROR] Empty content in response")
                return "Service temporarily unavailable"

            content = _extract_final_answer(content)
            print(f"[INFO] final response: {content}")
            return content
    except Exception as e:
        # UI boundary: report any failure as a chat message instead of raising.
        print(f"[ERROR] Request error: {e}")
        return "Service error occurred"


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Extra controls shown with the chat box. Their values are handed to `respond`
# after (message, history), in the order listed here.
_extra_controls = [
    gr.Textbox(
        label="System message",
        value="You are DMind AI Assistant, built by DMind.AI. Never present as Qwen, ChatGPT, Tongyi, OpenAI, etc.",
    ),
    gr.Slider(label="Max new tokens", minimum=1, maximum=32768, value=16384, step=1),
    gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, value=0.6, step=0.1),
    gr.Slider(label="Top-p (nucleus sampling)", minimum=0.1, maximum=1.0, value=0.95, step=0.05),
]

# Chat UI wired to `respond`; history is exchanged as role/content dicts.
demo = gr.ChatInterface(
    fn=respond,
    type="messages",
    additional_inputs=_extra_controls,
)


# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()