Spaces:

rednote-hilab
/

dots-demo

Running

App Files Files Community

lazyc committed on Jun 5

Commit

1008a71

verified ·

1 Parent(s): 4234311

Update app.py

Browse files

Files changed (1) hide show

app.py +175 -31

app.py CHANGED Viewed

@@ -1,36 +1,185 @@
 import os
 import uuid
 import gradio as gr
 import modelscope_studio.components.antd as antd
 import modelscope_studio.components.antdx as antdx
 import modelscope_studio.components.base as ms
-# from langfuse import Langfuse
-# from langfuse.openai import OpenAI
 from openai import OpenAI
 # =========== Configuration
-# API KEY and API BASE
-client = OpenAI(
-    base_url=os.getenv("API_BASE"),
-    api_key=os.getenv("API_KEY"),
-)
 # MODEL NAME
 model = os.getenv("MODEL_NAME")
 save_history = True
 # =========== Configuration
-is_modelscope_studio = os.getenv('MODELSCOPE_ENVIRONMENT') == 'studio'
 def get_text(text: str, cn_text: str):
     """Pick the Chinese string when the ``is_modelscope_studio`` flag is set, else the default one."""
     return cn_text if is_modelscope_studio else text
 logo_img = os.path.join(os.path.dirname(__file__), "rednote_hilab.png")
 DEFAULT_PROMPTS = [{
@@ -97,7 +246,7 @@ DEFAULT_THEME = {
 def format_history(history):
     messages = [{
         "role": "system",
-        "content": "You are a helpful and harmless assistant.",
     }]
     for item in history:
         if item["role"] == "user":
@@ -131,39 +280,34 @@ class Gradio_Events:
             state: gr.update(value=state_value),
         }
         try:
-            response = client.chat.completions.create(
-                model=model,  # Model-Id
-                messages=history_messages,
-                stream=True)
             thought_done = False
             for chunk in response:
-                # reasoning_content = chunk.choices[0].delta.reasoning_content
-                content = chunk.choices[0].delta.content
                 history[-1]["loading"] = False
                 if content and not thought_done:
-                    thought_done = True
-                    #history[-1]["meta"]["reason_content"] = history[-1]["content"]
-                    # print("Reason: ",history[-1]["meta"]["reason_content"])
                     history[-1]["content"] = ""
-                    # history[-1]["meta"]["thought_end_message"] = get_text("End of Thought", "已深度思考")
-                history[-1]["content"] += content or ""
                 yield {
                     chatbot: gr.update(items=history),
                     state: gr.update(value=state_value)
                 }
             history[-1]["meta"]["end"] = True
-            print("Answer: ",history[-1]["content"])
-            yield {
-                chatbot: gr.update(items=history),
-                state: gr.update(value=state_value),
-            }
         except Exception as e:
             history[-1]["loading"] = False
             history[-1]["meta"]["end"] = True

 import os
 import uuid
+import json
 import gradio as gr
 import modelscope_studio.components.antd as antd
 import modelscope_studio.components.antdx as antdx
 import modelscope_studio.components.base as ms
 from openai import OpenAI
+import requests
+from typing import Generator, Dict, Any
+import logging
+import time
 # =========== Configuration
 # MODEL NAME
 model = os.getenv("MODEL_NAME")
+# Proxy server configuration; each value can be overridden via an environment variable
+PROXY_BASE_URL = os.getenv("PROXY_API_BASE", "http://localhost:8000")
+PROXY_TIMEOUT = int(os.getenv("PROXY_TIMEOUT", 30))
+MAX_RETRIES = int(os.getenv("MAX_RETRIES", 3))
+# Save history
 save_history = True
 # =========== Configuration
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class DeltaObject:
+    """Attribute-style stand-in for an OpenAI delta, built from a plain dict."""
+    def __init__(self, data: dict):
+        # Keys that are absent from the dict become None attributes.
+        self.content, self.role = data.get('content'), data.get('role')
+class ChoiceObject:
+    """Attribute-style stand-in for an OpenAI choice, built from a plain dict.
+
+    Attributes set: ``delta`` (a ``DeltaObject``), ``finish_reason`` (None
+    when absent) and ``index`` (0 when absent).
+    """
+    def __init__(self, choice_data: dict):
+        # `.get('delta', {})` would still return None for an explicit
+        # `"delta": null`, so fall back to an empty dict in both cases.
+        self.delta = DeltaObject(choice_data.get('delta') or {})
+        self.finish_reason = choice_data.get('finish_reason')
+        self.index = choice_data.get('index', 0)
+class ChunkObject:
+    """Attribute-style stand-in for an OpenAI streaming chunk, built from a plain dict.
+
+    ``choices`` becomes a list of ``ChoiceObject``; ``id``, ``object``,
+    ``created`` and ``model`` fall back to '', 'chat.completion.chunk', 0
+    and '' when the corresponding key is absent.
+    """
+    def __init__(self, chunk_data: dict):
+        # A missing or null "choices" is treated as an empty list instead of
+        # failing when the comprehension tries to iterate None.
+        choices_data = chunk_data.get('choices') or []
+        self.choices = [ChoiceObject(choice) for choice in choices_data]
+        self.id = chunk_data.get('id', '')
+        self.object = chunk_data.get('object', 'chat.completion.chunk')
+        self.created = chunk_data.get('created', 0)
+        self.model = chunk_data.get('model', '')
+class ProxyClient:
+    """Client used to talk to the intermediate proxy service over HTTP.
+
+    Streaming chat calls yield ``ChunkObject`` instances so the caller can
+    iterate them the way it would iterate an OpenAI streaming response.
+    """
+    def __init__(self, base_url: str, timeout: int = 30):
+        """Store the connection settings and create a reusable ``requests`` session.
+
+        Args:
+            base_url: Root URL of the proxy; trailing slashes are removed.
+            timeout: Timeout handed to ``requests`` for chat completion calls.
+        """
+        self.base_url = base_url.rstrip('/')
+        self.timeout = timeout
+        self.session = requests.Session()
+    def chat_completions_create(self, model: str, messages: list, stream: bool = True, **kwargs):
+        """POST a chat completion request to ``{base_url}/chat/completions``.
+
+        Args:
+            model: Model identifier sent in the payload.
+            messages: Message list sent in the payload.
+            stream: When true, return a generator of ``ChunkObject``;
+                otherwise return the decoded JSON body.
+            **kwargs: Extra fields merged into the JSON payload.
+
+        Raises:
+            Exception: When the request fails or the server answers with an
+                HTTP error status. Errors inside a streamed body surface
+                later, while the returned generator is being iterated.
+        """
+        url = f"{self.base_url}/chat/completions"
+        payload = {
+            "model": model,
+            "messages": messages,
+            "stream": stream,
+            **kwargs
+        }
+        response = None
+        try:
+            response = self.session.post(
+                url,
+                json=payload,
+                stream=stream,
+                timeout=self.timeout,
+                headers={"Content-Type": "application/json"}
+            )
+            response.raise_for_status()
+            if stream:
+                return self._parse_stream_response(response)
+            else:
+                return response.json()
+        except requests.exceptions.RequestException as e:
+            # With stream=True the body has not been read yet, so the
+            # connection stays checked out until the response is closed.
+            if response is not None:
+                response.close()
+            logger.error(f"Request failed: {str(e)}")
+            raise Exception(f"Failed to connect to proxy server: {str(e)}") from e
+    def _parse_stream_response(self, response) -> Generator[ChunkObject, None, None]:
+        """Parse a streamed response, yielding one ``ChunkObject`` per ``data:`` line.
+
+        Iteration stops at the ``[DONE]`` marker. Lines that are not valid
+        JSON, or whose JSON is not an object, are logged and skipped. The
+        response is closed when the generator finishes, fails, or is closed
+        by the consumer.
+
+        Raises:
+            Exception: When a payload contains an ``error`` key.
+        """
+        try:
+            # Make sure the body is decoded as UTF-8
+            response.encoding = 'utf-8'
+            for line in response.iter_lines(decode_unicode=True):
+                if not line:
+                    continue
+                line = line.strip()
+                # The space after "data:" is optional in server-sent events.
+                if not line.startswith('data:'):
+                    continue
+                data = line[5:].strip()  # drop the 'data:' prefix
+                if not data:
+                    continue
+                if data == '[DONE]':
+                    break
+                try:
+                    chunk_data = json.loads(data)
+                except json.JSONDecodeError as e:
+                    logger.warning(f"Failed to parse JSON: {data}, error: {str(e)}")
+                    continue
+                if not isinstance(chunk_data, dict):
+                    logger.warning(f"Ignoring non-object stream payload: {data}")
+                    continue
+                # Check whether the payload is an error response
+                if 'error' in chunk_data:
+                    raise Exception(f"Stream error: {chunk_data.get('detail', chunk_data['error'])}")
+                # Build a response object compatible with the OpenAI client
+                yield ChunkObject(chunk_data)
+        except Exception as e:
+            logger.error(f"Error parsing stream response: {str(e)}")
+            raise
+        finally:
+            # Also runs on [DONE] and on GeneratorExit, releasing the connection.
+            response.close()
+    def health_check(self) -> dict:
+        """Query ``{base_url}/health`` with a 5 second timeout.
+
+        Returns:
+            The decoded JSON body, or ``{"status": "unhealthy", "error": ...}``
+            when the request or decoding fails. This method does not raise.
+        """
+        try:
+            url = f"{self.base_url}/health"
+            response = self.session.get(url, timeout=5)
+            response.raise_for_status()
+            return response.json()
+        except Exception as e:
+            logger.error(f"Health check failed: {str(e)}")
+            return {"status": "unhealthy", "error": str(e)}
+# Initialise the proxy client shared by the whole module
+client = ProxyClient(PROXY_BASE_URL, PROXY_TIMEOUT)
+def chat_with_retry(history_messages, max_retries=MAX_RETRIES):
+    """Start a streaming chat completion, retrying with exponential backoff.
+
+    Before each attempt the proxy health endpoint is queried; any status
+    other than "healthy" counts as a failed attempt. Between attempts the
+    function sleeps 1, 2, 4, ... seconds.
+
+    Args:
+        history_messages: Message list forwarded as ``messages``.
+        max_retries: Total number of attempts. Values below 1 are treated
+            as 1 so the request is always tried once.
+
+    Returns:
+        Whatever ``client.chat_completions_create(..., stream=True)`` returns.
+
+    Raises:
+        Exception: The error from the last attempt when all attempts fail.
+
+    NOTE(review): the stream is returned without being consumed, so errors
+    raised while the caller iterates it are not retried here.
+    """
+    # With zero iterations the loop would leave last_exception as None and
+    # the final `raise` would fail with a TypeError.
+    attempts = max(1, max_retries)
+    last_exception = None
+    for attempt in range(attempts):
+        try:
+            logger.info(f"Chat attempt {attempt + 1}/{attempts}")
+            # Check the health of the proxy service first
+            health = client.health_check()
+            if health.get("status") != "healthy":
+                raise Exception(f"Proxy service unhealthy: {health}")
+            response = client.chat_completions_create(
+                model=model,
+                messages=history_messages,
+                stream=True
+            )
+            return response
+        except Exception as e:
+            last_exception = e
+            logger.warning(f"Attempt {attempt + 1} failed: {str(e)}")
+            if attempt < attempts - 1:
+                # Exponential backoff
+                wait_time = 2 ** attempt
+                logger.info(f"Retrying in {wait_time} seconds...")
+                time.sleep(wait_time)
+            else:
+                logger.error(f"All {attempts} attempts failed")
+    raise last_exception
+# True only when the MODELSCOPE_ENVIRONMENT environment variable equals 'studio'
+is_modelscope_studio = os.getenv('MODELSCOPE_ENVIRONMENT') == 'studio'
 def get_text(text: str, cn_text: str):
     """Pick the Chinese string when the ``is_modelscope_studio`` flag is set, else the default one."""
     return cn_text if is_modelscope_studio else text
 logo_img = os.path.join(os.path.dirname(__file__), "rednote_hilab.png")
 DEFAULT_PROMPTS = [{
 def format_history(history):
     messages = [{
         "role": "system",
+        "content": "You are a helpful assistant",
     }]
     for item in history:
         if item["role"] == "user":
             state: gr.update(value=state_value),
         }
         try:
+            response = chat_with_retry(history_messages)
             thought_done = False
             for chunk in response:
+                # 安全地访问chunk属性
+                if chunk.choices and len(chunk.choices) > 0:
+                    content = chunk.choices[0].delta.content
+                else:
+                    content = None
+                    raise ValueError('Content is None')
                 history[-1]["loading"] = False
                 if content and not thought_done:
+                    thought_done = True
                     history[-1]["content"] = ""
+                if content:
+                    history[-1]["content"] += content
                 yield {
                     chatbot: gr.update(items=history),
                     state: gr.update(value=state_value)
                 }
             history[-1]["meta"]["end"] = True
+            print("Answer: ", history[-1]["content"])
         except Exception as e:
             history[-1]["loading"] = False
             history[-1]["meta"]["end"] = True