Spaces:

sakaltcommunity
/

Qwen2.5

Runtime error

Sakalti committed on Oct 11, 2024

Commit

8edf56a

verified ·

1 Parent(s): ff11ec5

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,9 +2,6 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import time
-"""
-`huggingface_hub` の推論 API サポートについての詳細は、ドキュメントを確認してください: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
 client = InferenceClient("Qwen/Qwen2.5-3b-Instruct")
 def respond(
@@ -14,6 +11,7 @@ def respond(
     max_tokens,
     temperature,
     top_p,
 ):
     messages = [{"role": "system", "content": system_message}]
@@ -25,21 +23,23 @@ def respond(
     messages.append({"role": "user", "content": message})
-    # ストリーミングを無効にして、単一の応答を取得
-    start_time = time.time()  # 予測時間計測開始
     response = client.chat_completion(
         messages,
         max_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
     )
-    elapsed_time = time.time() - start_time  # 予測時間計測終了
-    return response.choices[0].message.content, f"予測時間: {elapsed_time:.2f}秒"
-"""
-ChatInterfaceのカスタマイズ方法については、gradioのドキュメントを確認してください: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
@@ -54,6 +54,11 @@ demo = gr.ChatInterface(
             label="Top-p (核サンプリング)",
         ),
     ],
 )
 if __name__ == "__main__":

 from huggingface_hub import InferenceClient
 import time
 client = InferenceClient("Qwen/Qwen2.5-3b-Instruct")
 def respond(
     max_tokens,
     temperature,
     top_p,
+    progress=gr.Progress()  # 進捗表示用
 ):
     messages = [{"role": "system", "content": system_message}]
     messages.append({"role": "user", "content": message})
+    # AI応答時間計測開始
+    start_time = time.time()
     response = client.chat_completion(
         messages,
         max_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
     )
+    elapsed_time = time.time() - start_time  # AI応答時間計測終了
+    # ユーザーに進捗を表示
+    progress(0, f"応答中... {elapsed_time:.2f}秒")  # 初期応答時間表示
+    time.sleep(0.5)  # 応答中に少し待機
+    total_response_time = elapsed_time + 0.5  # 総応答時間を計算
+    return response.choices[0].message.content, f"予測時間: {elapsed_time:.2f}秒 / 総応答時間: {total_response_time:.2f}秒"
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
             label="Top-p (核サンプリング)",
         ),
     ],
+    css="""
+    .gradio-container {
+        background-color: #212121;
+    }
+    """
 )
 if __name__ == "__main__":