Sakalti committed on
Commit
8edf56a
1 Parent(s): ff11ec5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -10
app.py CHANGED
@@ -2,9 +2,6 @@ import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  import time
4
 
5
- """
6
- `huggingface_hub` の推論 API サポートについての詳細は、ドキュメントを確認してください: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
7
- """
8
  client = InferenceClient("Qwen/Qwen2.5-3b-Instruct")
9
 
10
  def respond(
@@ -14,6 +11,7 @@ def respond(
14
  max_tokens,
15
  temperature,
16
  top_p,
 
17
  ):
18
  messages = [{"role": "system", "content": system_message}]
19
 
@@ -25,21 +23,23 @@ def respond(
25
 
26
  messages.append({"role": "user", "content": message})
27
 
28
- # ストリーミングを無効にして、単一の応答を取得
29
- start_time = time.time() # 予測時間計測開始
30
  response = client.chat_completion(
31
  messages,
32
  max_tokens=max_tokens,
33
  temperature=temperature,
34
  top_p=top_p,
35
  )
36
- elapsed_time = time.time() - start_time # 予測時間計測終了
37
 
38
- return response.choices[0].message.content, f"予測時間: {elapsed_time:.2f}秒"
 
 
 
 
 
39
 
40
- """
41
- ChatInterfaceのカスタマイズ方法については、gradioのドキュメントを確認してください: https://www.gradio.app/docs/chatinterface
42
- """
43
  demo = gr.ChatInterface(
44
  respond,
45
  additional_inputs=[
@@ -54,6 +54,11 @@ demo = gr.ChatInterface(
54
  label="Top-p (核サンプリング)",
55
  ),
56
  ],
 
 
 
 
 
57
  )
58
 
59
  if __name__ == "__main__":
 
2
  from huggingface_hub import InferenceClient
3
  import time
4
 
 
 
 
5
  client = InferenceClient("Qwen/Qwen2.5-3b-Instruct")
6
 
7
  def respond(
 
11
  max_tokens,
12
  temperature,
13
  top_p,
14
+ progress=gr.Progress() # 進捗表示用
15
  ):
16
  messages = [{"role": "system", "content": system_message}]
17
 
 
23
 
24
  messages.append({"role": "user", "content": message})
25
 
26
+ # AI応答時間計測開始
27
+ start_time = time.time()
28
  response = client.chat_completion(
29
  messages,
30
  max_tokens=max_tokens,
31
  temperature=temperature,
32
  top_p=top_p,
33
  )
34
+ elapsed_time = time.time() - start_time # AI応答時間計測終了
35
 
36
+ # ユーザーに進捗を表示
37
+ progress(0, f"応答中... {elapsed_time:.2f}秒") # 初期応答時間表示
38
+ time.sleep(0.5) # 応答中に少し待機
39
+ total_response_time = elapsed_time + 0.5 # 総応答時間を計算
40
+
41
+ return response.choices[0].message.content, f"予測時間: {elapsed_time:.2f}秒 / 総応答時間: {total_response_time:.2f}秒"
42
 
 
 
 
43
  demo = gr.ChatInterface(
44
  respond,
45
  additional_inputs=[
 
54
  label="Top-p (核サンプリング)",
55
  ),
56
  ],
57
+ css="""
58
+ .gradio-container {
59
+ background-color: #212121;
60
+ }
61
+ """
62
  )
63
 
64
  if __name__ == "__main__":