prithivMLmods committed on
Commit
50fda8e
·
verified ·
1 Parent(s): b547abf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -48
app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import spaces
2
  import json
3
  import subprocess
@@ -9,14 +11,35 @@ from llama_cpp_agent.chat_history.messages import Roles
9
  import gradio as gr
10
  from huggingface_hub import hf_hub_download
11
 
12
- # モデルのダウンロード
13
- hf_hub_download(
14
- repo_id="mradermacher/Viper-Coder-32B-Elite13-GGUF",
15
- filename="Viper-Coder-32B-Elite13.Q2_K.gguf",
16
- local_dir="./models"
17
- )
18
 
19
- # 推論関数
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  @spaces.GPU(duration=120)
21
  def respond(
22
  message,
@@ -32,7 +55,7 @@ def respond(
32
  chat_template = MessagesFormatterType.GEMMA_2
33
 
34
  llm = Llama(
35
- model_path=f"models/{model}",
36
  flash_attn=True,
37
  n_gpu_layers=81,
38
  n_batch=1024,
@@ -58,14 +81,8 @@ def respond(
58
  messages = BasicChatHistory()
59
 
60
  for msn in history:
61
- user = {
62
- 'role': Roles.user,
63
- 'content': msn[0]
64
- }
65
- assistant = {
66
- 'role': Roles.assistant,
67
- 'content': msn[1]
68
- }
69
  messages.add_message(user)
70
  messages.add_message(assistant)
71
 
@@ -82,36 +99,21 @@ def respond(
82
  outputs += output
83
  yield outputs
84
 
85
- # Gradioのインターフェースを作成
86
  def create_interface(model_name, description):
87
  return gr.ChatInterface(
88
  respond,
89
  additional_inputs=[
90
  gr.Textbox(value=model_name, label="Model", interactive=False),
91
- gr.Textbox(value="You are a helpful assistant. You should think step-by-step. You should answer all question by prioritizing the principle of openness", label="System message"),
 
 
 
92
  gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
93
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
94
- gr.Slider(
95
- minimum=0.1,
96
- maximum=1.0,
97
- value=0.95,
98
- step=0.05,
99
- label="Top-p",
100
- ),
101
- gr.Slider(
102
- minimum=0,
103
- maximum=100,
104
- value=40,
105
- step=1,
106
- label="Top-k",
107
- ),
108
- gr.Slider(
109
- minimum=0.0,
110
- maximum=2.0,
111
- value=1.1,
112
- step=0.1,
113
- label="Repetition penalty",
114
- ),
115
  ],
116
  retry_btn="Retry",
117
  undo_btn="Undo",
@@ -119,21 +121,18 @@ def create_interface(model_name, description):
119
  submit_btn="Send",
120
  title=f"{model_name}",
121
  description=description,
122
- chatbot=gr.Chatbot(
123
- scale=1,
124
- likeable=False,
125
- show_copy_button=True
126
- )
127
  )
128
 
129
- description = """<p align="center"Viper-Coder-32B-Elite13-GGUF/p>"""
130
- interface = create_interface('mradermacher/Viper-Coder-32B-Elite13-GGUF', description)
 
131
 
132
- # Gradio Blocksで単一のインターフェースを表示
133
  demo = gr.Blocks()
134
 
135
  with demo:
136
  interface.render()
137
 
138
  if __name__ == "__main__":
139
- demo.launch()
 
1
+ import os
2
+ import time
3
  import spaces
4
  import json
5
  import subprocess
 
11
  import gradio as gr
12
  from huggingface_hub import hf_hub_download
13
 
14
+ # Define model details
15
+ MODEL_REPO = "mradermacher/Viper-Coder-32B-Elite13-GGUF"
16
+ MODEL_FILENAME = "Viper-Coder-32B-Elite13.Q2_K.gguf"
17
+ MODEL_DIR = "./models"
18
+ MODEL_PATH = os.path.join(MODEL_DIR, MODEL_FILENAME)
 
19
 
20
+ # Ensure the model directory exists
21
+ os.makedirs(MODEL_DIR, exist_ok=True)
22
+
23
+ # Download the model if not already present
24
+ if not os.path.exists(MODEL_PATH):
25
+ print("Downloading the model... This may take some time.")
26
+ try:
27
+ hf_hub_download(
28
+ repo_id=MODEL_REPO,
29
+ filename=MODEL_FILENAME,
30
+ local_dir=MODEL_DIR
31
+ )
32
+ print("Model downloaded successfully!")
33
+ except Exception as e:
34
+ print(f"Error downloading model: {e}")
35
+ exit(1)
36
+
37
+ # Ensure model is fully downloaded before using
38
+ while not os.path.exists(MODEL_PATH):
39
+ print("Waiting for model to be available...")
40
+ time.sleep(5)
41
+
42
+ # Function to handle AI responses
43
  @spaces.GPU(duration=120)
44
  def respond(
45
  message,
 
55
  chat_template = MessagesFormatterType.GEMMA_2
56
 
57
  llm = Llama(
58
+ model_path=MODEL_PATH,
59
  flash_attn=True,
60
  n_gpu_layers=81,
61
  n_batch=1024,
 
81
  messages = BasicChatHistory()
82
 
83
  for msn in history:
84
+ user = {'role': Roles.user, 'content': msn[0]}
85
+ assistant = {'role': Roles.assistant, 'content': msn[1]}
 
 
 
 
 
 
86
  messages.add_message(user)
87
  messages.add_message(assistant)
88
 
 
99
  outputs += output
100
  yield outputs
101
 
102
+ # Function to create Gradio interface
103
  def create_interface(model_name, description):
104
  return gr.ChatInterface(
105
  respond,
106
  additional_inputs=[
107
  gr.Textbox(value=model_name, label="Model", interactive=False),
108
+ gr.Textbox(
109
+ value="You are a helpful assistant. You should think step-by-step. You should answer all questions by prioritizing the principle of openness",
110
+ label="System message"
111
+ ),
112
  gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
113
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
114
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
115
+ gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
116
+ gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  ],
118
  retry_btn="Retry",
119
  undo_btn="Undo",
 
121
  submit_btn="Send",
122
  title=f"{model_name}",
123
  description=description,
124
+ chatbot=gr.Chatbot(scale=1, likeable=False, show_copy_button=True)
 
 
 
 
125
  )
126
 
127
+ # Set interface description
128
+ description = """<p align="center">Viper-Coder-32B-Elite13-GGUF</p>"""
129
+ interface = create_interface(MODEL_REPO, description)
130
 
131
+ # Create Gradio Blocks app
132
  demo = gr.Blocks()
133
 
134
  with demo:
135
  interface.render()
136
 
137
  if __name__ == "__main__":
138
+ demo.launch(share=True)