rubenroy committed on
Commit
a150c28
·
verified ·
1 Parent(s): b026bcc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -6
app.py CHANGED
@@ -12,10 +12,10 @@ model = AutoModelForCausalLM.from_pretrained(
12
  tokenizer = AutoTokenizer.from_pretrained(model_name)
13
 
14
  @spaces.GPU
15
- def generate(prompt, history):
16
  messages = [
17
  {"role": "system", "content": "You are Zurich, a 7 billion parameter Large Language model built on the Qwen 2.5 7B model developed by Alibaba Cloud, and fine-tuned by Ruben Roy. You have been fine-tuned with the GammaCorpus v2 dataset, a dataset filled with structured and filtered multi-turn conversations and was also created by Ruben Roy. You are a helpful assistant."},
18
- {"role": "user", "content": prompt}
19
  ]
20
  text = tokenizer.apply_chat_template(
21
  messages,
@@ -25,7 +25,12 @@ def generate(prompt, history):
25
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
26
  generated_ids = model.generate(
27
  **model_inputs,
28
- max_new_tokens=512
 
 
 
 
 
29
  )
30
  generated_ids = [
31
  output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
@@ -78,6 +83,28 @@ TITLE_HTML = """
78
  font-size: 0.9rem;
79
  color: #94a3b8;
80
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  </style>
82
 
83
  <div style="background: linear-gradient(135deg, #1e293b 0%, #0f172a 100%); padding: 1.5rem; border-radius: 1.5rem; text-align: center; margin: 1rem auto; max-width: 1200px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);">
@@ -94,7 +121,7 @@ TITLE_HTML = """
94
 
95
  <div style="display: flex; gap: 1.5rem; justify-content: center;">
96
  <div class="model-section">
97
- <h2 style="font-size: 1.25rem; color: #e2e8f0; margin-bottom: 1rem; margin-top: 1px; font-weight: 600; display: flex; align-items: center; justify-content: center; gap: 0.7rem;">
98
  <i class="fas fa-brain"></i>
99
  7B Models
100
  </h2>
@@ -108,7 +135,7 @@ TITLE_HTML = """
108
  </div>
109
  </div>
110
  <div class="model-section">
111
- <h2 style="font-size: 1.25rem; color: #e2e8f0; margin-bottom: 1rem; margin-top: 1px; font-weight: 600; display: flex; align-items: center; justify-content: center; gap: 0.7rem;">
112
  <i class="fas fa-rocket"></i>
113
  14B Models
114
  </h2>
@@ -125,9 +152,76 @@ TITLE_HTML = """
125
  </div>
126
  """
127
 
 
 
 
 
 
 
 
128
  with gr.Blocks() as demo:
129
  gr.HTML(TITLE_HTML)
130
- chat_interface = gr.ChatInterface(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  fn=generate,
 
 
 
 
 
 
 
 
132
  )
 
133
  demo.launch(share=True)
 
12
  tokenizer = AutoTokenizer.from_pretrained(model_name)
13
 
14
  @spaces.GPU
15
+ def generate(message, chat_history, temperature=0.7, top_p=0.9, top_k=50, max_new_tokens=512, repetition_penalty=1.1):
16
  messages = [
17
  {"role": "system", "content": "You are Zurich, a 7 billion parameter Large Language model built on the Qwen 2.5 7B model developed by Alibaba Cloud, and fine-tuned by Ruben Roy. You have been fine-tuned with the GammaCorpus v2 dataset, a dataset filled with structured and filtered multi-turn conversations and was also created by Ruben Roy. You are a helpful assistant."},
18
+ {"role": "user", "content": message}
19
  ]
20
  text = tokenizer.apply_chat_template(
21
  messages,
 
25
  model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
26
  generated_ids = model.generate(
27
  **model_inputs,
28
+ temperature=float(temperature),
29
+ top_p=float(top_p),
30
+ top_k=int(top_k),
31
+ max_new_tokens=int(max_new_tokens),
32
+ repetition_penalty=float(repetition_penalty),
33
+ do_sample=True if float(temperature) > 0 else False
34
  )
35
  generated_ids = [
36
  output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
 
83
  font-size: 0.9rem;
84
  color: #94a3b8;
85
  }
86
+ .settings-section {
87
+ background: rgba(255, 255, 255, 0.05);
88
+ padding: 1.5rem;
89
+ border-radius: 1rem;
90
+ margin: 1.5rem auto;
91
+ border: 1px solid rgba(255, 255, 255, 0.1);
92
+ max-width: 800px;
93
+ }
94
+ .settings-title {
95
+ color: #e2e8f0;
96
+ font-size: 1.25rem;
97
+ font-weight: 600;
98
+ margin-bottom: 1rem;
99
+ display: flex;
100
+ align-items: center;
101
+ gap: 0.7rem;
102
+ }
103
+ .parameter-info {
104
+ color: #94a3b8;
105
+ font-size: 0.8rem;
106
+ margin-top: 0.25rem;
107
+ }
108
  </style>
109
 
110
  <div style="background: linear-gradient(135deg, #1e293b 0%, #0f172a 100%); padding: 1.5rem; border-radius: 1.5rem; text-align: center; margin: 1rem auto; max-width: 1200px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);">
 
121
 
122
  <div style="display: flex; gap: 1.5rem; justify-content: center;">
123
  <div class="model-section">
124
+ <h2 style="font-size: 1.25rem; color: #e2e8f0; margin-bottom: 1.4rem; margin-top: 1px; font-weight: 600; display: flex; align-items: center; justify-content: center; gap: 0.7rem;">
125
  <i class="fas fa-brain"></i>
126
  7B Models
127
  </h2>
 
135
  </div>
136
  </div>
137
  <div class="model-section">
138
+ <h2 style="font-size: 1.25rem; color: #e2e8f0; margin-bottom: 1.4rem; margin-top: 1px; font-weight: 600; display: flex; align-items: center; justify-content: center; gap: 0.7rem;">
139
  <i class="fas fa-rocket"></i>
140
  14B Models
141
  </h2>
 
152
  </div>
153
  """
154
 
155
+ examples = [
156
+ ["Explain quantum computing in simple terms"],
157
+ ["Write a short story about a time traveler"],
158
+ ["Explain the process of photosynthesis"],
159
+ ["Tell me an interesting fact about Palm trees"]
160
+ ]
161
+
162
  with gr.Blocks() as demo:
163
  gr.HTML(TITLE_HTML)
164
+
165
+ with gr.Accordion("Generation Settings", open=False):
166
+ with gr.Row():
167
+ with gr.Column():
168
+ temperature = gr.Slider(
169
+ minimum=0.0,
170
+ maximum=2.0,
171
+ value=0.7,
172
+ step=0.1,
173
+ label="Temperature",
174
+ info="Higher values make the output more random, lower values make it more deterministic",
175
+ interactive=True
176
+ )
177
+ top_p = gr.Slider(
178
+ minimum=0.0,
179
+ maximum=1.0,
180
+ value=0.9,
181
+ step=0.05,
182
+ label="Top P",
183
+ info="Controls the cumulative probability threshold for nucleus sampling",
184
+ interactive=True
185
+ )
186
+ top_k = gr.Slider(
187
+ minimum=1,
188
+ maximum=100,
189
+ value=50,
190
+ step=1,
191
+ label="Top K",
192
+ info="Limits the number of tokens to consider for each generation step",
193
+ interactive=True
194
+ )
195
+ with gr.Column():
196
+ max_new_tokens = gr.Slider(
197
+ minimum=1,
198
+ maximum=2048,
199
+ value=512,
200
+ step=1,
201
+ label="Max New Tokens",
202
+ info="Maximum number of tokens to generate in the response",
203
+ interactive=True
204
+ )
205
+ repetition_penalty = gr.Slider(
206
+ minimum=1.0,
207
+ maximum=2.0,
208
+ value=1.1,
209
+ step=0.1,
210
+ label="Repetition Penalty",
211
+ info="Higher values stop the model from repeating the same info",
212
+ interactive=True
213
+ )
214
+
215
+ chatbot = gr.ChatInterface(
216
  fn=generate,
217
+ additional_inputs=[
218
+ temperature,
219
+ top_p,
220
+ top_k,
221
+ max_new_tokens,
222
+ repetition_penalty
223
+ ],
224
+ examples=examples
225
  )
226
+
227
  demo.launch(share=True)