prithivMLmods committed · verified
Commit 0800c0d · 1 Parent(s): 01d03b6

Update app.py

Files changed (1): app.py (+17 -2)
app.py CHANGED
@@ -143,8 +143,15 @@ def generate(input_dict: dict, chat_history: list[dict],
         {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
         {"role": "user", "content": [{"type": "text", "text": prompt}]}
     ]
+    # Explicitly enable truncation to avoid token/feature mismatch.
     inputs = processor.apply_chat_template(
-        messages, tokenize=True, add_generation_prompt=True, return_dict=True, return_tensors="pt"
+        messages,
+        tokenize=True,
+        add_generation_prompt=True,
+        return_dict=True,
+        return_tensors="pt",
+        truncation=True,
+        max_length=MAX_INPUT_TOKEN_LENGTH
     ).to("cuda")
     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
     generation_kwargs = {
@@ -184,7 +191,15 @@ def generate(input_dict: dict, chat_history: list[dict],
         ]
     }]
     prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    inputs = processor(text=[prompt_full], images=images, return_tensors="pt", padding=True).to("cuda")
+    # Enable truncation explicitly here as well.
+    inputs = processor(
+        text=[prompt_full],
+        images=images,
+        return_tensors="pt",
+        padding=True,
+        truncation=True,
+        max_length=MAX_INPUT_TOKEN_LENGTH
+    ).to("cuda")
     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
     generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": max_new_tokens}
     thread = Thread(target=model_m.generate, kwargs=generation_kwargs)
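
For reference, a minimal standalone sketch of the two call patterns this commit changes. The checkpoint name, the env-var default for MAX_INPUT_TOKEN_LENGTH, and the empty images placeholder are illustrative assumptions, not taken from app.py; the truncation and max_length keyword arguments are the ones the diff adds, and both are standard parameters forwarded to the tokenizer by transformers processors.

import os
from transformers import AutoProcessor

# Assumption: the Space caps prompt length via an env var; the constant name
# matches the diff, but the default value here is illustrative.
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

# Assumption: a Qwen2-VL-style multimodal checkpoint; app.py may load a
# different model.
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")

messages = [
    {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
    {"role": "user", "content": [{"type": "text", "text": "Hello!"}]},
]

# Text-only path (first hunk): tokenize the chat template directly, capping
# the prompt at MAX_INPUT_TOKEN_LENGTH so over-long histories are clipped
# before they reach generate().
inputs = processor.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
    truncation=True,
    max_length=MAX_INPUT_TOKEN_LENGTH,
)

# Multimodal path (second hunk): render the template to text first, then let
# the processor batch text and images together with padding and truncation.
prompt_full = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
images = []  # placeholder; the real app passes PIL.Image instances here
inputs = processor(
    text=[prompt_full],
    images=images or None,
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=MAX_INPUT_TOKEN_LENGTH,
)

Capping the prompt at the processor level means an over-long chat history is clipped up front rather than reaching model.generate() with more tokens than the model's context window allows.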