prithivMLmods committed · verified
Commit 0800c0d · 1 Parent(s): 01d03b6

Update app.py

Files changed (1): app.py (+17 -2)
app.py CHANGED
@@ -143,8 +143,15 @@ def generate(input_dict: dict, chat_history: list[dict],
         {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
         {"role": "user", "content": [{"type": "text", "text": prompt}]}
     ]
+    # Explicitly enable truncation to avoid token/feature mismatch.
     inputs = processor.apply_chat_template(
-        messages, tokenize=True, add_generation_prompt=True, return_dict=True, return_tensors="pt"
+        messages,
+        tokenize=True,
+        add_generation_prompt=True,
+        return_dict=True,
+        return_tensors="pt",
+        truncation=True,
+        max_length=MAX_INPUT_TOKEN_LENGTH
     ).to("cuda")
     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
     generation_kwargs = {
@@ -184,7 +191,15 @@ def generate(input_dict: dict, chat_history: list[dict],
         ]
     }]
     prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    inputs = processor(text=[prompt_full], images=images, return_tensors="pt", padding=True).to("cuda")
+    # Enable truncation explicitly here as well.
+    inputs = processor(
+        text=[prompt_full],
+        images=images,
+        return_tensors="pt",
+        padding=True,
+        truncation=True,
+        max_length=MAX_INPUT_TOKEN_LENGTH
+    ).to("cuda")
     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
     generation_kwargs = {**inputs, "streamer": streamer, "max_new_tokens": max_new_tokens}
     thread = Thread(target=model_m.generate, kwargs=generation_kwargs)
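
For reference, a minimal standalone sketch of the two call patterns this commit changes. The checkpoint name, the env-var default for MAX_INPUT_TOKEN_LENGTH, and the empty images placeholder are illustrative assumptions, not taken from app.py; the truncation and max_length keyword arguments are the ones the diff adds, and both are standard parameters forwarded to the tokenizer by transformers processors.

import os
from transformers import AutoProcessor

# Assumption: the Space caps prompt length via an env var; the constant name
# matches the diff, but the default value here is illustrative.
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

# Assumption: a Qwen2-VL-style multimodal checkpoint; app.py may load a
# different model.
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")

messages = [
    {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
    {"role": "user", "content": [{"type": "text", "text": "Hello!"}]},
]

# Text-only path (first hunk): tokenize the chat template directly, capping
# the prompt at MAX_INPUT_TOKEN_LENGTH so over-long histories are clipped
# before they reach generate().
inputs = processor.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
    truncation=True,
    max_length=MAX_INPUT_TOKEN_LENGTH,
)

# Multimodal path (second hunk): render the template to text first, then let
# the processor batch text and images together with padding and truncation.
prompt_full = processor.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
images = []  # placeholder; the real app passes PIL.Image instances here
inputs = processor(
    text=[prompt_full],
    images=images or None,
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=MAX_INPUT_TOKEN_LENGTH,
)

Capping the prompt at the processor level means an over-long chat history is clipped up front rather than reaching model.generate() with more tokens than the model's context window allows.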