prithivMLmods committed
Commit e576635 · verified · 1 Parent(s): 8c1f8ea

Update app.py

Files changed (1):
  1. app.py +8 -15
app.py CHANGED
@@ -119,7 +119,6 @@ def model_inference(input_dict, history, use_rolmocr=False):
     model = rolmocr_model if use_rolmocr else qwen_model
     model_name = "RolmOCR" if use_rolmocr else "Qwen2VL OCR"

-    # Prepare prompt and inputs
     prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     all_images = [item["image"] for item in content if item["type"] == "image"]
     inputs = processor(
@@ -129,7 +128,6 @@ def model_inference(input_dict, history, use_rolmocr=False):
         padding=True,
     ).to("cuda")

-    # Set up streaming
     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
@@ -138,23 +136,17 @@ def model_inference(input_dict, history, use_rolmocr=False):
     buffer = ""
     yield progress_bar_html(f"Processing with {model_name}")

-    # Stream tokens
+    # Stream generation
     for new_text in streamer:
-        buffer += new_text
-        buffer = buffer.replace("<|im_end|>", "")
+        buffer += new_text.replace("<|im_end|>", "")
         time.sleep(0.01)
         yield buffer

-    # Once streaming is done, save to response.txt and yield final result
-    results = buffer.strip()
-    try:
-        with open("response.txt", "w", encoding="utf-8") as f:
-            f.write(results)
-    except Exception as e:
-        yield f"Error writing to response.txt: {e}"
-        return
+    # Once complete, save to response.txt and yield final confirmation
+    with open("response.txt", "w", encoding="utf-8") as f:
+        f.write(buffer)

-    yield results
+    yield f"\n✅ Response saved to `response.txt`:\n\n{buffer}"
     return

 # Gradio Interface
@@ -180,4 +172,5 @@ demo = gr.ChatInterface(
     additional_inputs=[gr.Checkbox(label="Use RolmOCR", value=False, info="Check to use RolmOCR, uncheck to use Qwen2VL OCR")],
 )

-demo.launch(debug=True)
+if __name__ == "__main__":
+    demo.launch(debug=True)
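For context on the change: `model_inference` streams output with transformers' `TextIteratorStreamer`, running the blocking `generate()` call in a worker thread while the generator yields decoded text as it arrives. A minimal sketch of that pattern, assuming `model`, `processor`, and `inputs` are the already-loaded model/processor pair and prepared batch from app.py (`stream_tokens` is a hypothetical helper name, not a function in the file):

```python
# Minimal sketch of the threaded streaming loop in model_inference.
# `model`, `processor`, and `inputs` are assumed to be set up as in app.py.
from threading import Thread
from transformers import TextIteratorStreamer

def stream_tokens(model, processor, inputs, max_new_tokens=1024):
    # skip_prompt drops the echoed prompt; skip_special_tokens strips EOS-style tokens
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
    # generate() blocks until completion, so it runs in a background thread
    # while this generator consumes decoded chunks as they become available
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    buffer = ""
    for new_text in streamer:
        buffer += new_text.replace("<|im_end|>", "")
        yield buffer  # yield the growing transcript for live UI updates
```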
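One behavioral nuance of the simplified strip: the old loop re-ran `replace` over the whole accumulated buffer every iteration, which would also catch a marker split across two decoded chunks; the new per-chunk `replace` is cheaper but misses that corner case. A tiny illustration with a hypothetical chunk boundary:

```python
# Hypothetical chunk boundary splitting the end-of-turn marker.
chunks = ["Hello<|im", "_end|>"]

per_chunk = "".join(c.replace("<|im_end|>", "") for c in chunks)
print(per_chunk)     # Hello<|im_end|> -- the split marker survives

whole_buffer = "".join(chunks).replace("<|im_end|>", "")
print(whole_buffer)  # Hello -- rescanning the full buffer catches it
```

In practice this is unlikely to matter here: with `skip_special_tokens=True`, `<|im_end|>` should already be dropped during decoding when it is registered as a special token, so the `replace` acts as a defensive fallback either way.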
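The new `if __name__ == "__main__":` guard keeps the Gradio server from launching when app.py is merely imported (e.g. by tests or tooling) rather than run as a script. A minimal sketch of how the guard fits with the `gr.ChatInterface` wiring shown in the last hunk (the `multimodal=True` flag is an assumption; the full argument list lives earlier in app.py):

```python
import gradio as gr

# Sketch of the interface wiring; model_inference is the generator from the diff.
demo = gr.ChatInterface(
    fn=model_inference,
    multimodal=True,  # assumption: the app accepts image + text chat input
    additional_inputs=[
        gr.Checkbox(
            label="Use RolmOCR",
            value=False,
            info="Check to use RolmOCR, uncheck to use Qwen2VL OCR",
        )
    ],
)

if __name__ == "__main__":
    # debug=True surfaces tracebacks in the Space logs
    demo.launch(debug=True)
```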