Spaces:

Saurabh1207
/

VLM

Sleeping

Saurabh Kumar committed on Sep 30, 2024

Commit

3ddf8ca

verified ·

1 Parent(s): bad34f0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -15,7 +15,7 @@ MODEL, PROCESSOR = init_qwen_model()
 # Streamlit app title
 st.title("OCR Image Text Extraction")
 # File uploader for images
 uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
@@ -33,7 +33,7 @@ if uploaded_file is not None:
                         "type": "image",
                         "image": image,
                     },
-                    {"type": "text", "text": "Run Optical Character recognition on the image."},
                 ],
             }
         ]
@@ -53,7 +53,7 @@ if uploaded_file is not None:
         inputs = inputs.to("cpu")
         # Inference: Generation of the output
-        generated_ids = MODEL.generate(**inputs, max_new_tokens=512)
         generated_ids_trimmed = [
             out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
         ]

 # Streamlit app title
 st.title("OCR Image Text Extraction")
+st.subheader("I used Qwen2-VL-7B-Instruct model to get better accuracy but as it is running on CPU it takes 25-30 minutes to run it. So please have patience.")
 # File uploader for images
 uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
                         "type": "image",
                         "image": image,
                     },
+                    {"type": "text", "text": "Run Optical Character recognition on the image for Hindi and English."},
                 ],
             }
         ]
         inputs = inputs.to("cpu")
         # Inference: Generation of the output
+        generated_ids = MODEL.generate(**inputs, max_new_tokens=256)
         generated_ids_trimmed = [
             out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
         ]