Saurabh Kumar committed
Commit ee30b6a · verified · 1 parent: fba9f00

Update app.py

Files changed (1): app.py (+43 -46)
app.py CHANGED
@@ -11,59 +11,56 @@ def init_qwen_model():
      processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
      return model, processor

- def get_qwen_text(uploaded_file, model, processor):
-     if uploaded_file is not None:
-         # Open the uploaded image file
-         image = Image.open(uploaded_file)
-         st.image(image, caption="Uploaded Image", use_column_width=True)
-
-         messages = [
-             {
-                 "role": "user",
-                 "content": [
-                     {
-                         "type": "image",
-                         "image": image,
-                     },
-                     {"type": "text", "text": "Run Optical Character recognition on the image."},
-                 ],
-             }
-         ]
-
-         # Preparation for inference
-         text = processor.apply_chat_template(
-             messages, tokenize=False, add_generation_prompt=True
-         )
-         image_inputs, video_inputs = process_vision_info(messages)
-         inputs = processor(
-             text=[text],
-             images=image_inputs,
-             videos=video_inputs,
-             padding=True,
-             return_tensors="pt",
-         )
-         inputs = inputs.to("cpu")
-
-         # Inference: Generation of the output
-         generated_ids = model.generate(**inputs, max_new_tokens=128)
-         generated_ids_trimmed = [
-             out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
-         ]
-         output_text = processor.batch_decode(
-             generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
-         )
-         return output_text
-
  # Streamlit app title
  st.title("OCR Image Text Extraction")

  # File uploader for images
  uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])

- if uploaded_file:
+ MODEL, PROCESSOR = init_qwen_model()
+
+ if uploaded_file is not None:
+     # Open the uploaded image file
+     image = Image.open(uploaded_file)
+     st.image(image, caption="Uploaded Image", use_column_width=True)
+
+     messages = [
+         {
+             "role": "user",
+             "content": [
+                 {
+                     "type": "image",
+                     "image": image,
+                 },
+                 {"type": "text", "text": "Run Optical Character recognition on the image."},
+             ],
+         }
+     ]
+
+     # Preparation for inference
+     text = PROCESSOR.apply_chat_template(
+         messages, tokenize=False, add_generation_prompt=True
+     )
+     image_inputs, video_inputs = process_vision_info(messages)
+     inputs = PROCESSOR(
+         text=[text],
+         images=image_inputs,
+         videos=video_inputs,
+         padding=True,
+         return_tensors="pt",
+     )
+     inputs = inputs.to("cpu")
+
+     # Inference: Generation of the output
+     generated_ids = MODEL.generate(**inputs, max_new_tokens=128)
+     generated_ids_trimmed = [
+         out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+     ]
+     output_text = PROCESSOR.batch_decode(
+         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+     )
      st.subheader("Extracted Text:")
-     output = get_qwen_text(uploaded_file, MODEL, PROCESSOR)
-     st.write(output)
+     st.write(output_text)

  # Keyword search functionality
  st.subheader("Keyword Search")