sprakhil committed
Commit 639e3fa · 1 Parent(s): 04b6e65
Files changed (2)
  1. app.py +60 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,60 @@
+ import streamlit as st
+ from PIL import Image
+ import torch
+ from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, pipeline
+ from colpali_engine.models import ColPali, ColPaliProcessor
+ import os
+
+ # Prefer GPU; these models are impractically slow on CPU
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ # Hugging Face access token for gated model downloads
+ token = os.getenv('HUGGINGFACE_TOKEN')
+ # NOTE: this PaliGemma captioning pipeline is loaded but never used below
+ model = pipeline("image-to-text", model="google/paligemma-3b-mix-448", token=token)
+
+ # ColPali retriever for document search (loaded here but not wired into the UI below)
+ try:
+     model_colpali = ColPali.from_pretrained("vidore/colpali-v1.2", torch_dtype=torch.bfloat16).to(device)
+     processor_colpali = ColPaliProcessor.from_pretrained("google/paligemma-3b-mix-448")
+ except Exception as e:
+     st.error(f"Error loading ColPali model or processor: {e}")
+     st.stop()
+
+ # Qwen2-VL performs the actual text extraction from the uploaded image
+ try:
+     model_qwen = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct").to(device)
+     processor_qwen = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
+ except Exception as e:
+     st.error(f"Error loading Qwen model or processor: {e}")
+     st.stop()
+
+ st.title("OCR and Document Search Web Application")
+ st.write("Upload an image containing text in both Hindi and English for OCR processing and keyword search.")
+
+ uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
+
+ if uploaded_file is not None:
+     try:
+         image = Image.open(uploaded_file)
+         st.image(image, caption='Uploaded Image.', use_container_width=True)
+         st.write("")
+
+         # Chat-style prompt pairing the uploaded image with a text instruction
+         conversation = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": "Describe this image."}]}]
+         text_prompt = processor_qwen.apply_chat_template(conversation, add_generation_prompt=True)
+         inputs = processor_qwen(text=[text_prompt], images=[image], padding=True, return_tensors="pt").to(device)
+
+         with torch.no_grad():
+             output_ids = model_qwen.generate(**inputs, max_new_tokens=128)
+             # Drop the prompt tokens so only newly generated text is decoded
+             generated_ids = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, output_ids)]
+             output_text = processor_qwen.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)
+
+         st.write("Extracted Text:")
+         st.write(output_text[0])
+
+         keyword = st.text_input("Enter a keyword to search in the extracted text:")
+         if keyword:
+             # Case-insensitive substring match against the decoded text
+             if keyword.lower() in output_text[0].lower():
+                 st.write(f"Keyword '{keyword}' found in the text.")
+             else:
+                 st.write(f"Keyword '{keyword}' not found in the text.")
+     except Exception as e:
+         st.error(f"An error occurred: {e}")
+
+ if __name__ == "__main__":
+     st.write("Deploying the web application...")
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ streamlit
+ Pillow
+ torch
+ transformers
+ colpali-engine
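Note: all five dependencies are unpinned. Qwen2-VL support only landed in transformers around v4.45, and the ColPali/ColPaliProcessor import path has shifted between colpali-engine releases, so pinning known-good versions of both is a reasonable safeguard if model loading fails.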