Saurabh Kumar committed
Commit e3dbfa9 · verified · 1 Parent(s): 6d15eb6

Update app.py

Files changed (1):
  1. app.py (+20 -26)
app.py CHANGED
@@ -4,26 +4,16 @@ import streamlit as st
 import torch
 from PIL import Image
 
-# default: Load the model on the available device(s)
+# Default: Load the model on the available device(s)
 @st.cache_resource
 def init_qwen_model():
     _model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", torch_dtype="auto", device_map="auto")
     _processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
     return _model, _processor
-# We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
-# model = Qwen2VLForConditionalGeneration.from_pretrained(
-#     "Qwen/Qwen2-VL-7B-Instruct",
-#     torch_dtype=torch.bfloat16,
-#     attn_implementation="flash_attention_2",
-#     device_map="auto",
-# )
-# The default range for the number of visual tokens per image in the model is 4-16384. You can set min_pixels and max_pixels according to your needs, such as a token count range of 256-1280, to balance speed and memory usage.
-# min_pixels = 256*28*28
-# max_pixels = 1280*28*28
-# processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels)
 
+# Modified function to take the model and processor as arguments
 @st.cache_data
-def get_qwen_text(uploaded_file):
+def get_qwen_text(uploaded_file, model, processor):
     if uploaded_file is not None:
         # Open the uploaded image file
         image = Image.open(uploaded_file)
@@ -69,23 +59,27 @@ def get_qwen_text(uploaded_file):
 # Streamlit app title
 st.title("OCR Image Text Extraction")
 
+# Initialize the model and processor
 MODEL, PROCESSOR = init_qwen_model()
 
 # File uploader for images
 uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
 
-st.subheader("Extracted Text:")
-output = get_qwen_text(uploaded_file, MODEL, PROCESSOR)
-st.write(output)
+if uploaded_file:
+    st.subheader("Extracted Text:")
+    output = get_qwen_text(uploaded_file, MODEL, PROCESSOR)
+    st.write(output)
 
-# Keyword search functionality
-st.subheader("Keyword Search")
-search_query = st.text_input("Enter keywords to search within the extracted text")
+    # Keyword search functionality
+    st.subheader("Keyword Search")
+    search_query = st.text_input("Enter keywords to search within the extracted text")
 
-if search_query:
-    # Check if the search query is in the extracted text
-    if search_query.lower() in output.lower():
-        highlighted_text = output.replace(search_query, f"**{search_query}**")
-        st.write(f"Matching Text: {highlighted_text}")
-    else:
-        st.write("No matching text found.")
+    if search_query:
+        # Check if the search query is in the extracted text
+        if search_query.lower() in output.lower():
+            highlighted_text = output.replace(search_query, f"**{search_query}**")
+            st.write(f"Matching Text: {highlighted_text}")
+        else:
+            st.write("No matching text found.")
+else:
+    st.info("Please upload an image to extract text.")
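
Note: the hunks above only show the opening lines of get_qwen_text; its body is unchanged by this commit and therefore not part of the diff. The sketch below is a hypothetical reconstruction of that flow using the standard Qwen2-VL generation API, with the model and processor passed in as this commit now requires. The OCR prompt, the max_new_tokens budget, and the underscore-prefixed parameter names are illustrative assumptions, not code taken from the repository; the underscores are there because st.cache_data hashes its arguments and a leading underscore is Streamlit's documented way to exclude unhashable objects such as the model from hashing, which the committed signature (model, processor) would otherwise run into.

# Hypothetical sketch only: not part of this commit's diff.
import streamlit as st
from PIL import Image

@st.cache_data
def get_qwen_text(uploaded_file, _model, _processor):
    if uploaded_file is None:
        return ""
    # Open the uploaded image file
    image = Image.open(uploaded_file)

    # Chat-style message pairing the image with an OCR instruction (assumed prompt)
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": "Extract all readable text from this image."},
            ],
        }
    ]
    prompt = _processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Tokenize the prompt, preprocess the image, and move the batch to the model's device
    inputs = _processor(text=[prompt], images=[image], padding=True, return_tensors="pt").to(_model.device)

    # Generate, then decode only the newly produced tokens
    generated_ids = _model.generate(**inputs, max_new_tokens=512)
    trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)]
    return _processor.batch_decode(trimmed, skip_special_tokens=True)[0]

If GPU memory is tight, the min_pixels / max_pixels options mentioned in the comments this commit removes can still be passed to AutoProcessor.from_pretrained inside init_qwen_model to cap the number of visual tokens per image.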