Saurabh Kumar committed
Commit ee30b6a · verified · 1 parent: fba9f00

Update app.py

Files changed (1): app.py (+43 -46)
app.py CHANGED
@@ -11,59 +11,56 @@ def init_qwen_model():
      processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
      return model, processor

- def get_qwen_text(uploaded_file, model, processor):
-     if uploaded_file is not None:
-         # Open the uploaded image file
-         image = Image.open(uploaded_file)
-         st.image(image, caption="Uploaded Image", use_column_width=True)
-
-         messages = [
-             {
-                 "role": "user",
-                 "content": [
-                     {
-                         "type": "image",
-                         "image": image,
-                     },
-                     {"type": "text", "text": "Run Optical Character recognition on the image."},
-                 ],
-             }
-         ]
-
-         # Preparation for inference
-         text = processor.apply_chat_template(
-             messages, tokenize=False, add_generation_prompt=True
-         )
-         image_inputs, video_inputs = process_vision_info(messages)
-         inputs = processor(
-             text=[text],
-             images=image_inputs,
-             videos=video_inputs,
-             padding=True,
-             return_tensors="pt",
-         )
-         inputs = inputs.to("cpu")
-
-         # Inference: Generation of the output
-         generated_ids = model.generate(**inputs, max_new_tokens=128)
-         generated_ids_trimmed = [
-             out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
-         ]
-         output_text = processor.batch_decode(
-             generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
-         )
-         return output_text
-
  # Streamlit app title
  st.title("OCR Image Text Extraction")

  # File uploader for images
  uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])

- if uploaded_file:
+ MODEL, PROCESSOR = init_qwen_model()
+
+ if uploaded_file is not None:
+     # Open the uploaded image file
+     image = Image.open(uploaded_file)
+     st.image(image, caption="Uploaded Image", use_column_width=True)
+
+     messages = [
+         {
+             "role": "user",
+             "content": [
+                 {
+                     "type": "image",
+                     "image": image,
+                 },
+                 {"type": "text", "text": "Run Optical Character recognition on the image."},
+             ],
+         }
+     ]
+
+     # Preparation for inference
+     text = PROCESSOR.apply_chat_template(
+         messages, tokenize=False, add_generation_prompt=True
+     )
+     image_inputs, video_inputs = process_vision_info(messages)
+     inputs = PROCESSOR(
+         text=[text],
+         images=image_inputs,
+         videos=video_inputs,
+         padding=True,
+         return_tensors="pt",
+     )
+     inputs = inputs.to("cpu")
+
+     # Inference: Generation of the output
+     generated_ids = MODEL.generate(**inputs, max_new_tokens=128)
+     generated_ids_trimmed = [
+         out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+     ]
+     output_text = PROCESSOR.batch_decode(
+         generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+     )
      st.subheader("Extracted Text:")
-     output = get_qwen_text(uploaded_file, MODEL, PROCESSOR)
-     st.write(output)
+     st.write(output_text)

  # Keyword search functionality
  st.subheader("Keyword Search")