mdasad3617 committed on
Commit
233d635
·
verified ·
1 Parent(s): 8a3f880

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -17
app.py CHANGED
@@ -1,9 +1,12 @@
1
  import streamlit as st
2
- from transformers import pipeline, AutoProcessor, AutoModelForCausalLM
 
3
  from PIL import Image
4
  import fitz # PyMuPDF for PDF processing
5
  import logging
6
  from concurrent.futures import ThreadPoolExecutor
 
 
7
 
8
  # Setup logging
9
  def setup_logging():
@@ -17,10 +20,6 @@ def setup_logging():
17
  def load_models():
18
  logging.info("Loading Hugging Face models...")
19
 
20
- # Use a more reliable image-to-text model
21
- image_to_text_processor = AutoProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
22
- image_to_text_model = AutoModelForCausalLM.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
23
-
24
  # Translation models
25
  translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
26
  translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
@@ -28,22 +27,49 @@ def load_models():
28
  # Summarization model
29
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
30
 
31
- return image_to_text_processor, image_to_text_model, translator_hi, translator_ur, summarizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  # Function to extract text from images
34
- def extract_text_from_image(image, processor, model):
35
  logging.info("Extracting text from image...")
36
 
37
- # Prepare image for model
38
- inputs = processor(images=image, return_tensors="pt")
39
-
40
- # Generate text
41
- outputs = model.generate(**inputs)
42
 
43
- # Decode the generated text
44
- preds = processor.decode(outputs[0], skip_special_tokens=True)
45
 
46
- return preds
47
 
48
  # Function to extract text from PDFs
49
  def extract_text_from_pdf(pdf_file):
@@ -69,7 +95,7 @@ def main():
69
  st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")
70
 
71
  # Load all models
72
- image_to_text_processor, image_to_text_model, translator_hi, translator_ur, summarizer = load_models()
73
 
74
  file = st.file_uploader("Upload a file (Image, PDF, or Text):", type=["jpg", "png", "jpeg", "pdf", "txt"])
75
 
@@ -78,7 +104,7 @@ def main():
78
  try:
79
  if file.type in ["image/jpeg", "image/png", "image/jpg"]:
80
  image = Image.open(file)
81
- text = extract_text_from_image(image, image_to_text_processor, image_to_text_model)
82
  elif file.type == "application/pdf":
83
  text = extract_text_from_pdf(file)
84
  elif file.type == "text/plain":
 
1
  import streamlit as st
2
+ import pytesseract
3
+ from transformers import pipeline
4
  from PIL import Image
5
  import fitz # PyMuPDF for PDF processing
6
  import logging
7
  from concurrent.futures import ThreadPoolExecutor
8
+ import cv2
9
+ import numpy as np
10
 
11
  # Setup logging
12
  def setup_logging():
 
20
  def load_models():
21
  logging.info("Loading Hugging Face models...")
22
 
 
 
 
 
23
  # Translation models
24
  translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
25
  translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
 
27
  # Summarization model
28
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
29
 
30
+ return translator_hi, translator_ur, summarizer
31
+
32
+ # Function to preprocess image for better OCR
33
+ def preprocess_image(image):
34
+ # Convert PIL Image to OpenCV format
35
+ img_np = np.array(image)
36
+
37
+ # Convert to grayscale
38
+ gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
39
+
40
+ # Apply thresholding to preprocess the image
41
+ gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
42
+
43
+ # Apply deskewing if needed
44
+ coords = np.column_stack(np.where(gray > 0))
45
+ angle = cv2.minAreaRect(coords)[-1]
46
+
47
+ # The cv2.minAreaRect returns values in the range [-90, 0)
48
+ # so we need to take the inverse to get the rotation from the horizontal axis
49
+ if angle < -45:
50
+ angle = -(90 + angle)
51
+ else:
52
+ angle = -angle
53
+
54
+ # Rotate the image to deskew
55
+ (h, w) = gray.shape[:2]
56
+ center = (w // 2, h // 2)
57
+ M = cv2.getRotationMatrix2D(center, angle, 1.0)
58
+ rotated = cv2.warpAffine(gray, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
59
+
60
+ return rotated
61
 
62
  # Function to extract text from images
63
+ def extract_text_from_image(image):
64
  logging.info("Extracting text from image...")
65
 
66
+ # Preprocess image
67
+ preprocessed_img = preprocess_image(image)
 
 
 
68
 
69
+ # Use pytesseract for OCR
70
+ text = pytesseract.image_to_string(preprocessed_img)
71
 
72
+ return text.strip()
73
 
74
  # Function to extract text from PDFs
75
  def extract_text_from_pdf(pdf_file):
 
95
  st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")
96
 
97
  # Load all models
98
+ translator_hi, translator_ur, summarizer = load_models()
99
 
100
  file = st.file_uploader("Upload a file (Image, PDF, or Text):", type=["jpg", "png", "jpeg", "pdf", "txt"])
101
 
 
104
  try:
105
  if file.type in ["image/jpeg", "image/png", "image/jpg"]:
106
  image = Image.open(file)
107
+ text = extract_text_from_image(image)
108
  elif file.type == "application/pdf":
109
  text = extract_text_from_pdf(file)
110
  elif file.type == "text/plain":