Spaces:

mdasad3617
/

lab-report-analyzer

Sleeping

App Files Files Community

mdasad3617 committed on Nov 30, 2024

Commit

233d635

verified ·

1 Parent(s): 8a3f880

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -17

app.py CHANGED Viewed

@@ -1,9 +1,12 @@
 import streamlit as st
-from transformers import pipeline, AutoProcessor, AutoModelForCausalLM
 from PIL import Image
 import fitz  # PyMuPDF for PDF processing
 import logging
 from concurrent.futures import ThreadPoolExecutor
 # Setup logging
 def setup_logging():
@@ -17,10 +20,6 @@ def setup_logging():
 def load_models():
     logging.info("Loading Hugging Face models...")
-    # Use a more reliable image-to-text model
-    image_to_text_processor = AutoProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
-    image_to_text_model = AutoModelForCausalLM.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
     # Translation models
     translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
     translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
@@ -28,22 +27,49 @@ def load_models():
     # Summarization model
     summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-    return image_to_text_processor, image_to_text_model, translator_hi, translator_ur, summarizer
 # Function to extract text from images
-def extract_text_from_image(image, processor, model):
     logging.info("Extracting text from image...")
-    # Prepare image for model
-    inputs = processor(images=image, return_tensors="pt")
-    # Generate text
-    outputs = model.generate(**inputs)
-    # Decode the generated text
-    preds = processor.decode(outputs[0], skip_special_tokens=True)
-    return preds
 # Function to extract text from PDFs
 def extract_text_from_pdf(pdf_file):
@@ -69,7 +95,7 @@ def main():
     st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")
     # Load all models
-    image_to_text_processor, image_to_text_model, translator_hi, translator_ur, summarizer = load_models()
     file = st.file_uploader("Upload a file (Image, PDF, or Text):", type=["jpg", "png", "jpeg", "pdf", "txt"])
@@ -78,7 +104,7 @@ def main():
         try:
             if file.type in ["image/jpeg", "image/png", "image/jpg"]:
                 image = Image.open(file)
-                text = extract_text_from_image(image, image_to_text_processor, image_to_text_model)
             elif file.type == "application/pdf":
                 text = extract_text_from_pdf(file)
             elif file.type == "text/plain":

 import streamlit as st
+import pytesseract
+from transformers import pipeline
 from PIL import Image
 import fitz  # PyMuPDF for PDF processing
 import logging
 from concurrent.futures import ThreadPoolExecutor
+import cv2
+import numpy as np
 # Setup logging
 def setup_logging():
 def load_models():
     logging.info("Loading Hugging Face models...")
     # Translation models
     translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
     translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
     # Summarization model
     summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+    return translator_hi, translator_ur, summarizer
# Function to preprocess image for better OCR
def preprocess_image(image):
    """Binarize and deskew an image so that OCR has cleaner input.

    Steps: normalise the input to grayscale, apply Otsu thresholding,
    estimate the skew of the text from the dark (ink) pixels, and rotate
    the binarized image so the text lines are horizontal.

    Args:
        image: A PIL image in any mode, or an array-like of pixels
            (H x W grayscale or H x W x 3/4 in RGB order).

    Returns:
        A 2-D NumPy array holding the thresholded, deskewed image. If the
        thresholded image contains no dark pixels, it is returned without
        rotation because there is nothing to estimate a skew from.
    """
    # PIL modes such as "L", "P", "1" or "CMYK" do not produce the RGB array
    # that COLOR_RGB2GRAY expects, so normalise PIL inputs to RGB first.
    if hasattr(image, "convert"):
        image = image.convert("RGB")
    img_np = np.array(image)

    # A 2-D array is already single-channel; only colour input needs converting.
    if img_np.ndim == 2:
        gray = img_np
    else:
        gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)

    # Otsu picks the threshold automatically; dark ink becomes 0, paper 255.
    binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    # Estimate skew from the ink pixels, not the background: using the white
    # background would make the bounding rectangle cover the whole page.
    # NOTE(review): assumes dark text on a light background; for inverted
    # scans the estimate degrades to "no rotation" rather than a wrong one.
    ys, xs = np.nonzero(binary == 0)
    if xs.size == 0:
        return binary

    # minAreaRect wants (x, y) points in a 32-bit depth; np.nonzero yields
    # int64 (row, col) indices, so reorder and cast explicitly.
    points = np.column_stack((xs, ys)).astype(np.float32)
    angle = cv2.minAreaRect(points)[-1]

    # OpenCV releases disagree on the reported range ([-90, 0) in older
    # versions, (0, 90] in newer ones), but the rectangle's edges are the
    # same modulo 90 degrees. Fold the value into (-45, 45] so we always get
    # the small rotation of the edge closest to horizontal.
    angle %= 90
    if angle > 45:
        angle -= 90

    # Rotate about the image centre to undo the skew. Border pixels are
    # replicated so the corners exposed by the rotation stay paper-coloured.
    (h, w) = binary.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(
        binary, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE
    )
    return rotated
 # Function to extract text from images
def extract_text_from_image(image):
    """Run OCR on an image and return the recognised text.

    The image is first passed through ``preprocess_image`` and the result
    is handed to ``pytesseract.image_to_string``. Leading and trailing
    whitespace is stripped from the OCR output before it is returned.
    """
    logging.info("Extracting text from image...")
    ocr_ready = preprocess_image(image)
    return pytesseract.image_to_string(ocr_ready).strip()
 # Function to extract text from PDFs
 def extract_text_from_pdf(pdf_file):
     st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")
     # Load all models
+    translator_hi, translator_ur, summarizer = load_models()
     file = st.file_uploader("Upload a file (Image, PDF, or Text):", type=["jpg", "png", "jpeg", "pdf", "txt"])
         try:
             if file.type in ["image/jpeg", "image/png", "image/jpg"]:
                 image = Image.open(file)
+                text = extract_text_from_image(image)
             elif file.type == "application/pdf":
                 text = extract_text_from_pdf(file)
             elif file.type == "text/plain":