mdasad3617 committed on
Commit
cc10da2
·
verified ·
1 Parent(s): 21263ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -6
app.py CHANGED
@@ -1,12 +1,20 @@
1
  import streamlit as st
2
- import pytesseract
3
- from transformers import pipeline
4
- from PIL import Image
5
- import fitz # PyMuPDF for PDF processing
6
  import logging
7
  from concurrent.futures import ThreadPoolExecutor
8
- import cv2
9
- import numpy as np
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  # Setup logging
12
  def setup_logging():
@@ -15,6 +23,46 @@ def setup_logging():
15
  format="%(asctime)s - %(levelname)s - %(message)s",
16
  )
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  # Load models globally for faster performance
19
  @st.cache_resource
20
  def load_models():
@@ -42,6 +90,11 @@ def preprocess_image(image):
42
 
43
  # Apply deskewing if needed
44
  coords = np.column_stack(np.where(gray > 0))
 
 
 
 
 
45
  angle = cv2.minAreaRect(coords)[-1]
46
 
47
  # cv2.minAreaRect returns angle values in the range [-90, 0)
@@ -90,6 +143,10 @@ def process_chunks(text, model, chunk_size=500):
90
 
91
  # Main app logic
92
  def main():
 
 
 
 
93
  setup_logging()
94
  st.title("Advanced Lab Report Analyzer")
95
  st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")
 
1
  import streamlit as st
 
 
 
 
2
  import logging
3
  from concurrent.futures import ThreadPoolExecutor
4
+ import subprocess
5
+ import sys
6
+
7
+ # Attempt to import libraries, with fallback
8
+ try:
9
+ import pytesseract
10
+ import cv2
11
+ import numpy as np
12
+ from PIL import Image
13
+ import fitz # PyMuPDF for PDF processing
14
+ from transformers import pipeline
15
+ except ImportError:
16
+ st.error("Required libraries are missing. Please install them using pip.")
17
+ st.stop()
18
 
19
  # Setup logging
20
  def setup_logging():
 
23
  format="%(asctime)s - %(levelname)s - %(message)s",
24
  )
25
 
26
+ # Tesseract installation check and guide
27
def check_tesseract():
    """Check that the Tesseract OCR executable can be run.

    Runs ``tesseract --version``. If the command cannot be started or exits
    with a non-zero status, an error message and installation instructions
    for the current platform (Linux, macOS or Windows) are shown through
    Streamlit.

    Returns:
        bool: True when ``tesseract --version`` succeeds, False otherwise.
    """
    try:
        # Only success/failure matters here; the captured output is discarded,
        # so there is no need to decode it (decoding could itself fail).
        subprocess.check_output(['tesseract', '--version'],
                                stderr=subprocess.STDOUT)
        return True
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError (binary not on PATH) as well as
        # PermissionError (binary present but not executable).
        # Provide installation instructions based on operating system
        st.error("Tesseract OCR is not installed.")
        st.markdown("### Tesseract Installation Guide:")

        if sys.platform.startswith('linux'):
            st.code("""
            # For Ubuntu/Debian
            sudo apt-get update
            sudo apt-get install -y tesseract-ocr

            # For Fedora
            sudo dnf install -y tesseract

            # For CentOS/RHEL
            sudo yum install -y tesseract
            """)
        elif sys.platform.startswith('darwin'):
            st.code("""
            # For macOS (using Homebrew)
            brew install tesseract
            """)
        elif sys.platform.startswith('win'):
            st.markdown("""
            1. Download Tesseract installer from:
            https://github.com/UB-Mannheim/tesseract/wiki
            2. Run the installer
            3. Add Tesseract directory to your system PATH
            """)

        st.info("After installation, restart your application.")
        return False
65
+
66
  # Load models globally for faster performance
67
  @st.cache_resource
68
  def load_models():
 
90
 
91
  # Apply deskewing if needed
92
  coords = np.column_stack(np.where(gray > 0))
93
+
94
+ # Prevent error if no foreground pixels found
95
+ if coords.size == 0:
96
+ return gray
97
+
98
  angle = cv2.minAreaRect(coords)[-1]
99
 
100
  # cv2.minAreaRect returns angle values in the range [-90, 0)
 
143
 
144
  # Main app logic
145
  def main():
146
+ # Check Tesseract installation first
147
+ if not check_tesseract():
148
+ return
149
+
150
  setup_logging()
151
  st.title("Advanced Lab Report Analyzer")
152
  st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")