mdasad3617 committed on
Commit
8b18b7b
·
verified ·
1 Parent(s): ca69a0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -43
app.py CHANGED
@@ -1,60 +1,107 @@
1
- import logging
2
- import pytesseract
3
- from PIL import Image
4
- import os
5
  import streamlit as st
 
 
 
 
 
 
6
 
7
- # Configure logging to display debug information
8
- logging.basicConfig(level=logging.DEBUG)
 
 
 
 
9
 
10
- # Function to extract text from an image
11
- def extract_text_from_image(image):
12
- try:
13
- logging.info("Starting text extraction from image...")
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- # Verify the image is not corrupted
16
- image.verify() # Verifies the image is not corrupted
17
- logging.info("Image opened and verified successfully.")
 
 
 
 
 
18
 
19
- # Resize the image to improve performance (optional)
20
- image = image.resize((image.width // 2, image.height // 2)) # Resize image to 50% of the original size
21
-
22
- # Extract text using pytesseract
23
- text = pytesseract.image_to_string(image)
24
-
25
- logging.info("Text extraction completed successfully.")
26
- return text
27
 
28
- except Exception as e:
29
- logging.error(f"An error occurred while processing the image: {str(e)}")
30
- return f"Error: {str(e)}"
 
 
 
 
31
 
32
- # Streamlit web application
33
  def main():
 
34
  st.title("Lab Report Analyzer")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- st.markdown("Upload an image file to extract text from it.")
 
 
 
37
 
38
- # File uploader widget
39
- uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"])
 
40
 
41
- if uploaded_file is not None:
42
- # Save the uploaded file temporarily
43
- with open("temp_image.jpg", "wb") as f:
44
- f.write(uploaded_file.getbuffer())
45
-
46
- # Open the image file
47
- image = Image.open("temp_image.jpg")
48
-
49
- # Extract text from the uploaded image
50
- extracted_text = extract_text_from_image(image)
51
 
52
- # Display extracted text
53
- st.subheader("Extracted Text")
54
- st.text(extracted_text)
55
 
56
- # Optionally, delete the temporary file after processing
57
- os.remove("temp_image.jpg")
 
 
 
 
 
 
 
58
 
59
  if __name__ == "__main__":
60
  main()
 
 
 
 
 
1
  import streamlit as st
2
+ from transformers import pipeline, VisionEncoderDecoderModel, ViTImageProcessor
3
+ from PIL import Image
4
+ import fitz
5
+ import logging
6
+ from concurrent.futures import ThreadPoolExecutor
7
+ import torch
8
 
9
# Setup logging
def setup_logging():
    """Configure the root logger with INFO level and a timestamped format."""
    log_options = {
        "level": logging.INFO,
        "format": "%(asctime)s - %(levelname)s - %(message)s",
    }
    logging.basicConfig(**log_options)
15
 
16
# Load models once and let Streamlit cache them across reruns
@st.cache_resource
def load_models():
    """Load every model the app needs.

    Returns:
        tuple: ``(processor, model, translator_hi, translator_ur, summarizer)``
        where ``processor``/``model`` form the image-to-text pair, the two
        translators are English->Hindi and English->Urdu pipelines, and
        ``summarizer`` is a summarization pipeline.
    """
    # Imported locally because the file-level import list does not include it.
    from transformers import TrOCRProcessor

    logging.info("Loading Hugging Face models...")

    # The processor must bundle a tokenizer: the image extractor calls
    # ``processor.decode(...)``, which a bare image processor does not provide.
    # NOTE(review): the previous checkpoint id
    # ("microsoft/vision-transformation-transformer") does not appear to be a
    # published model; TrOCR is used instead. TrOCR targets single text lines,
    # so full-page reports may need a page-level OCR step - confirm.
    ocr_checkpoint = "microsoft/trocr-base-printed"
    processor = TrOCRProcessor.from_pretrained(ocr_checkpoint)
    model = VisionEncoderDecoderModel.from_pretrained(ocr_checkpoint)

    # Load translation models
    translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
    translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")

    # Summarization model
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

    return processor, model, translator_hi, translator_ur, summarizer
32
 
33
# Function to extract text from images using Hugging Face model
def extract_text_from_image(image, processor, model):
    """Run the image-to-text model on ``image`` and return the decoded string.

    Args:
        image: Image object exposing ``mode`` and ``convert`` (e.g. a PIL image).
        processor: Callable that turns the image into model inputs and also
            provides ``decode`` for the generated token ids.
        model: Object exposing ``generate(**inputs)``.

    Returns:
        str: Decoded text with surrounding whitespace removed, so that an
        effectively empty result is falsy for the caller.
    """
    logging.info("Extracting text from image...")

    # Uploaded PNGs are often RGBA, palette, or greyscale; normalise to three
    # channels before preprocessing.
    if image.mode != "RGB":
        image = image.convert("RGB")

    inputs = processor(images=image, return_tensors="pt")
    generated_ids = model.generate(**inputs)
    decoded = processor.decode(generated_ids[0], skip_special_tokens=True)
    return decoded.strip()
41
 
42
# Function to extract text from PDFs
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: Either a filesystem path or a file-like object with
            ``read()`` (such as the object returned by the file uploader).

    Returns:
        str: Text of all pages, in page order.
    """
    logging.info("Extracting text from PDF...")

    if hasattr(pdf_file, "read"):
        # An in-memory upload has no path on disk; hand its bytes over as a
        # stream and state the type explicitly.
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    else:
        doc = fitz.open(pdf_file)

    # Close the document even if text extraction raises.
    with doc:
        return "".join(page.get_text() for page in doc)
50
 
51
# Function to process text in chunks for better performance
def _split_into_chunks(text, chunk_size):
    """Split ``text`` into pieces of at most ``chunk_size`` characters, cutting at whitespace when possible."""
    chunks = []
    start, total = 0, len(text)
    while start < total:
        end = min(start + chunk_size, total)
        if end < total and not text[end].isspace():
            # The window ends mid-word: back up to the last whitespace inside
            # it. A single word longer than chunk_size is still cut hard.
            for idx in range(end - 1, start, -1):
                if text[idx].isspace():
                    end = idx + 1
                    break
        piece = text[start:end].strip()
        if piece:
            chunks.append(piece)
        start = end
    return chunks


def process_chunks(text, model, chunk_size=500):
    """Run ``model`` over ``text`` chunk by chunk and join the translations.

    Args:
        text: Input text; empty or whitespace-only text yields ``""``.
        model: Callable invoked as ``model(chunk, max_length=200)`` that
            returns a sequence whose first item has a ``"translation_text"`` key.
        chunk_size: Maximum number of characters per chunk (must be positive).

    Returns:
        str: Per-chunk translations joined with single spaces, in input order.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    chunks = _split_into_chunks(text, chunk_size)
    if not chunks:
        return ""

    # executor.map yields results in submission order, so chunk order is kept.
    with ThreadPoolExecutor() as executor:
        results = list(executor.map(lambda chunk: model(chunk, max_length=200), chunks))
    return " ".join(result[0]["translation_text"] for result in results)
58
 
59
# Main app logic
def main():
    """Render the Streamlit page: upload a file, extract its text, then show
    an English summary plus Hindi and Urdu translations.

    Any failure while processing the upload is logged with its traceback and
    reported to the user as a generic error message.
    """
    setup_logging()
    st.title("Lab Report Analyzer")
    st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")

    # Load models
    processor, model, translator_hi, translator_ur, summarizer = load_models()

    file = st.file_uploader("Upload a file (Image, PDF, or Text):", type=["jpg", "png", "jpeg", "pdf", "txt"])
    if not file:
        st.info("Please upload a file to begin.")
        return

    try:
        text = ""
        if file.type in ["image/jpeg", "image/png", "image/jpg"]:
            image = Image.open(file)
            text = extract_text_from_image(image, processor, model)
        elif file.type == "application/pdf":
            text = extract_text_from_pdf(file)
        elif file.type == "text/plain":
            # Replace undecodable bytes instead of aborting the whole run.
            text = file.read().decode("utf-8", errors="replace")

        # Whitespace-only extraction counts as "nothing extracted".
        text = text.strip()
        if not text:
            st.warning("No text could be extracted. Please check the file and try again.")
            return

        with st.spinner("Analyzing the report..."):
            # Generate summary. truncation=True clips input that exceeds the
            # model's maximum length, so only the leading part of a long
            # report is summarized instead of the call failing.
            summary = summarizer(text, max_length=130, min_length=30, truncation=True)[0]["summary_text"]

            # Generate translations
            hindi_translation = process_chunks(text, translator_hi)
            urdu_translation = process_chunks(text, translator_ur)

        # Display results
        st.subheader("Analysis Summary (English):")
        st.write(summary)

        st.subheader("Hindi Translation:")
        st.write(hindi_translation)

        st.subheader("Urdu Translation:")
        st.write(urdu_translation)
    except Exception as exc:
        # Top-level UI boundary: keep the traceback in the log, show a
        # generic message to the user.
        logging.exception("Error processing the file: %s", exc)
        st.error("An error occurred while processing the file. Please try again.")


if __name__ == "__main__":
    main()