mdasad3617 committed on
Commit
5244794
·
verified ·
1 Parent(s): ed4ebee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -25
app.py CHANGED
@@ -1,10 +1,9 @@
1
  import streamlit as st
2
- from transformers import pipeline, VisionEncoderDecoderModel, ViTImageProcessor
3
  from PIL import Image
4
  import fitz # PyMuPDF for PDF processing
5
  import logging
6
  from concurrent.futures import ThreadPoolExecutor
7
- import torch
8
 
9
  # Setup logging
10
  def setup_logging():
@@ -17,27 +16,26 @@ def setup_logging():
17
  @st.cache_resource
18
  def load_models():
19
  logging.info("Loading Hugging Face models...")
20
- # Use an alternative model from Hugging Face for image captioning
21
- processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
22
- model = VisionEncoderDecoderModel.from_pretrained("google/vit-base-patch16-224")
23
 
24
- # Load translation models
25
  translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
26
  translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
27
 
28
  # Summarization model
29
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
30
 
31
- return processor, model, translator_hi, translator_ur, summarizer
32
 
33
- # Function to extract text from images using Hugging Face model
34
- def extract_text_from_image(image, processor, model):
35
  logging.info("Extracting text from image...")
36
- # Preprocess image
37
- inputs = processor(images=image, return_tensors="pt")
38
- # Use the model to generate captions
39
- out = model.generate(**inputs)
40
- return processor.decode(out[0], skip_special_tokens=True)
41
 
42
  # Function to extract text from PDFs
43
  def extract_text_from_pdf(pdf_file):
@@ -59,49 +57,54 @@ def process_chunks(text, model, chunk_size=500):
59
  # Main app logic
60
  def main():
61
  setup_logging()
62
- st.title("Lab Report Analyzer")
63
  st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")
64
 
65
- # Load models
66
- processor, model, translator_hi, translator_ur, summarizer = load_models()
67
 
68
  file = st.file_uploader("Upload a file (Image, PDF, or Text):", type=["jpg", "png", "jpeg", "pdf", "txt"])
 
69
  if file:
70
  text = ""
71
  try:
72
  if file.type in ["image/jpeg", "image/png", "image/jpg"]:
73
  image = Image.open(file)
74
- text = extract_text_from_image(image, processor, model)
75
  elif file.type == "application/pdf":
76
  text = extract_text_from_pdf(file)
77
  elif file.type == "text/plain":
78
  text = file.read().decode("utf-8")
79
-
80
  if text:
81
  with st.spinner("Analyzing the report..."):
82
  # Generate summary
83
  summary = summarizer(text, max_length=130, min_length=30)[0]["summary_text"]
84
-
85
  # Generate translations
86
  hindi_translation = process_chunks(text, translator_hi)
87
  urdu_translation = process_chunks(text, translator_ur)
88
-
89
  # Display results
 
 
 
90
  st.subheader("Analysis Summary (English):")
91
  st.write(summary)
92
-
93
  st.subheader("Hindi Translation:")
94
  st.write(hindi_translation)
95
-
96
  st.subheader("Urdu Translation:")
97
  st.write(urdu_translation)
98
  else:
99
  st.warning("No text could be extracted. Please check the file and try again.")
 
100
  except Exception as e:
101
  logging.error(f"Error processing the file: {e}")
102
- st.error("An error occurred while processing the file. Please try again.")
103
  else:
104
  st.info("Please upload a file to begin.")
105
 
106
  if __name__ == "__main__":
107
- main()
 
1
  import streamlit as st
2
+ from transformers import pipeline
3
  from PIL import Image
4
  import fitz # PyMuPDF for PDF processing
5
  import logging
6
  from concurrent.futures import ThreadPoolExecutor
 
7
 
8
  # Setup logging
9
  def setup_logging():
 
16
  @st.cache_resource
17
  def load_models():
18
  logging.info("Loading Hugging Face models...")
19
+ # Use most popular image-to-text model
20
+ image_to_text = pipeline("image-to-text", model="microsoft/trocr-large-printed")
 
21
 
22
+ # Translation models
23
  translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
24
  translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")
25
 
26
  # Summarization model
27
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
28
 
29
+ return image_to_text, translator_hi, translator_ur, summarizer
30
 
31
+ # Function to extract text from images
32
def extract_text_from_image(image):
    """Run the OCR pipeline on an image and return its text as one string.

    Args:
        image: The image to read. It is passed unchanged to the
            image-to-text pipeline (``main`` supplies a PIL image).

    Returns:
        The ``generated_text`` value of every pipeline result, joined with
        single spaces.
    """
    logging.info("Extracting text from image...")
    # load_models() returns the OCR pipeline first; the other entries
    # (translators, summarizer) are not needed here.
    ocr_pipeline, *_ = load_models()
    fragments = (entry['generated_text'] for entry in ocr_pipeline(image))
    return " ".join(fragments)
39
 
40
  # Function to extract text from PDFs
41
  def extract_text_from_pdf(pdf_file):
 
57
  # Main app logic
58
  def main():
59
  setup_logging()
60
+ st.title("Advanced Lab Report Analyzer")
61
  st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")
62
 
63
+ # Load all models
64
+ image_to_text, translator_hi, translator_ur, summarizer = load_models()
65
 
66
  file = st.file_uploader("Upload a file (Image, PDF, or Text):", type=["jpg", "png", "jpeg", "pdf", "txt"])
67
+
68
  if file:
69
  text = ""
70
  try:
71
  if file.type in ["image/jpeg", "image/png", "image/jpg"]:
72
  image = Image.open(file)
73
+ text = extract_text_from_image(image)
74
  elif file.type == "application/pdf":
75
  text = extract_text_from_pdf(file)
76
  elif file.type == "text/plain":
77
  text = file.read().decode("utf-8")
78
+
79
  if text:
80
  with st.spinner("Analyzing the report..."):
81
  # Generate summary
82
  summary = summarizer(text, max_length=130, min_length=30)[0]["summary_text"]
83
+
84
  # Generate translations
85
  hindi_translation = process_chunks(text, translator_hi)
86
  urdu_translation = process_chunks(text, translator_ur)
87
+
88
  # Display results
89
+ st.subheader("Original Text:")
90
+ st.write(text)
91
+
92
  st.subheader("Analysis Summary (English):")
93
  st.write(summary)
94
+
95
  st.subheader("Hindi Translation:")
96
  st.write(hindi_translation)
97
+
98
  st.subheader("Urdu Translation:")
99
  st.write(urdu_translation)
100
  else:
101
  st.warning("No text could be extracted. Please check the file and try again.")
102
+
103
  except Exception as e:
104
  logging.error(f"Error processing the file: {e}")
105
+ st.error(f"An error occurred while processing the file: {e}")
106
  else:
107
  st.info("Please upload a file to begin.")
108
 
109
  if __name__ == "__main__":
110
+ main()