mdasad3617 committed on
Commit
bd2b9ae
·
verified ·
1 Parent(s): d574795

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -12
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import streamlit as st
2
- from transformers import pipeline
3
  from PIL import Image
4
  import fitz # PyMuPDF for PDF processing
5
  import logging
@@ -16,8 +16,10 @@ def setup_logging():
16
  @st.cache_resource
17
  def load_models():
18
  logging.info("Loading Hugging Face models...")
19
- # Use most popular image-to-text model
20
- image_to_text = pipeline("image-to-text", model="microsoft/trocr-large-printed")
 
 
21
 
22
  # Translation models
23
  translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
@@ -26,16 +28,22 @@ def load_models():
26
  # Summarization model
27
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
28
 
29
- return image_to_text, translator_hi, translator_ur, summarizer
30
 
31
  # Function to extract text from images
32
- def extract_text_from_image(image):
33
  logging.info("Extracting text from image...")
34
- # Use TrOCR for more accurate text extraction
35
- image_to_text = load_models()[0]
36
- results = image_to_text(image)
37
- # Combine all detected text
38
- return " ".join([result['generated_text'] for result in results])
 
 
 
 
 
 
39
 
40
  # Function to extract text from PDFs
41
  def extract_text_from_pdf(pdf_file):
@@ -61,7 +69,7 @@ def main():
61
  st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")
62
 
63
  # Load all models
64
- image_to_text, translator_hi, translator_ur, summarizer = load_models()
65
 
66
  file = st.file_uploader("Upload a file (Image, PDF, or Text):", type=["jpg", "png", "jpeg", "pdf", "txt"])
67
 
@@ -70,7 +78,7 @@ def main():
70
  try:
71
  if file.type in ["image/jpeg", "image/png", "image/jpg"]:
72
  image = Image.open(file)
73
- text = extract_text_from_image(image)
74
  elif file.type == "application/pdf":
75
  text = extract_text_from_pdf(file)
76
  elif file.type == "text/plain":
 
1
  import streamlit as st
2
+ from transformers import pipeline, AutoProcessor, AutoModelForCausalLM
3
  from PIL import Image
4
  import fitz # PyMuPDF for PDF processing
5
  import logging
 
16
  @st.cache_resource
17
  def load_models():
18
  logging.info("Loading Hugging Face models...")
19
+
20
+ # Use a more reliable image-to-text model
21
+ image_to_text_processor = AutoProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
22
+ image_to_text_model = AutoModelForCausalLM.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
23
 
24
  # Translation models
25
  translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
 
28
  # Summarization model
29
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
30
 
31
+ return image_to_text_processor, image_to_text_model, translator_hi, translator_ur, summarizer
32
 
33
  # Function to extract text from images
34
+ def extract_text_from_image(image, processor, model):
35
  logging.info("Extracting text from image...")
36
+
37
+ # Prepare image for model
38
+ inputs = processor(images=image, return_tensors="pt")
39
+
40
+ # Generate text
41
+ outputs = model.generate(**inputs)
42
+
43
+ # Decode the generated text
44
+ preds = processor.decode(outputs[0], skip_special_tokens=True)
45
+
46
+ return preds
47
 
48
  # Function to extract text from PDFs
49
  def extract_text_from_pdf(pdf_file):
 
69
  st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")
70
 
71
  # Load all models
72
+ image_to_text_processor, image_to_text_model, translator_hi, translator_ur, summarizer = load_models()
73
 
74
  file = st.file_uploader("Upload a file (Image, PDF, or Text):", type=["jpg", "png", "jpeg", "pdf", "txt"])
75
 
 
78
  try:
79
  if file.type in ["image/jpeg", "image/png", "image/jpg"]:
80
  image = Image.open(file)
81
+ text = extract_text_from_image(image, image_to_text_processor, image_to_text_model)
82
  elif file.type == "application/pdf":
83
  text = extract_text_from_pdf(file)
84
  elif file.type == "text/plain":