mdasad3617 committed on
Commit
4d72778
·
verified ·
1 Parent(s): 08d36af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -2,7 +2,7 @@ import streamlit as st
2
  from transformers import pipeline
3
  import pdfplumber
4
  from PIL import Image
5
- import pytesseract
6
  from langdetect import detect
7
 
8
  # Initialize Models
@@ -27,11 +27,12 @@ def extract_text_from_pdf(pdf_file):
27
  text += page.extract_text()
28
  return text.strip()
29
 
30
- # Extract text from Image
31
  def extract_text_from_image(image_file):
 
32
  image = Image.open(image_file)
33
- text = pytesseract.image_to_string(image)
34
- return text.strip()
35
 
36
  # Check if content is a lab report
37
  def is_lab_report(text, model):
 
2
  from transformers import pipeline
3
  import pdfplumber
4
  from PIL import Image
5
+ import easyocr
6
  from langdetect import detect
7
 
8
  # Initialize Models
 
27
  text += page.extract_text()
28
  return text.strip()
29
 
30
# Extract text from Image using EasyOCR
def extract_text_from_image(image_file):
    """Run EasyOCR over an image and return the recognized text as one string.

    Args:
        image_file: Anything ``PIL.Image.open`` accepts (a filesystem path or
            a binary file-like object).

    Returns:
        The text fragments reported by EasyOCR joined with single spaces and
        stripped of surrounding whitespace.
    """
    # Local import: numpy is only needed for the PIL -> array hand-off below.
    import numpy as np

    # NOTE(review): a new Reader is built on every call, as in the original;
    # presumably this reloads the OCR models each time -- consider caching it.
    reader = easyocr.Reader(['en'])  # Add more languages if needed

    with Image.open(image_file) as image:
        # readtext() is documented to take a file path/URL, raw bytes, or a
        # numpy array -- not a PIL Image object -- so pass it the pixel data.
        # Converting to RGB first gives a uniform 3-channel array regardless
        # of the source mode (palette, alpha, greyscale, ...).
        pixels = np.array(image.convert("RGB"))

    result = reader.readtext(pixels, detail=0)  # `detail=0` returns only the text
    return " ".join(result).strip()
36
 
37
  # Check if content is a lab report
38
  def is_lab_report(text, model):