shukdevdatta123 committed on
Commit
5bc9cf6
·
verified ·
1 Parent(s): a3cc7e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -15
app.py CHANGED
@@ -7,7 +7,21 @@ import numpy as np
7
  from sklearn.feature_extraction.text import TfidfVectorizer
8
  from sklearn.metrics.pairwise import cosine_similarity
9
  from io import StringIO
10
- import pytesseract
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  # Function to extract text from a PDF file
12
  def extract_text_from_pdf(pdf_file):
13
  reader = PyPDF2.PdfReader(pdf_file)
@@ -67,12 +81,6 @@ def generate_math_solution(query):
67
  )
68
  return response['choices'][0]['message']['content']
69
 
70
- # Function to extract text from image using OCR
71
- def extract_text_from_image(image):
72
- # Use pytesseract to do OCR on the image and extract text
73
- text = pytesseract.image_to_string(image)
74
- return text
75
-
76
  # Function to answer questions based on the image or its content
77
  def answer_question_based_on_image(query, image_text):
78
  prompt = f"The following is text extracted from an image: {image_text}\n\nQuestion: {query}\n\nAnswer the question based on the image text."
@@ -81,7 +89,7 @@ def answer_question_based_on_image(query, image_text):
81
  messages=[{"role": "user", "content": prompt}]
82
  )
83
  return response['choices'][0]['message']['content']
84
-
85
  from PIL import Image # Required for local image files
86
 
87
  # Streamlit app starts here
@@ -402,28 +410,28 @@ if openai_api_key:
402
 
403
  # Image upload feature
404
  uploaded_image = st.file_uploader("Upload an image:", type=["jpg", "jpeg", "png"])
405
-
406
  if uploaded_image:
407
  # Open the image with PIL
408
  image = Image.open(uploaded_image)
409
-
410
  # Display the uploaded image
411
  st.image(image, caption="Uploaded Image", use_column_width=True)
412
-
413
- # Extract text from the image using OCR
414
  with st.spinner("Extracting text from the image..."):
415
  image_text = extract_text_from_image(image)
416
-
417
  # Show the extracted text
418
  if image_text:
419
  st.write("### Extracted Text from Image:")
420
  st.write(image_text)
421
  else:
422
  st.write("No text was extracted from the image.")
423
-
424
  # Allow the user to ask questions about the image
425
  question = st.text_input("Ask a question about the image:")
426
-
427
  if question:
428
  with st.spinner("Getting answer..."):
429
  answer = answer_question_based_on_image(question, image_text)
 
7
  from sklearn.feature_extraction.text import TfidfVectorizer
8
  from sklearn.metrics.pairwise import cosine_similarity
9
  from io import StringIO
10
+ import easyocr
11
+
12
+ # Function to extract text from image using EasyOCR
13
def extract_text_from_image(image):
    """Run EasyOCR on an image and return the recognized text as one string.

    Args:
        image: A PIL image (as produced by ``Image.open``), or any input that
            EasyOCR's ``readtext`` accepts directly: a file path / URL string,
            raw image bytes, or a NumPy array.

    Returns:
        The recognized text fragments joined with single spaces. An empty
        string is returned when EasyOCR recognizes nothing.
    """
    # EasyOCR's readtext() rejects arbitrary PIL images (e.g. PNG uploads) with
    # "Invalid input type", so convert anything that is not already a
    # supported input into a NumPy array first.
    if not isinstance(image, (str, bytes, np.ndarray)):
        if hasattr(image, "convert"):
            # Normalize palette/alpha/grayscale modes to 3-channel RGB, then
            # flip to OpenCV-style BGR channel order for the array input.
            rgb_pixels = np.array(image.convert("RGB"))
            image = np.ascontiguousarray(rgb_pixels[:, :, ::-1])
        else:
            image = np.array(image)

    # Initialize EasyOCR reader
    # NOTE(review): the reader is rebuilt on every call; consider caching it
    # (e.g. via Streamlit's resource cache) if the installed version allows.
    reader = easyocr.Reader(['en'])  # Specify language(s); you can add more like ['en', 'fr'] for multiple languages

    # Read text from the image
    result = reader.readtext(image)

    # Each result item is (bounding box, text, confidence); item[1] is the text
    return ' '.join(item[1] for item in result)
24
+
25
  # Function to extract text from a PDF file
26
  def extract_text_from_pdf(pdf_file):
27
  reader = PyPDF2.PdfReader(pdf_file)
 
81
  )
82
  return response['choices'][0]['message']['content']
83
 
 
 
 
 
 
 
84
  # Function to answer questions based on the image or its content
85
  def answer_question_based_on_image(query, image_text):
86
  prompt = f"The following is text extracted from an image: {image_text}\n\nQuestion: {query}\n\nAnswer the question based on the image text."
 
89
  messages=[{"role": "user", "content": prompt}]
90
  )
91
  return response['choices'][0]['message']['content']
92
+
93
  from PIL import Image # Required for local image files
94
 
95
  # Streamlit app starts here
 
410
 
411
  # Image upload feature
412
  uploaded_image = st.file_uploader("Upload an image:", type=["jpg", "jpeg", "png"])
413
+
414
  if uploaded_image:
415
  # Open the image with PIL
416
  image = Image.open(uploaded_image)
417
+
418
  # Display the uploaded image
419
  st.image(image, caption="Uploaded Image", use_column_width=True)
420
+
421
+ # Extract text from the image using EasyOCR
422
  with st.spinner("Extracting text from the image..."):
423
  image_text = extract_text_from_image(image)
424
+
425
  # Show the extracted text
426
  if image_text:
427
  st.write("### Extracted Text from Image:")
428
  st.write(image_text)
429
  else:
430
  st.write("No text was extracted from the image.")
431
+
432
  # Allow the user to ask questions about the image
433
  question = st.text_input("Ask a question about the image:")
434
+
435
  if question:
436
  with st.spinner("Getting answer..."):
437
  answer = answer_question_based_on_image(question, image_text)