Update app.py
Browse files
app.py
CHANGED
@@ -7,7 +7,21 @@ import numpy as np
|
|
7 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
8 |
from sklearn.metrics.pairwise import cosine_similarity
|
9 |
from io import StringIO
|
10 |
-
import pytesseract
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
# Function to extract text from a PDF file
|
12 |
def extract_text_from_pdf(pdf_file):
|
13 |
reader = PyPDF2.PdfReader(pdf_file)
|
@@ -67,12 +81,6 @@ def generate_math_solution(query):
|
|
67 |
)
|
68 |
return response['choices'][0]['message']['content']
|
69 |
|
70 |
-
# Function to extract text from image using OCR
|
71 |
-
def extract_text_from_image(image):
|
72 |
-
# Use pytesseract to do OCR on the image and extract text
|
73 |
-
text = pytesseract.image_to_string(image)
|
74 |
-
return text
|
75 |
-
|
76 |
# Function to answer questions based on the image or its content
|
77 |
def answer_question_based_on_image(query, image_text):
|
78 |
prompt = f"The following is text extracted from an image: {image_text}\n\nQuestion: {query}\n\nAnswer the question based on the image text."
|
@@ -81,7 +89,7 @@ def answer_question_based_on_image(query, image_text):
|
|
81 |
messages=[{"role": "user", "content": prompt}]
|
82 |
)
|
83 |
return response['choices'][0]['message']['content']
|
84 |
-
|
85 |
from PIL import Image # Required for local image files
|
86 |
|
87 |
# Streamlit app starts here
|
@@ -402,28 +410,28 @@ if openai_api_key:
|
|
402 |
|
403 |
# Image upload feature
|
404 |
uploaded_image = st.file_uploader("Upload an image:", type=["jpg", "jpeg", "png"])
|
405 |
-
|
406 |
if uploaded_image:
|
407 |
# Open the image with PIL
|
408 |
image = Image.open(uploaded_image)
|
409 |
-
|
410 |
# Display the uploaded image
|
411 |
st.image(image, caption="Uploaded Image", use_column_width=True)
|
412 |
-
|
413 |
-
# Extract text from the image using pytesseract
|
414 |
with st.spinner("Extracting text from the image..."):
|
415 |
image_text = extract_text_from_image(image)
|
416 |
-
|
417 |
# Show the extracted text
|
418 |
if image_text:
|
419 |
st.write("### Extracted Text from Image:")
|
420 |
st.write(image_text)
|
421 |
else:
|
422 |
st.write("No text was extracted from the image.")
|
423 |
-
|
424 |
# Allow the user to ask questions about the image
|
425 |
question = st.text_input("Ask a question about the image:")
|
426 |
-
|
427 |
if question:
|
428 |
with st.spinner("Getting answer..."):
|
429 |
answer = answer_question_based_on_image(question, image_text)
|
|
|
7 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
8 |
from sklearn.metrics.pairwise import cosine_similarity
|
9 |
from io import StringIO
|
10 |
+
import easyocr
|
11 |
+
|
12 |
+
# Function to extract text from image using EasyOCR
|
13 |
+
def extract_text_from_image(image):
    """Run EasyOCR on an image and return the recognized text as one string.

    Args:
        image: The image to read. May be a file path or URL (``str``), raw
            encoded image ``bytes``, a numpy array, or an image object that
            offers ``convert`` (such as the PIL image the upload handler
            produces with ``Image.open``).

    Returns:
        The recognized text fragments joined with single spaces, in the
        order EasyOCR reports them. An empty string when nothing is found.
    """
    # EasyOCR's readtext() is documented to take a path/URL, bytes, or a
    # numpy array. A PIL image is none of these, so convert it first instead
    # of relying on EasyOCR to recognize the specific PIL class.
    if not isinstance(image, (str, bytes, np.ndarray)):
        if hasattr(image, "convert"):
            # Normalize palette/alpha/greyscale modes to plain 3-channel RGB.
            image = image.convert("RGB")
        pixels = np.asarray(image)
        if pixels.ndim == 3 and pixels.shape[2] == 3:
            # EasyOCR treats 3-channel arrays as OpenCV images (BGR order);
            # flip the channel axis and make the result contiguous for cv2.
            pixels = np.ascontiguousarray(pixels[:, :, ::-1])
        image = pixels

    # Specify language(s); more can be added, e.g. ['en', 'fr'].
    # NOTE(review): constructing a Reader loads the OCR models, and this
    # happens on every call. Consider caching the reader at app level
    # (e.g. with Streamlit's resource cache) if that proves slow.
    reader = easyocr.Reader(['en'])

    # Each result item is (bounding_box, text, confidence); keep the text.
    result = reader.readtext(image)
    return ' '.join(item[1] for item in result)
|
24 |
+
|
25 |
# Function to extract text from a PDF file
|
26 |
def extract_text_from_pdf(pdf_file):
|
27 |
reader = PyPDF2.PdfReader(pdf_file)
|
|
|
81 |
)
|
82 |
return response['choices'][0]['message']['content']
|
83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
# Function to answer questions based on the image or its content
|
85 |
def answer_question_based_on_image(query, image_text):
|
86 |
prompt = f"The following is text extracted from an image: {image_text}\n\nQuestion: {query}\n\nAnswer the question based on the image text."
|
|
|
89 |
messages=[{"role": "user", "content": prompt}]
|
90 |
)
|
91 |
return response['choices'][0]['message']['content']
|
92 |
+
|
93 |
from PIL import Image # Required for local image files
|
94 |
|
95 |
# Streamlit app starts here
|
|
|
410 |
|
411 |
# Image upload feature
|
412 |
uploaded_image = st.file_uploader("Upload an image:", type=["jpg", "jpeg", "png"])
|
413 |
+
|
414 |
if uploaded_image:
|
415 |
# Open the image with PIL
|
416 |
image = Image.open(uploaded_image)
|
417 |
+
|
418 |
# Display the uploaded image
|
419 |
st.image(image, caption="Uploaded Image", use_column_width=True)
|
420 |
+
|
421 |
+
# Extract text from the image using EasyOCR
|
422 |
with st.spinner("Extracting text from the image..."):
|
423 |
image_text = extract_text_from_image(image)
|
424 |
+
|
425 |
# Show the extracted text
|
426 |
if image_text:
|
427 |
st.write("### Extracted Text from Image:")
|
428 |
st.write(image_text)
|
429 |
else:
|
430 |
st.write("No text was extracted from the image.")
|
431 |
+
|
432 |
# Allow the user to ask questions about the image
|
433 |
question = st.text_input("Ask a question about the image:")
|
434 |
+
|
435 |
if question:
|
436 |
with st.spinner("Getting answer..."):
|
437 |
answer = answer_question_based_on_image(question, image_text)
|