Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
|
2 |
-
|
3 |
from fastapi import FastAPI
|
4 |
from fastapi.responses import RedirectResponse
|
5 |
import gradio as gr
|
@@ -93,7 +93,7 @@ reader = easyocr.Reader(['en', 'fr']) # OCR for English & French
|
|
93 |
|
94 |
# ---- TEXT EXTRACTION FUNCTIONS ----
|
95 |
def extract_text_from_pdf(pdf_file):
|
96 |
-
"
|
97 |
text = []
|
98 |
try:
|
99 |
with fitz.open(pdf_file) as doc:
|
@@ -104,12 +104,12 @@ def extract_text_from_pdf(pdf_file):
|
|
104 |
return "\n".join(text)
|
105 |
|
106 |
def extract_text_from_docx(docx_file):
|
107 |
-
"
|
108 |
doc = docx.Document(docx_file)
|
109 |
return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
|
110 |
|
111 |
def extract_text_from_pptx(pptx_file):
|
112 |
-
"
|
113 |
text = []
|
114 |
try:
|
115 |
presentation = pptx.Presentation(pptx_file)
|
@@ -122,7 +122,7 @@ def extract_text_from_pptx(pptx_file):
|
|
122 |
return "\n".join(text)
|
123 |
|
124 |
def extract_text_from_xlsx(xlsx_file):
|
125 |
-
"
|
126 |
text = []
|
127 |
try:
|
128 |
wb = openpyxl.load_workbook(xlsx_file)
|
@@ -135,13 +135,13 @@ def extract_text_from_xlsx(xlsx_file):
|
|
135 |
return "\n".join(text)
|
136 |
|
137 |
def extract_text_from_image(image_path):
|
138 |
-
"
|
139 |
result = reader.readtext(image_path, detail=0)
|
140 |
return " ".join(result) # Return text as a single string
|
141 |
|
142 |
# ---- MAIN PROCESSING FUNCTIONS ----
|
143 |
def answer_question_from_doc(file, question):
|
144 |
-
"
|
145 |
ext = file.name.split(".")[-1].lower()
|
146 |
|
147 |
if ext == "pdf":
|
@@ -166,7 +166,7 @@ def answer_question_from_doc(file, question):
|
|
166 |
return f"Error generating answer: {e}"
|
167 |
|
168 |
def answer_question_from_image(image, question):
|
169 |
-
"
|
170 |
img_text = extract_text_from_image(image)
|
171 |
if not img_text.strip():
|
172 |
return "No readable text found in the image."
|
@@ -201,3 +201,4 @@ app = gr.mount_gradio_app(app, demo, path="/")
|
|
201 |
@app.get("/")
|
202 |
def home():
|
203 |
return RedirectResponse(url="/")
|
|
|
|
1 |
|
2 |
+
|
3 |
from fastapi import FastAPI
|
4 |
from fastapi.responses import RedirectResponse
|
5 |
import gradio as gr
|
|
|
93 |
|
94 |
# ---- TEXT EXTRACTION FUNCTIONS ----
|
95 |
def extract_text_from_pdf(pdf_file):
|
96 |
+
"Extract text from a PDF file.""
|
97 |
text = []
|
98 |
try:
|
99 |
with fitz.open(pdf_file) as doc:
|
|
|
104 |
return "\n".join(text)
|
105 |
|
106 |
def extract_text_from_docx(docx_file):
|
107 |
+
"Extract text from a DOCX file."
|
108 |
doc = docx.Document(docx_file)
|
109 |
return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
|
110 |
|
111 |
def extract_text_from_pptx(pptx_file):
|
112 |
+
"Extract text from a PPTX file."
|
113 |
text = []
|
114 |
try:
|
115 |
presentation = pptx.Presentation(pptx_file)
|
|
|
122 |
return "\n".join(text)
|
123 |
|
124 |
def extract_text_from_xlsx(xlsx_file):
|
125 |
+
"Extract text from an XLSX file."
|
126 |
text = []
|
127 |
try:
|
128 |
wb = openpyxl.load_workbook(xlsx_file)
|
|
|
135 |
return "\n".join(text)
|
136 |
|
137 |
def extract_text_from_image(image_path):
|
138 |
+
"Extract text from an image using EasyOCR.""
|
139 |
result = reader.readtext(image_path, detail=0)
|
140 |
return " ".join(result) # Return text as a single string
|
141 |
|
142 |
# ---- MAIN PROCESSING FUNCTIONS ----
|
143 |
def answer_question_from_doc(file, question):
|
144 |
+
"Process document and answer a question based on its content."
|
145 |
ext = file.name.split(".")[-1].lower()
|
146 |
|
147 |
if ext == "pdf":
|
|
|
166 |
return f"Error generating answer: {e}"
|
167 |
|
168 |
def answer_question_from_image(image, question):
|
169 |
+
"Process an image, extract text, and answer a question.""
|
170 |
img_text = extract_text_from_image(image)
|
171 |
if not img_text.strip():
|
172 |
return "No readable text found in the image."
|
|
|
201 |
@app.get("/")
|
202 |
def home():
|
203 |
return RedirectResponse(url="/")
|
204 |
+
"""
|