Spaces:
Runtime error
Runtime error
File size: 528 Bytes
c5446a2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
from pdfminer.high_level import extract_text
from docx import Document
import pytesseract
from PIL import Image
def extract_text_from_image(file_path):
    """Run OCR on an image file and return the recognized text.

    Args:
        file_path: Location of the image; passed unchanged to
            ``PIL.Image.open``.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for the opened
        image.

    Raises:
        Any exception raised by ``Image.open`` or by pytesseract is
        propagated to the caller unchanged.
    """
    # Image.open loads lazily and keeps the underlying file open; the
    # context manager guarantees the handle is released even if OCR fails.
    with Image.open(file_path) as image:
        # OCR must run while the image is still open.
        return pytesseract.image_to_string(image)
def extract_text_from_docx(file_path):
    """Return the paragraph text of a .docx file as one string.

    The file is opened with ``docx.Document`` and the ``text`` of every
    entry in ``Document.paragraphs`` is joined with a newline, in the
    order the library yields them. Only ``paragraphs`` is read; content
    not exposed through that attribute is not included.

    Args:
        file_path: Location of the document; passed unchanged to
            ``Document``.

    Returns:
        The newline-joined paragraph texts (an empty string when there
        are no paragraphs).
    """
    document = Document(file_path)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)
def extract_text_from_pdf(file_path):
    """Return the text of a PDF file.

    Thin wrapper that delegates to ``pdfminer.high_level.extract_text``
    with default settings.

    Args:
        file_path: Location of the PDF; passed unchanged to
            ``extract_text``.

    Returns:
        The value returned by ``extract_text`` for the given file.
    """
    return extract_text(file_path)
|