Spaces:

jalvaroluna
/

rag

Runtime error

rag

File size: 528 Bytes

c5446a2

from pdfminer.high_level import extract_text
from docx import Document
import pytesseract
from PIL import Image

def extract_text_from_image(file_path):
    """Run OCR on an image file and return the recognized text.

    Args:
        file_path: Location of the image; passed directly to
            ``PIL.Image.open``.

    Returns:
        Whatever ``pytesseract.image_to_string`` produces for the image
        (called with its default options).
    """
    # Open the image in a context manager so its underlying file handle is
    # released even when the OCR call raises.
    with Image.open(file_path) as image:
        return pytesseract.image_to_string(image)

def extract_text_from_docx(file_path):
    """Return the paragraph text of a .docx file as one string.

    The file is loaded with ``docx.Document``; only the document's
    ``paragraphs`` collection is read. Each paragraph's ``text`` is
    joined to the next with a single newline.
    """
    paragraphs = Document(file_path).paragraphs
    return '\n'.join(paragraph.text for paragraph in paragraphs)

def extract_text_from_pdf(file_path):
    """Return the text of a PDF file.

    Thin wrapper that delegates to ``pdfminer.high_level.extract_text``
    with its default settings.
    """
    return extract_text(file_path)