Spaces:
Runtime error
Runtime error
import PyPDF2 | |
from docx import Document | |
def read_pdf(file) -> str:
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: The PDF source, handed unchanged to ``PyPDF2.PdfReader``
            (presumably a path or a binary file-like object -- confirm
            against the callers).

    Returns:
        The extracted text of all pages joined with no separator between
        pages. A page whose extraction yields nothing contributes an
        empty string; a PDF with no pages yields ``""``.
    """
    reader = PyPDF2.PdfReader(file)
    # ``or ""`` keeps a page with no extractable text from breaking the
    # join; a single join also avoids rebuilding the string once per page.
    return "".join(page.extract_text() or "" for page in reader.pages)
def read_docx(file) -> str:
    """Return the text of a Word document, one paragraph per line.

    Args:
        file: The document source, handed unchanged to ``docx.Document``
            (presumably a path or a binary file-like object -- confirm
            against the callers).

    Returns:
        The text of each entry in ``doc.paragraphs``, in order, with a
        newline appended after every paragraph (including the last).
        A document with no paragraphs yields ``""``.
    """
    doc = Document(file)
    # One join instead of repeated ``+=`` so the result is built in a
    # single pass regardless of how many paragraphs there are.
    return "".join(f"{paragraph.text}\n" for paragraph in doc.paragraphs)