Spaces:
Sleeping
Sleeping
import PyPDF2 | |
from docx import Document | |
def extract_text_from_pdf(file_path) -> str:
    """Extract the text of every page of a PDF file.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        str: The text of all pages, concatenated in page order. No
        separator is inserted between pages.
    """
    # The reader is built on the open binary handle, so the pages are
    # extracted inside the ``with`` block while that handle is still open.
    with open(file_path, 'rb') as f:
        reader = PyPDF2.PdfReader(f)
        # Join once instead of growing a string with ``+=`` per page.
        # ``or ""`` is a defensive guard: a page whose extraction yields
        # None contributes empty text instead of raising TypeError.
        # NOTE(review): confirm whether the installed PyPDF2 can return None.
        return "".join(page.extract_text() or "" for page in reader.pages)
def extract_text_from_docx(file_path) -> str:
    """Extract the text of a DOCX (Word) file.

    Args:
        file_path: Path of the DOCX file to read.

    Returns:
        str: The text of each entry in the document's ``paragraphs``,
        in order, with a newline appended after every paragraph.
    """
    doc = Document(file_path)
    # Build the result in a single join rather than repeated ``+=``;
    # each paragraph keeps its trailing "\n", exactly as before.
    return "".join(para.text + "\n" for para in doc.paragraphs)