Spaces:
Sleeping
Sleeping
import io | |
import PyPDF2 | |
import docx2txt | |
import numpy as np | |
from psycopg2.extensions import register_adapter, AsIs | |
register_adapter(np.int64, AsIs) | |
import warnings | |
warnings.filterwarnings('ignore') | |
class ExtractContentFromFile: | |
def ExtractDataFromFile(FileName,file): | |
text ='' | |
#print(text) | |
if FileName.endswith("pdf"): | |
reserve_pdf_on_memory = io.BytesIO(file) | |
load_pdf = PyPDF2.PdfReader(reserve_pdf_on_memory) | |
for page in load_pdf.pages: | |
text += page.extract_text() | |
elif FileName.endswith("doc") or FileName.endswith("docx"): | |
text = docx2txt.process(file) | |
text = text.read() | |
else: | |
text = file.decode('utf-8') | |
return text |