Vaibhav84 committed on
Commit
b271292
·
1 Parent(s): 11e22b7
Files changed (1) hide show
  1. ExtractContentsFromFile.py +20 -13
ExtractContentsFromFile.py CHANGED
@@ -9,17 +9,24 @@ warnings.filterwarnings('ignore')
9
  class ExtractContentFromFile:
10
  def ExtractDataFromFile(FileName,file):
11
  text =''
12
- #print(text)
13
- if FileName.endswith("pdf"):
14
- reserve_pdf_on_memory = io.BytesIO(file)
15
- load_pdf = PyPDF2.PdfReader(reserve_pdf_on_memory)
16
- for page in load_pdf.pages:
17
- text += page.extract_text()
18
-
19
- elif FileName.endswith("doc") or FileName.endswith("docx"):
20
- text = docx2txt.process(file)
21
- text = text.read()
22
-
23
- else:
24
- text = file.decode('utf-8')
 
 
 
 
 
 
 
25
  return text
 
9
  class ExtractContentFromFile:
10
  def ExtractDataFromFile(FileName,file):
11
  text =''
12
+ try:
13
+ #print(text)
14
+ if FileName.endswith("pdf"):
15
+ reserve_pdf_on_memory = io.BytesIO(file)
16
+ load_pdf = PyPDF2.PdfReader(reserve_pdf_on_memory)
17
+ for page in load_pdf.pages:
18
+ text += page.extract_text()
19
+
20
+ elif FileName.endswith("doc") or FileName.endswith("docx"):
21
+ text = docx2txt.process(file)
22
+ text = text.read()
23
+
24
+ else:
25
+ text = file.decode('utf-8')
26
+ except:
27
+ print("Error reading file")
28
+ finally:
29
+ # Close the resources to free up memory
30
+ del reserve_pdf_on_memory
31
+ del load_pdf
32
  return text