root committed on
Commit
08f9d0b
·
1 Parent(s): 487c4a2
Files changed (2) hide show
  1. app.py +4 -4
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import streamlit as st
2
- import PyPDF2
3
  import io
4
  import spacy
5
  from docx import Document
@@ -41,10 +41,10 @@ job_descriptions = {
41
  }
42
 
43
  def extract_text_from_pdf(pdf_file):
44
- pdf_reader = PyPDF2.PdfReader(pdf_file)
45
  text = ""
46
- for page in pdf_reader.pages:
47
- text += page.extract_text()
 
48
  return text
49
 
50
  def extract_text_from_docx(docx_file):
 
1
  import streamlit as st
2
+ import pdfplumber
3
  import io
4
  import spacy
5
  from docx import Document
 
41
  }
42
 
43
  def extract_text_from_pdf(pdf_file):
 
44
  text = ""
45
+ with pdfplumber.open(pdf_file) as pdf:
46
+ for page in pdf.pages:
47
+ text += page.extract_text() or ""
48
  return text
49
 
50
  def extract_text_from_docx(docx_file):
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  streamlit>=1.31.0
2
- PyPDF2>=3.0.1
3
  python-docx>=1.1.0
4
  transformers>=4.37.2
5
  spacy>=3.7.2
 
1
  streamlit>=1.31.0
2
+ pdfplumber>=0.10.3
3
  python-docx>=1.1.0
4
  transformers>=4.37.2
5
  spacy>=3.7.2