root
committed on
Commit
·
08f9d0b
1
Parent(s):
487c4a2
sss
Browse files
- app.py +4 -4
- requirements.txt +1 -1
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import streamlit as st
|
2 |
-
import PyPDF2
|
3 |
import io
|
4 |
import spacy
|
5 |
from docx import Document
|
@@ -41,10 +41,10 @@ job_descriptions = {
|
|
41 |
}
|
42 |
|
43 |
def extract_text_from_pdf(pdf_file):
|
44 |
-
pdf_reader = PyPDF2.PdfReader(pdf_file)
|
45 |
text = ""
|
46 |
-
|
47 |
-
|
|
|
48 |
return text
|
49 |
|
50 |
def extract_text_from_docx(docx_file):
|
|
|
1 |
import streamlit as st
|
2 |
+
import pdfplumber
|
3 |
import io
|
4 |
import spacy
|
5 |
from docx import Document
|
|
|
41 |
}
|
42 |
|
43 |
def extract_text_from_pdf(pdf_file):
|
|
|
44 |
text = ""
|
45 |
+
with pdfplumber.open(pdf_file) as pdf:
|
46 |
+
for page in pdf.pages:
|
47 |
+
text += page.extract_text() or ""
|
48 |
return text
|
49 |
|
50 |
def extract_text_from_docx(docx_file):
|
requirements.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
streamlit>=1.31.0
|
2 |
-
|
3 |
python-docx>=1.1.0
|
4 |
transformers>=4.37.2
|
5 |
spacy>=3.7.2
|
|
|
1 |
streamlit>=1.31.0
|
2 |
+
pdfplumber>=0.10.3
|
3 |
python-docx>=1.1.0
|
4 |
transformers>=4.37.2
|
5 |
spacy>=3.7.2
|