Spaces:
Sleeping
Sleeping
Update functions.py
Browse files- functions.py +54 -53
functions.py
CHANGED
@@ -385,79 +385,80 @@ class QdrantInterface:
|
|
385 |
|
386 |
|
387 |
|
388 |
-
def resume_pdf():
|
389 |
|
390 |
|
391 |
-
# File uploader widget
|
392 |
-
uploaded_file = st.file_uploader("Upload Resumes", type=["pdf"])
|
393 |
|
394 |
-
|
395 |
-
|
396 |
-
|
|
|
|
|
|
|
|
|
397 |
num_pages = len(pdf_reader.pages)
|
398 |
-
|
399 |
-
# Extract text from each page
|
400 |
-
text = ""
|
401 |
for page_num in range(num_pages):
|
402 |
page = pdf_reader.pages[page_num]
|
403 |
text += page.extract_text()
|
|
|
|
|
|
|
404 |
|
405 |
-
|
406 |
-
|
407 |
|
408 |
-
|
409 |
-
|
410 |
-
st.write(csv_data)
|
411 |
|
412 |
-
|
413 |
-
st.
|
414 |
-
|
415 |
-
data=csv_data,
|
416 |
-
file_name="converted_data.csv",
|
417 |
-
mime="text/csv"
|
418 |
-
)
|
419 |
|
420 |
-
|
|
|
|
|
421 |
|
|
|
|
|
422 |
|
423 |
-
|
424 |
-
|
|
|
425 |
|
426 |
-
|
427 |
-
|
428 |
-
pdf_reader = PyPDF2.PdfReader(uploaded_file)
|
429 |
-
num_pages = len(pdf_reader.pages)
|
430 |
|
431 |
-
#
|
432 |
-
|
433 |
-
|
434 |
-
page = pdf_reader.pages[page_num]
|
435 |
-
text += page.extract_text()
|
436 |
|
437 |
-
|
438 |
-
|
439 |
|
440 |
-
|
441 |
-
|
442 |
-
st.write(csv_data)
|
443 |
|
444 |
-
|
445 |
-
st.
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
)
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
|
|
|
|
|
|
|
|
|
|
456 |
|
457 |
-
|
458 |
-
|
|
|
459 |
|
460 |
-
return csv_data
|
461 |
|
462 |
|
463 |
|
|
|
385 |
|
386 |
|
387 |
|
|
|
388 |
|
389 |
|
|
|
|
|
390 |
|
391 |
+
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, concatenated in page order.

    ``file`` is handed straight to ``PyPDF2.PdfReader``. If anything raises
    while opening the PDF or extracting a page, the error is reported on the
    page via ``st.error`` and whatever text was collected before the failure
    is returned (an empty string if nothing was read).
    """
    collected = ""
    try:
        reader = PyPDF2.PdfReader(file)
        # Accumulate page by page so a failure part-way through still leaves
        # the text of the earlier pages in ``collected``.
        for page in reader.pages:
            collected += page.extract_text()
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
    return collected
|
405 |
|
406 |
+
def resume_pdf():
    """Render the resume-upload page and show the text extracted from each PDF.

    Shows a title and a PDF uploader that accepts several files. For every
    uploaded file the text is obtained with ``extract_text_from_pdf`` and
    written to the page under a heading carrying the file's name. Afterwards
    all results are displayed together as a pandas DataFrame with the columns
    ``"File Name"`` and ``"Text"``.

    Nothing is rendered below the uploader until at least one file has been
    uploaded. Returns ``None``.
    """
    st.title("UPLOAD RESUMES")

    # accept_multiple_files=True makes the uploader return a list of uploaded
    # files. Without it a single file object is returned, and the loop below
    # (which reads ``.name`` on each item) would not receive files.
    uploaded_files = st.file_uploader(
        "Upload PDF files", accept_multiple_files=True, type="pdf"
    )

    # An empty result (nothing uploaded yet) is falsy: leave the page as is.
    if not uploaded_files:
        return

    st.write("## Extracted Text from PDFs")
    df_rows = []

    for uploaded_file in uploaded_files:
        text = extract_text_from_pdf(uploaded_file)

        # One row per file for the combined table shown at the end.
        df_rows.append({"File Name": uploaded_file.name, "Text": text})

        # Show each file's text as soon as it has been extracted.
        st.write(f"### {uploaded_file.name}")
        st.write(text)

    df = pd.DataFrame(df_rows)

    st.write("## Combined Data in DataFrame")
    st.write(df)
|
|
|
|
|
433 |
|
434 |
+
def job_desc_pdf():
    """Render the job-description upload page and show the extracted text.

    Shows a title and a PDF uploader that accepts several files. For every
    uploaded file the text is obtained with ``extract_text_from_pdf`` and
    written to the page under a heading carrying the file's name. Afterwards
    all results are displayed together as a pandas DataFrame with the columns
    ``"File Name"`` and ``"Text"``.

    Nothing is rendered below the uploader until at least one file has been
    uploaded. Returns ``None``.
    """
    st.title("UPLOAD JOB DESCRIPTION")

    uploaded_files = st.file_uploader(
        "Upload PDF files", accept_multiple_files=True, type="pdf"
    )

    # An empty result (nothing uploaded yet) is falsy: leave the page as is.
    if not uploaded_files:
        return

    st.write("## Extracted Text from PDFs")
    df_rows = []

    # NOTE(review): this loop was commented out, which left ``df_rows`` empty
    # and the table below always blank. Restored to mirror ``resume_pdf``;
    # confirm that disabling it was not intentional.
    for uploaded_file in uploaded_files:
        text = extract_text_from_pdf(uploaded_file)

        # One row per file for the combined table shown at the end.
        df_rows.append({"File Name": uploaded_file.name, "Text": text})

        # Show each file's text as soon as it has been extracted.
        st.write(f"### {uploaded_file.name}")
        st.write(text)

    df = pd.DataFrame(df_rows)

    st.write("## Combined Data in DataFrame")
    st.write(df)
|
461 |
|
|
|
462 |
|
463 |
|
464 |
|