manasvinid committed on
Commit
50e0a7b
·
verified ·
1 Parent(s): 5a25f5f

Update functions.py

Browse files
Files changed (1) hide show
  1. functions.py +54 -53
functions.py CHANGED
@@ -385,79 +385,80 @@ class QdrantInterface:
385
 
386
 
387
 
388
def resume_pdf():
    """Render a resume PDF uploader and offer its text as a CSV download.

    Once a file has been uploaded, the text of every page is concatenated,
    passed through ``convert_to_csv``, written to the page, and exposed
    through a download button. Nothing is rendered beyond the uploader
    while no file is present.
    """
    pdf_upload = st.file_uploader("Upload Resumes", type=["pdf"])

    # Guard clause: the rest only makes sense once a file exists.
    if pdf_upload is None:
        return

    reader = PyPDF2.PdfReader(pdf_upload)
    full_text = "".join(page.extract_text() for page in reader.pages)

    csv_payload = convert_to_csv(full_text)

    # Show the converted data inline before offering it as a file.
    st.subheader("Converted CSV Data")
    st.write(csv_payload)

    st.download_button(
        label="Download CSV",
        data=csv_payload,
        file_name="converted_data.csv",
        mime="text/csv",
    )
419
 
420
def job_desc_pdf():
    """Render a job-description PDF uploader and offer its text as CSV.

    Once a file has been uploaded, the text of every page is concatenated,
    passed through ``convert_to_csv``, written to the page, and exposed
    through a download button. Nothing is rendered beyond the uploader
    while no file is present.
    """
    pdf_upload = st.file_uploader("Upload Job Description", type=["pdf"])

    # Guard clause: the rest only makes sense once a file exists.
    if pdf_upload is None:
        return

    reader = PyPDF2.PdfReader(pdf_upload)
    full_text = "".join(page.extract_text() for page in reader.pages)

    csv_payload = convert_to_csv(full_text)

    # Show the converted data inline before offering it as a file.
    st.subheader("Converted CSV Data")
    st.write(csv_payload)

    st.download_button(
        label="Download CSV",
        data=csv_payload,
        file_name="converted_data.csv",
        mime="text/csv",
    )
451
-
452
def convert_to_csv(text):
    """Return *text* as CSV with a single ``Text`` column.

    The input is split on ``"\\n"`` and each resulting piece becomes one
    row. The index is not written, and a backslash is configured as the
    CSV escape character.
    """
    rows = text.split("\n")
    frame = pd.DataFrame({"Text": rows})
    return frame.to_csv(index=False, escapechar='\\')
461
 
462
 
463
 
 
385
 
386
 
387
 
 
388
 
389
 
 
 
390
 
391
def extract_text_from_pdf(file):
    """
    Extract text from a PDF file using PyPDF2 library.

    The text of each page is appended in page order. If anything raises
    while reading, the error is reported through ``st.error`` and whatever
    text was collected before the failure is returned (an empty string if
    the failure happened before the first page).
    """
    extracted = ""
    try:
        for page in PyPDF2.PdfReader(file).pages:
            extracted += page.extract_text()
    except Exception as err:
        # Surface the problem in the UI instead of aborting the whole page.
        st.error(f"Error extracting text from PDF: {err}")
    return extracted
405
 
406
def resume_pdf():
    """Render the resume upload page and show the text extracted per file.

    Lets the user upload one or more PDF files. For each file the text is
    extracted with ``extract_text_from_pdf`` and written to the page under
    the file's name; afterwards all files are shown together as a
    DataFrame with ``File Name`` and ``Text`` columns. Nothing beyond the
    title and uploader is rendered until at least one file is uploaded.
    """
    st.title("UPLOAD RESUMES")

    # accept_multiple_files=True makes the uploader return a list of files,
    # which the loop below relies on. The explicit key keeps this widget
    # distinct from other uploaders that use the same label and options.
    uploaded_files = st.file_uploader(
        "Upload PDF files",
        accept_multiple_files=True,
        type="pdf",
        key="resume_pdf_uploader",
    )

    if not uploaded_files:
        return

    st.write("## Extracted Text from PDFs")
    df_rows = []

    for uploaded_file in uploaded_files:
        text = extract_text_from_pdf(uploaded_file)

        # Collect one row per file for the combined table below.
        df_rows.append({"File Name": uploaded_file.name, "Text": text})

        # Show each file's text as soon as it has been extracted.
        st.write(f"### {uploaded_file.name}")
        st.write(text)

    df = pd.DataFrame(df_rows)

    st.write("## Combined Data in DataFrame")
    st.write(df)
 
 
433
 
434
def job_desc_pdf():
    """Render the job-description upload page and show the extracted text.

    Lets the user upload one or more PDF files. For each file the text is
    extracted with ``extract_text_from_pdf`` and written to the page under
    the file's name; afterwards all files are shown together as a
    DataFrame with ``File Name`` and ``Text`` columns. Nothing beyond the
    title and uploader is rendered until at least one file is uploaded.
    """
    st.title("UPLOAD JOB DESCRIPTION")

    # The explicit key keeps this widget distinct from other uploaders that
    # use the same label and options.
    uploaded_files = st.file_uploader(
        "Upload PDF files",
        accept_multiple_files=True,
        type="pdf",
        key="job_desc_pdf_uploader",
    )

    if not uploaded_files:
        return

    st.write("## Extracted Text from PDFs")
    df_rows = []

    # This loop was commented out, which left df_rows empty and made the
    # page always display an empty DataFrame.
    for uploaded_file in uploaded_files:
        text = extract_text_from_pdf(uploaded_file)

        # Collect one row per file for the combined table below.
        df_rows.append({"File Name": uploaded_file.name, "Text": text})

        # Show each file's text as soon as it has been extracted.
        st.write(f"### {uploaded_file.name}")
        st.write(text)

    df = pd.DataFrame(df_rows)

    st.write("## Combined Data in DataFrame")
    st.write(df)
461
 
 
462
 
463
 
464