Vaibhav84 committed on
Commit
f40468d
·
1 Parent(s): 02cf2aa
Files changed (1) hide show
  1. app.py +24 -10
app.py CHANGED
@@ -5,6 +5,7 @@ import json
5
  from typing_extensions import Annotated
6
 
7
  #SkillExtraction Packages
 
8
  from PyPDF2 import PdfReader
9
  import psycopg2
10
  from psycopg2 import sql
@@ -25,6 +26,8 @@ register_adapter(np.int64, AsIs)
25
  import warnings
26
  warnings.filterwarnings('ignore')
27
 
 
 
28
  #Custom Classes for endpoints
29
  from DbConnection import DbConnection
30
  from UploadFile import UploadOpenFile
@@ -113,15 +116,26 @@ def upload_PDF(file: UploadFile = File(...)):
113
  return {"message": f"Successfully uploaded {contents}"}
114
 
115
  @app.post("/uploadJobDescriptionPDF2/")
116
- def upload_PDF2(file: UploadFile = File(...)):
117
- text =''
 
 
 
118
 
119
- if file.filename.endswith("pdf"):
120
- pdf_reader = PdfReader(file)
121
-
122
- for page in pdf_reader.pages:
123
- text += page.extract_text()
124
-
125
-
 
 
 
 
 
 
 
 
126
 
127
- return {"message": f"Successfully uploaded {text}"}
 
5
  from typing_extensions import Annotated
6
 
7
  #SkillExtraction Packages
8
+ import PyPDF2
9
  from PyPDF2 import PdfReader
10
  import psycopg2
11
  from psycopg2 import sql
 
26
  import warnings
27
  warnings.filterwarnings('ignore')
28
 
29
+ from io import BytesIO
30
+ import requests
31
  #Custom Classes for endpoints
32
  from DbConnection import DbConnection
33
  from UploadFile import UploadOpenFile
 
116
  return {"message": f"Successfully uploaded {contents}"}
117
 
118
  @app.post("/uploadJobDescriptionPDF2/")
119
+ def process_pdf_file(file: UploadFile = File(...)):
120
+ # Save file locally for processing
121
+ contents = file.read()
122
+ with open(file.filename, 'wb') as f:
123
+ f.write(contents)
124
 
125
+ # Process saved file
126
+ return process_pdf(file.filename, is_local_file=True)
127
+
128
def process_pdf(pdf_source, is_local_file=False, *, timeout=30):
    """Extract all text from a PDF given by URL or local path.

    Args:
        pdf_source: URL of the PDF, or a filesystem path when
            ``is_local_file`` is true.
        is_local_file: Treat ``pdf_source`` as a local path instead of a URL.
        timeout: Seconds to wait for the HTTP download; ignored for local
            files.

    Returns:
        A dict with a single ``"content"`` key holding the concatenated text
        of every page.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an error status.
        OSError: If the local file cannot be opened.
    """
    if is_local_file:
        stream = open(pdf_source, 'rb')
    else:
        response = requests.get(pdf_source, timeout=timeout)
        # Fail on HTTP errors rather than trying to parse an error page as PDF.
        response.raise_for_status()
        stream = BytesIO(response.content)

    # The context manager closes the stream even if PDF parsing raises.
    with stream:
        # PdfReader / .pages / .extract_text() replace the legacy
        # PdfFileReader / numPages / getPage / extractText names, which
        # raise in PyPDF2 3.x.
        pdf_reader = PyPDF2.PdfReader(stream)
        # Guard against pages for which no text could be extracted.
        text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    return {"content": text}