Spaces:
Sleeping
Sleeping
Changes
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import json
|
|
5 |
from typing_extensions import Annotated
|
6 |
|
7 |
#SkillExtraction Packages
|
|
|
8 |
from PyPDF2 import PdfReader
|
9 |
import psycopg2
|
10 |
from psycopg2 import sql
|
@@ -25,6 +26,8 @@ register_adapter(np.int64, AsIs)
|
|
25 |
import warnings
|
26 |
warnings.filterwarnings('ignore')
|
27 |
|
|
|
|
|
28 |
#Custom Classes for endpoints
|
29 |
from DbConnection import DbConnection
|
30 |
from UploadFile import UploadOpenFile
|
@@ -113,15 +116,26 @@ def upload_PDF(file: UploadFile = File(...)):
|
|
113 |
return {"message": f"Successfully uploaded {contents}"}
|
114 |
|
115 |
@app.post("/uploadJobDescriptionPDF2/")
|
116 |
-
def
|
117 |
-
|
|
|
|
|
|
|
118 |
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
|
127 |
-
return {"
|
|
|
5 |
from typing_extensions import Annotated
|
6 |
|
7 |
#SkillExtraction Packages
|
8 |
+
import PyPDF2
|
9 |
from PyPDF2 import PdfReader
|
10 |
import psycopg2
|
11 |
from psycopg2 import sql
|
|
|
26 |
import warnings
|
27 |
warnings.filterwarnings('ignore')
|
28 |
|
29 |
+
from io import BytesIO
|
30 |
+
import requests
|
31 |
#Custom Classes for endpoints
|
32 |
from DbConnection import DbConnection
|
33 |
from UploadFile import UploadOpenFile
|
|
|
116 |
return {"message": f"Successfully uploaded {contents}"}
|
117 |
|
118 |
@app.post("/uploadJobDescriptionPDF2/")
|
119 |
+
def process_pdf_file(file: UploadFile = File(...)):
    """Extract the text of an uploaded PDF.

    The upload is spooled to a private temporary file, handed to
    ``process_pdf`` as a local path, and the temporary file is removed
    afterwards.

    Args:
        file: The uploaded PDF. NOTE(review): assumed to be FastAPI's
            ``UploadFile`` (its import is not visible here) -- confirm.

    Returns:
        Whatever ``process_pdf`` returns for the saved file.
    """
    # Function-scope imports keep this block self-contained.
    import contextlib
    import os
    import shutil
    import tempfile

    # Never write to ``file.filename``: it is chosen by the client and may
    # contain path components ("../../x") or collide with existing files.
    fd, tmp_path = tempfile.mkstemp(suffix=".pdf")
    try:
        with os.fdopen(fd, "wb") as tmp:
            # ``UploadFile.read()`` is a coroutine; inside a plain ``def``
            # the underlying synchronous file object must be used instead.
            shutil.copyfileobj(file.file, tmp)

        # Process saved file
        return process_pdf(tmp_path, is_local_file=True)
    finally:
        # Best-effort cleanup; a failure to delete must not mask the result
        # or the original exception.
        with contextlib.suppress(OSError):
            os.remove(tmp_path)
|
127 |
+
|
128 |
+
def process_pdf(pdf_source, is_local_file=False):
    """Extract the text of a PDF given by URL or by local path.

    Args:
        pdf_source: Path of a local file when ``is_local_file`` is true,
            otherwise a URL that is fetched with ``requests``.
        is_local_file: Selects how ``pdf_source`` is interpreted.

    Returns:
        ``{"content": text}`` where ``text`` is the concatenated text of
        every page, in page order.

    Raises:
        requests.RequestException: The download failed, timed out, or
            returned an HTTP error status.
        OSError: The local file could not be opened.
    """
    if is_local_file:
        stream = open(pdf_source, "rb")
    else:
        # A timeout prevents a stalled server from hanging the request
        # forever; raise_for_status avoids parsing an error page as a PDF.
        response = requests.get(pdf_source, timeout=30)
        response.raise_for_status()
        stream = BytesIO(response.content)

    # ``with`` closes the stream even if parsing raises.
    with stream:
        # PdfReader / pages / extract_text replace the legacy
        # PdfFileReader / numPages / getPage / extractText names.
        pdf_reader = PyPDF2.PdfReader(stream)
        text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    return {"content": text}
|