Vaibhav84 committed on
Commit
ba60b2c
·
1 Parent(s): eb32f81
Files changed (1) hide show
  1. app.py +5 -21
app.py CHANGED
@@ -17,7 +17,7 @@ import re
17
  from sentence_transformers import SentenceTransformer, util
18
  from sklearn.feature_extraction.text import CountVectorizer
19
  from sklearn.metrics.pairwise import cosine_similarity
20
- from io import StringIO
21
  from spacy.matcher import PhraseMatcher
22
  from skillNer.general_params import SKILL_DB
23
  from skillNer.skill_extractor_class import SkillExtractor
@@ -118,24 +118,8 @@ def upload_PDF(file: UploadFile = File(...)):
118
  @app.post("/uploadJobDescriptionPDF2/")
119
  def process_pdf_file(file: bytes = File(...)):
120
  # Save file locally for processing
121
- contents = file.read()
122
- with open(file.filename, 'wb') as f:
123
- f.write(contents)
124
-
125
  # Process saved file
126
- return process_pdf(file.filename, is_local_file=True)
127
-
128
- def process_pdf(pdf_source, is_local_file=False):
129
- # Process the PDF from URL or local file
130
- file = BytesIO(requests.get(pdf_source).content) if not is_local_file else open(pdf_source, 'rb')
131
-
132
- # Extract text from PDF
133
- pdf_reader = PyPDF2.PdfFileReader(file)
134
- text = ""
135
- for page in range(pdf_reader.numPages):
136
- text += pdf_reader.getPage(page).extractText()
137
-
138
- if is_local_file:
139
- file.close()
140
-
141
- return {"content": text}
 
17
  from sentence_transformers import SentenceTransformer, util
18
  from sklearn.feature_extraction.text import CountVectorizer
19
  from sklearn.metrics.pairwise import cosine_similarity
20
+ import io
21
  from spacy.matcher import PhraseMatcher
22
  from skillNer.general_params import SKILL_DB
23
  from skillNer.skill_extractor_class import SkillExtractor
 
118
  @app.post("/uploadJobDescriptionPDF2/")
119
  def process_pdf_file(file: bytes = File(...)):
120
  # Save file locally for processing
121
+
122
+ reserve_pdf_on_memory = io.BytesIO(file)
123
+ load_pdf = PyPDF2.PdfFileReader(reserve_pdf_on_memory)
 
124
  # Process saved file
125
+ return {"content": load_pdf}