Vaibhav84 committed on
Commit
0f5f3bf
·
1 Parent(s): 813bff0
Files changed (2) hide show
  1. app.py +16 -7
  2. requirements.txt +2 -1
app.py CHANGED
@@ -14,6 +14,7 @@ from datetime import date
14
  import numpy as np
15
  import spacy
16
  import re
 
17
  from sentence_transformers import SentenceTransformer, util
18
  from sklearn.feature_extraction.text import CountVectorizer
19
  from sklearn.metrics.pairwise import cosine_similarity
@@ -85,16 +86,24 @@ def process_pdf_file(file: bytes = File(...)):
85
  return {"content": text}
86
 
87
  @app.post("/uploadJobDescriptionPDF_Fname/")
88
- def process_pdf_file1(file: bytes = File(...), data: str = str):
89
  # Save file locally for processing
90
  text =''
91
- reserve_pdf_on_memory = io.BytesIO(file)
92
- load_pdf = PyPDF2.PdfReader(reserve_pdf_on_memory)
93
- for page in load_pdf.pages:
94
- text += page.extract_text()
95
  print(text)
 
 
 
 
 
 
 
 
 
 
 
 
96
  # Process saved file
97
- return {"content": data}
98
  @app.get("/ProfileMatch")
99
  def ProfileMatchResults():
100
  dbQuery = "select * from profilematch"
@@ -124,7 +133,7 @@ def ExtractSkills(skill_data: SkillDetails):
124
 
125
  @app.post("/uploadJobDescription/")
126
  def create_upload_file(file: bytes = File(...)):
127
- content = file.encode('utf-8').strip()
128
  lines = content.split('\n')
129
  return {"content": lines}
130
 
 
14
  import numpy as np
15
  import spacy
16
  import re
17
+ import docx2txt
18
  from sentence_transformers import SentenceTransformer, util
19
  from sklearn.feature_extraction.text import CountVectorizer
20
  from sklearn.metrics.pairwise import cosine_similarity
 
86
  return {"content": text}
87
 
88
  @app.post("/uploadJobDescriptionPDF_Fname/")
89
+ def process_pdf_file1(file: bytes = File(...), FileName: str = str):
90
  # Save file locally for processing
91
  text =''
 
 
 
 
92
  print(text)
93
+ if FileName.endswith("pdf"):
94
+ reserve_pdf_on_memory = io.BytesIO(file)
95
+ load_pdf = PyPDF2.PdfReader(reserve_pdf_on_memory)
96
+ for page in load_pdf.pages:
97
+ text += page.extract_text()
98
+
99
+ elif FileName.endswith("doc") or FileName.endswith("docx"):
100
+ text = docx2txt.process(file.decode("utf-8"))
101
+ text = text.read()
102
+
103
+ else:
104
+ text = file.decode('utf-8')
105
  # Process saved file
106
+ return {"content": text}
107
  @app.get("/ProfileMatch")
108
  def ProfileMatchResults():
109
  dbQuery = "select * from profilematch"
 
133
 
134
  @app.post("/uploadJobDescription/")
135
  def create_upload_file(file: bytes = File(...)):
136
+ content = file.decode('utf-8')
137
  lines = content.split('\n')
138
  return {"content": lines}
139
 
requirements.txt CHANGED
@@ -10,4 +10,5 @@ spacy
10
  en-core-web-lg @https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl
11
  fastapi
12
  uvicorn
13
- python-multipart
 
 
10
  en-core-web-lg @https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl
11
  fastapi
12
  uvicorn
13
+ python-multipart
14
+ docx2txt