|
|
|
from fastapi import FastAPI,File, UploadFile |
|
from pydantic import BaseModel |
|
import json |
|
from typing_extensions import Annotated |
|
|
|
|
|
import PyPDF2 |
|
from PyPDF2 import PdfReader |
|
import psycopg2 |
|
from psycopg2 import sql |
|
import pandas as pd |
|
from datetime import date |
|
import numpy as np |
|
import spacy |
|
import re |
|
from sentence_transformers import SentenceTransformer, util |
|
from sklearn.feature_extraction.text import CountVectorizer |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
import io |
|
from spacy.matcher import PhraseMatcher |
|
from skillNer.general_params import SKILL_DB |
|
from skillNer.skill_extractor_class import SkillExtractor |
|
from psycopg2.extensions import register_adapter, AsIs |
|
register_adapter(np.int64, AsIs) |
|
import warnings |
|
warnings.filterwarnings('ignore') |
|
|
|
from io import BytesIO |
|
import requests |
|
|
|
from DbConnection import DbConnection |
|
from UploadFile import UploadOpenFile |
|
from SkillExtract import SkillExtractorDetails |
|
import os |
|
# Point Hugging Face tooling at a fixed cache directory.
# NOTE(review): sentence_transformers is imported earlier in this module. If
# the Hugging Face libraries resolve HF_HOME when they are imported, this
# assignment comes too late to take effect -- confirm, and if so move it above
# those imports.
os.environ['HF_HOME'] = '/hug/cache/'

# Application object; every route in this module is registered on it.
app = FastAPI()
|
# Payload for the /UploadFile/ route; the same object is sent back as the
# response after the handler has updated it.
# (Documented with comments rather than a docstring so the generated API
# schema is left untouched.)
class FileDetails(BaseModel):
    # File content handed to the upload helper; the handler blanks it before
    # returning the model.
    filecontents: str
    # File name handed to the upload helper together with the content.
    filename: str
    # Overwritten by the handler with the stringified id returned by the
    # upload helper.
    fileid: str
    # Overwritten by the handler with a status message.
    message: str
|
|
|
|
|
# Payload for the /ExtractSkills/ route; the same object is sent back as the
# response after the handler has filled in the skill fields.
# (Documented with comments rather than a docstring so the generated API
# schema is left untouched.)
class SkillDetails(BaseModel):
    # Identifier forwarded to the skill extraction helper.
    skillid: int
    # The three fields below are assigned by the handler from the
    # '@'-separated string that the extraction helper returns.
    requiredSkills: str
    softSkills: str
    goodToHaveSkills: str
|
|
|
# Pairs a numeric file id with a status message.
# NOTE(review): nothing in the visible part of this module references this
# model, and it shares its name with fastapi.responses.FileResponse -- take
# care if that class is ever imported here.
class FileResponse(BaseModel):
    fileid: int
    message: str
|
# Request body accepted by the /uploadJobDescriptionPDF_Fname/ route.
class FileUploadDetails(BaseModel):
    # Bytes that the route wraps in an in-memory stream and hands to PyPDF2.
    # NOTE(review): File(...) is normally used as a route-parameter default;
    # as a model field default it presumably only marks the field as
    # required -- confirm this is what was intended.
    fileData: bytes = File(...)
    # Not read by the route in the visible part of this module.
    filename: str
|
|
|
|
|
|
|
# Load the "en_core_web_lg" spaCy pipeline once, when the module is imported,
# so request handlers can share it.
nlp = spacy.load("en_core_web_lg")

# Module-wide skill extractor built on that pipeline; the /ExtractSkills/
# route passes it to the extraction helper.
skill_extractor = SkillExtractor(nlp, SKILL_DB, PhraseMatcher)
|
|
|
# Landing route: answers GET / with the API name and version string.
@app.get("/")
async def root():
    banner = {"SkillAPI": "SkillAPi Version 0.05"}
    return banner
|
|
|
# Database connection settings, fetched once when the module is imported.
# They are expanded as keyword arguments to psycopg2.connect() and passed
# through to the upload and skill-extraction helpers.
db_params = DbConnection.GetDbConnection()
|
def parse_csv(df):
    """Turn a DataFrame into a list of per-row dictionaries.

    The frame is written out with pandas' JSON serialiser (one object per
    row) and immediately parsed back, so the result is whatever ``json.loads``
    produces for that text.

    Args:
        df: The DataFrame to convert.

    Returns:
        A list with one dict per row, keyed by column name.
    """
    return json.loads(df.to_json(orient="records"))
|
|
|
# POST /uploadJobDescriptionPDF/ -- extract the text of an uploaded PDF.
#
# Parameters:
#   file: raw bytes of the uploaded "file" form field.
# Returns:
#   {"content": <text of every page, concatenated in page order>}
#
# NOTE(review): unreadable or empty uploads are not handled here, so whatever
# PyPDF2 raises propagates to the framework -- consider mapping it to a 4xx.
@app.post("/uploadJobDescriptionPDF/")
def process_pdf_file(file: bytes = File(...)):
    # PdfReader reads from a file-like object, so wrap the bytes in memory.
    pdf_stream = io.BytesIO(file)
    reader = PyPDF2.PdfReader(pdf_stream)

    # Join once instead of growing the string page by page.
    text = "".join(page.extract_text() for page in reader.pages)

    # Diagnostic echo of the extracted text to stdout.
    print(text)

    return {"content": text}
|
|
|
# POST /uploadJobDescriptionPDF_Fname/ -- extract the text of a PDF supplied
# inside a FileUploadDetails body.
#
# Parameters:
#   file: request body; only its fileData bytes are read here.
# Returns:
#   {"content": <text of every page, concatenated in page order>}
#
# NOTE(review): unreadable or empty PDF data is not handled here, so whatever
# PyPDF2 raises propagates to the framework -- consider mapping it to a 4xx.
@app.post("/uploadJobDescriptionPDF_Fname/")
def process_pdf_file1(file: FileUploadDetails):
    # PdfReader reads from a file-like object, so wrap the bytes in memory.
    pdf_stream = io.BytesIO(file.fileData)
    reader = PyPDF2.PdfReader(pdf_stream)

    # Join once instead of growing the string page by page.
    text = "".join(page.extract_text() for page in reader.pages)

    # Diagnostic echo of the extracted text to stdout.
    print(text)

    return {"content": text}
|
# GET /ProfileMatch -- return every row of the profilematch table.
#
# Returns:
#   The rows as a list of dicts (one per row), produced by parse_csv().
@app.get("/ProfileMatch")
def ProfileMatchResults():
    dbQuery = "select * from profilematch"
    conn = psycopg2.connect(**db_params)
    try:
        df = pd.read_sql_query(dbQuery, conn)
    finally:
        # Close explicitly so each request does not leave a connection open,
        # including when the query itself fails.
        conn.close()
    return parse_csv(df)
|
|
|
# POST /UploadFile/ -- hand the submitted content and file name to the upload
# helper, then send the same model back with the content blanked, the returned
# id (as a string) in fileid, and a success message.
@app.post("/UploadFile/")
def UploadFileDetails(file_data: FileDetails):
    upload_id = UploadOpenFile.uploadFile(
        file_data.filecontents,
        file_data.filename,
        db_params,
    )

    # Reuse the request model as the response.
    file_data.message = "File Uploaded Successfully!"
    file_data.fileid = str(upload_id)
    file_data.filecontents = ""

    return file_data
|
|
|
|
|
# POST /ExtractSkills/ -- run skill extraction for skill_data.skillid and
# return the same model with its three skill fields filled in.
#
# Parameters:
#   skill_data: request body; only skillid is read, the skill fields are
#     overwritten.
# Returns:
#   skill_data with requiredSkills, softSkills and goodToHaveSkills assigned.
#
# NOTE(review): assumes the helper returns "required@soft@goodToHave" in that
# order -- confirm against SkillExtractorDetails.SkillExtract.
@app.post("/ExtractSkills/")
def ExtractSkills(skill_data: SkillDetails):
    returnSkills = SkillExtractorDetails.SkillExtract(
        db_params, skill_extractor, skill_data.skillid
    )
    details = returnSkills.split('@')

    # Pad so a result with fewer than three segments yields empty strings
    # instead of an IndexError.
    details += [""] * (3 - len(details))

    skill_data.requiredSkills = details[0]
    skill_data.softSkills = details[1]
    # Third segment; previously this reused details[1] and so duplicated the
    # soft skills.
    skill_data.goodToHaveSkills = details[2]
    return skill_data
|
|
|
# POST /uploadJobDescription/ -- return the lines of an uploaded text file.
#
# Parameters:
#   file: raw bytes of the uploaded "file" form field.
# Returns:
#   {"content": <list of lines>} with surrounding whitespace stripped from the
#   text as a whole before splitting.
@app.post("/uploadJobDescription/")
def create_upload_file(file: bytes = File(...)):
    # The upload arrives as bytes, so it has to be decoded (bytes has no
    # .encode()). Undecodable bytes are replaced rather than failing the
    # whole request.
    content = file.decode('utf-8', errors='replace').strip()

    # splitlines() copes with both "\n" and "\r\n" line endings.
    lines = content.splitlines()
    return {"content": lines}
|
|
|
# POST /uploadJobDescriptionPDF_Test/ -- parse the uploaded file as JSON and
# echo the parsed value back inside a message.
#
# Parameters:
#   file: the uploaded "file" form field.
# Returns:
#   {"message": "Successfully uploaded <parsed value>"}
#
# NOTE(review): despite the route name, the upload is parsed with json.loads,
# not as a PDF; an upload that is not valid JSON makes json.loads raise.
@app.post("/uploadJobDescriptionPDF_Test/")
def upload_PDF(file: UploadFile = File(...)):
    data = json.loads(file.file.read())
    return {"message": f"Successfully uploaded {data}"}
|
|
|
|
|
|
|
|
|
# POST /uploadJobDescriptionPDF3/ -- report the content type of the upload.
#
# Parameters:
#   file: the uploaded "file" form field. Declared as UploadFile because the
#     content type is only available on that wrapper; a plain bytes parameter
#     has no content_type attribute. The form field clients send is unchanged.
# Returns:
#   {"content": <content type reported for the uploaded file>}
@app.post("/uploadJobDescriptionPDF3/")
def process_pdf_file2(file: Annotated[UploadFile, File()]):
    return {"content": file.content_type}