Spaces:
Sleeping
Sleeping
import pathlib | |
import ast | |
import os | |
from pypdf import PdfReader | |
import docx2txt | |
from sqlalchemy import select | |
from sqlalchemy.orm import Session | |
import openai | |
import cohere | |
from models import Job, ENGINE | |
# API credentials are read from the process environment at import time;
# indexing os.environ raises KeyError when a variable is not set.

# Shared Cohere client used by rerank().
co = cohere.Client(os.environ["COHERE_API_KEY"])

# Key used by the openai module for the completion call in gpt().
openai.api_key = os.environ["OPEN_API_KEY"]
def gpt(user_query):
    """Run ``user_query`` through the OpenAI completion endpoint.

    Args:
        user_query: Prompt text sent verbatim to the ``text-davinci-003``
            engine.

    Returns:
        The ``text`` field of the first (and only requested) choice in the
        completion response.
    """
    request = {
        "engine": "text-davinci-003",
        "prompt": user_query,
        "max_tokens": 1024,
        "n": 1,  # a single completion is requested, so index 0 is the answer
        "stop": None,
        "temperature": 0.5,
    }
    completion = openai.Completion.create(**request)
    first_choice = completion['choices'][0]
    return first_choice['text']
def parse_pdf(file_name):
    """Extract the text of a PDF resume.

    Args:
        file_name: Location of the PDF; passed straight to ``PdfReader``.

    Returns:
        The extracted text of every page, joined with newlines. For a
        single-page document this is exactly that page's text; for a
        document without pages it is the empty string.
    """
    reader = PdfReader(file_name)
    # Read every page rather than only the first: multi-page resumes would
    # otherwise lose everything after page one, and a PDF with no pages would
    # raise IndexError on ``reader.pages[0]``.
    return "\n".join(page.extract_text() or "" for page in reader.pages)
def parse_docx(file_name):
    """Return the text that ``docx2txt`` extracts from the given document.

    Args:
        file_name: Location of the document; passed straight to
            ``docx2txt.process``.

    Returns:
        Whatever ``docx2txt.process`` returns for the file.
    """
    return docx2txt.process(file_name)
# def get_dict(resume_text): | |
# resume_dict = ast.literal_eval(gpt(f"""parse the resume and convert it into a Python string with the headings as "experience," "skills," "certifications," and "education". | |
# resume: "{resume_text}" | |
# resume_dict: """).strip()) | |
# return resume_dict | |
def parse(filename):
    """Extract the text of a resume file.

    Files whose extension is ``.pdf`` (in any letter case) are read with
    ``parse_pdf``; every other file is handed to ``parse_docx``.

    Args:
        filename: Path of the resume file, as a string or path-like object.

    Returns:
        The extracted resume text.
    """
    resume_file = pathlib.Path(filename)
    # Compare the extension case-insensitively so that names such as
    # "resume.PDF" are not mistakenly routed to the DOCX parser.
    if resume_file.suffix.lower() == ".pdf":
        text = parse_pdf(resume_file)
    else:
        text = parse_docx(resume_file)
    print("parse"+"~"*10,text)
    # dct = get_dict(text)
    # print(dct)
    return text
def rerank(job_id,docs,top_n):
    """Rank candidate documents against the post name of a stored job.

    Args:
        job_id: Value matched against ``Job.job_id``; exactly one row must
            match, because the lookup uses ``.one()``.
        docs: Documents forwarded to the Cohere rerank call.
        top_n: Number of results requested from the rerank call.

    Returns:
        The response object returned by ``co.rerank``; it is also printed.
    """
    job_query = select(Job).where(Job.job_id == job_id)
    with Session(ENGINE) as session:
        # Read the attribute while the session is still open.
        post = session.scalars(job_query).one().post_name

    response = co.rerank(
        model='rerank-english-v2.0',
        query=f'Which profile suits most for the role of {post}?',
        documents=docs,
        top_n=top_n,
    )
    print(response)
    return response