CV_Reviewer / app.py
Jonah Ramponi
commiting day1
22be37d
raw
history blame
4.55 kB
"""
Streamlit entry point for the CV Reviewer app.

On Hugging Face (HF) Spaces, the interface file should be called app.py.
"""
import json
import concurrent.futures
import streamlit as st
from utils.process_doc import parse_docx, parse_pdf
from backend import process_cv, process_job_posting
from utils.gpt import test_api_key
# Configure the page to use the wide layout.
st.set_page_config(layout="wide")

# The Cohere API key is collected in the sidebar.
# NOTE(review): the file's indentation was lost; only the key input is assumed
# to live inside the sidebar - confirm against the intended layout.
with st.sidebar:
    # type="password" masks the secret so it is not displayed in clear text.
    # .strip() drops stray whitespace/newlines that commonly come along when a
    # key is pasted and would otherwise make a valid key fail validation.
    COHERE_API_KEY = st.text_input(
        "Cohere API Key Entry",
        value="",
        placeholder="Enter your Free Tier Cohere API Key",
        type="password",
    ).strip()
# Application state kept in Streamlit's session state: reuse the stored dict
# when it exists, otherwise create it with "formatted" switched off.
if "state" in st.session_state:
    STATE = st.session_state["state"]
else:
    STATE = st.session_state["state"] = {"formatted": False}
# Single-file uploader for the CV; only .docx and .pdf files are accepted.
cv_upload_box = st.file_uploader(
    label="CV Upload Box",
    type=["docx", "pdf"],
    accept_multiple_files=False,
    help="Upload your CV in .docx or .pdf form. This CV will be parsed, and used to analyse against the given job post.",
)

# Free-text area into which the job posting is pasted.
job_posting_upload_box = st.text_area(
    label="Job Description Upload Box",
    help="In this box, please dump text content for a job description you are interested in. This could easily be setup to work directly with a webpage (we'd simply need to scrape said page) however I do not want to do that on HF spaces.",
    placeholder="Copy and Paste a job post you are interested in. Make sure to include the full post! More information is better.",
)
def _load_json_object(raw_text):
    """Parse a model reply into a dict, tolerating common formatting slips.

    Several readings of the reply are tried in order and the first one that
    decodes to a JSON object wins:

    1. the reply exactly as returned;
    2. the reply wrapped in one pair of braces (outer braces were omitted);
    3. the reply with all leading "{" / trailing "}" collapsed to one pair
       (covers doubled braces or a single missing brace);
    4. the text between the first "{" and the last "}" (covers replies that
       surround the object with prose or code fences).

    Args:
        raw_text: The text returned by the model.

    Returns:
        The decoded JSON object as a dict.

    Raises:
        json.JSONDecodeError: If no reading decodes to a JSON object.
    """
    text = raw_text.strip()
    candidates = [
        text,
        "{" + text + "}",
        "{" + text.lstrip("{").rstrip("}") + "}",
    ]
    first_brace, last_brace = text.find("{"), text.rfind("}")
    if -1 < first_brace < last_brace:
        candidates.append(text[first_brace : last_brace + 1])

    first_error = None
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError as err:
            if first_error is None:
                first_error = err
            continue
        # Valid JSON that is not an object (list, string, ...) is unusable here.
        if isinstance(parsed, dict):
            return parsed

    if first_error is None:
        first_error = json.JSONDecodeError("Expected a JSON object", text, 0)
    raise first_error


# Processing is only offered once both a CV and a job posting were supplied.
if cv_upload_box and job_posting_upload_box != "":
    process_files = st.button("Process Files", type="primary")

    if process_files:
        if test_api_key(COHERE_API_KEY):
            # Store the two raw inputs in the session state.
            STATE["job_posting"] = job_posting_upload_box

            # Compare the extension case-insensitively so "CV.DOCX" is not
            # mistakenly handed to the PDF parser.
            cv_filetype = cv_upload_box.name.rsplit(".", 1)[-1].lower()
            cv_file_contents = cv_upload_box.getvalue()
            STATE["cv"] = (
                parse_docx(cv_file_contents)
                if cv_filetype == "docx"
                else parse_pdf(cv_file_contents)
            )

            # Ask Cohere for structured output for both documents; the two
            # independent requests are submitted to a thread pool so they run
            # in parallel.
            with concurrent.futures.ThreadPoolExecutor() as executor:
                future1 = executor.submit(process_cv, STATE["cv"], COHERE_API_KEY)
                future2 = executor.submit(
                    process_job_posting, STATE["job_posting"], COHERE_API_KEY
                )
                cv_json_text = future1.result()
                job_posting_json_text = future2.result()

            # Decode both replies; on failure keep the app usable with a
            # placeholder, but tell the user instead of failing silently.
            replies = (
                ("cv_json", "CV", cv_json_text, {"name": "Failed"}),
                (
                    "job_posting_json",
                    "Job Posting",
                    job_posting_json_text,
                    {"companyName": "Failed"},
                ),
            )
            for state_key, label, reply_text, placeholder in replies:
                try:
                    STATE[state_key] = _load_json_object(reply_text)
                except json.JSONDecodeError as e:
                    print(
                        f"Error parsing JSON Output for {label}: {e}. Response content: {reply_text}"
                    )
                    st.warning(
                        f"Could not parse the structured {label} output returned by the model; a placeholder is used instead."
                    )
                    STATE[state_key] = placeholder

            STATE["formatted"] = True
        else:
            st.error(
                "You entered an invalid Cohere API Key. Please enter a valid API key in the sidebar."
            )
# Once both documents have been converted to structured JSON, offer them for
# download and lay out the analysis tabs.
if STATE["formatted"]:
    job_posting_json = STATE["job_posting_json"]
    cv_json = STATE["cv_json"]

    # The model's JSON is not guaranteed to contain these keys (an empty reply
    # yields {}), so fall back to a neutral label instead of raising KeyError.
    company_label = job_posting_json.get("companyName") or "Unknown"
    candidate_label = cv_json.get("name") or "Unknown"

    lcol, rcol = st.columns((0.5, 0.5))
    with lcol:
        st.download_button(
            label="Download Job Posting JSON",
            data=json.dumps(job_posting_json, indent=4),
            file_name=f"job_posting_formatted_{company_label}.json",
            mime="application/json",
            use_container_width=True,
        )
    with rcol:
        st.download_button(
            label="Download CV JSON",
            data=json.dumps(cv_json, indent=4),
            file_name=f"cv_formatted_{candidate_label}.json",
            mime="application/json",
            use_container_width=True,
        )

    # NOTE(review): indentation was lost in this file; the tabs are assumed to
    # belong to the "formatted" branch - confirm against the intended layout.
    cv_critique, practice_interview, general_cv_critique = st.tabs(
        ["Role Specific CV Critique", "Practice Interview", "General CV Critique"]
    )