File size: 4,546 Bytes
22be37d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
"""
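    Streamlit interface for analysing an uploaded CV against a job posting.

    For HF (Hugging Face) Spaces, the interface file should be called app.py.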
"""

import json
import concurrent.futures

import streamlit as st

from utils.process_doc import parse_docx, parse_pdf
from backend import process_cv, process_job_posting
from utils.gpt import test_api_key

# Use the full browser width for the page.
st.set_page_config(layout="wide")

with st.sidebar:
    # The key is a secret: mask it on screen, and drop stray whitespace that
    # tends to come along when a key is copy/pasted.
    COHERE_API_KEY = st.text_input(
        "Cohere API Key Entry",
        value="",
        placeholder="Enter your Free Tier Cohere API Key",
        type="password",
    ).strip()

# Session state survives Streamlit's script re-runs; "formatted" records
# whether the uploaded documents have already been turned into JSON.
if "state" not in st.session_state:
    st.session_state.state = {"formatted": False}

STATE = st.session_state.state


# Single-file uploader for the candidate's CV (.docx or .pdf only).
cv_upload_box = st.file_uploader(
    label="CV Upload Box",
    type=["docx", "pdf"],
    accept_multiple_files=False,
    help="Upload your CV in .docx or .pdf form. This CV will be parsed, and used to analyse against the given job post.",
)

# Free-text area into which the job description is pasted.
job_posting_upload_box = st.text_area(
    label="Job Description Upload Box",
    help="In this box, please dump text content for a job description you are interested in. This could easily be setup to work directly with a webpage (we'd simply need to scrape said page) however I do not want to do that on HF spaces.",
    placeholder="Copy and Paste a job post you are interested in. Make sure to include the full post! More information is better.",
)

def _parse_json_object(raw_text):
    """Decode a model response into a JSON object.

    The response is expected to contain one JSON object, but it may arrive
    without its outer braces or surrounded by extra text. Several candidate
    strings are tried in order and the first one that decodes to a ``dict``
    is returned.

    Args:
        raw_text: Response text as returned by ``process_cv`` or
            ``process_job_posting``.

    Returns:
        The decoded ``dict``.

    Raises:
        json.JSONDecodeError: If no candidate decodes to a JSON object.
    """
    text = raw_text.strip()
    candidates = [
        # Already a complete object.
        text,
        # Outer braces omitted by the model.
        "{" + text + "}",
        # Previous normalisation, kept so every input that used to parse
        # still does (e.g. a response missing only one of its outer braces).
        "{" + text.lstrip("{").rstrip("}") + "}",
    ]
    first_brace = text.find("{")
    last_brace = text.rfind("}")
    if -1 < first_brace < last_brace:
        # Object embedded in surrounding text such as a Markdown code fence.
        candidates.append(text[first_brace : last_brace + 1])

    last_error = None
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError as err:
            last_error = err
            continue
        if isinstance(parsed, dict):
            return parsed

    if last_error is None:
        raise json.JSONDecodeError("Expected a JSON object", text, 0)
    raise last_error


def _parse_or_fallback(raw_text, label, fallback):
    """Parse *raw_text* as a JSON object, reporting failures.

    Args:
        raw_text: Response text to decode.
        label: Human-readable document name used in the messages.
        fallback: Placeholder ``dict`` returned when decoding fails.

    Returns:
        The decoded ``dict``, or *fallback* if the text cannot be decoded.
    """
    try:
        return _parse_json_object(raw_text)
    except json.JSONDecodeError as e:
        print(
            f"Error parsing JSON Output for {label}: {e}. Response content: {raw_text}"
        )
        # Tell the user too, otherwise the placeholder download looks like a
        # genuine result.
        st.warning(
            f"Could not parse the structured {label} output; a placeholder will be used instead."
        )
        return fallback


# The rest of the page only appears once both inputs have been provided.
if cv_upload_box and job_posting_upload_box != "":

    process_files = st.button("Process Files", type="primary")

    if process_files:
        if test_api_key(COHERE_API_KEY):

            # Process our two uploaded files into state variables
            STATE["job_posting"] = job_posting_upload_box

            # Compare the extension case-insensitively so that e.g. "CV.DOCX"
            # is not routed to the PDF parser.
            cv_filetype = cv_upload_box.name.rsplit(".", 1)[-1].lower()
            cv_file_contents = cv_upload_box.getvalue()

            STATE["cv"] = (
                parse_docx(cv_file_contents)
                if cv_filetype == "docx"
                else parse_pdf(cv_file_contents)
            )

            # Now, use Cohere to get structured output for both cv and job_posting

            # Making these calls in parallel
            with concurrent.futures.ThreadPoolExecutor() as executor:

                future1 = executor.submit(process_cv, STATE["cv"], COHERE_API_KEY)
                future2 = executor.submit(
                    process_job_posting, STATE["job_posting"], COHERE_API_KEY
                )

                cv_json_text = future1.result()
                job_posting_json_text = future2.result()

            # The fallback dicts carry the keys used for the download file
            # names below.
            STATE["cv_json"] = _parse_or_fallback(
                cv_json_text, "CV", {"name": "Failed"}
            )
            STATE["job_posting_json"] = _parse_or_fallback(
                job_posting_json_text, "Job Posting", {"companyName": "Failed"}
            )

            STATE["formatted"] = True
        else:
            st.error(
                "You entered an invalid Cohere API Key. Please enter a valid API key in the sidebar."
            )

    # Now, we can work with this !
    if STATE["formatted"]:
        # The model may omit these keys, so fall back to a neutral file name
        # instead of raising KeyError.
        company_name = STATE["job_posting_json"].get("companyName", "unknown")
        candidate_name = STATE["cv_json"].get("name", "unknown")

        lcol, rcol = st.columns((0.5, 0.5))
        with lcol:
            st.download_button(
                label="Download Job Posting JSON",
                data=json.dumps(STATE["job_posting_json"], indent=4),
                file_name=f"job_posting_formatted_{company_name}.json",
                mime="application/json",
                use_container_width=True,
            )
        with rcol:
            st.download_button(
                label="Download CV JSON",
                data=json.dumps(STATE["cv_json"], indent=4),
                file_name=f"cv_formatted_{candidate_name}.json",
                mime="application/json",
                use_container_width=True,
            )

        cv_critique, practice_interview, general_cv_critique = st.tabs(
            ["Role Specific CV Critique", "Practice Interview", "General CV Critique"]
        )