File size: 3,244 Bytes
b040bf0
da7bb3f
b040bf0
8d2e36c
b040bf0
8d2e36c
da7bb3f
 
b040bf0
7a9bfed
 
5ce7b7b
 
7a9bfed
 
 
da7bb3f
7a9bfed
da7bb3f
 
 
7a9bfed
da7bb3f
7a9bfed
 
 
 
 
 
5ce7b7b
7a9bfed
 
da7bb3f
 
7a9bfed
 
da7bb3f
 
 
 
5ce7b7b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da7bb3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a9bfed
 
 
 
da7bb3f
7a9bfed
e249cd1
 
7a9bfed
da7bb3f
 
 
f914500
da7bb3f
 
 
8d2e36c
da7bb3f
 
 
 
f6ebf9c
bcad65d
e249cd1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import gradio as gr
from langchain.document_loaders import PyPDFLoader
from langchain.llms import OpenAIChat
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
from langchain import PromptTemplate
import os
import tempfile

# Updated imports for Gradio components
from gradio.components import File, Textbox


def format_resume_to_yaml(api_key, file):
    """Extract the text of an uploaded PDF resume and ask an OpenAI chat
    model to reformat it into a fixed YAML template.

    Args:
        api_key: OpenAI API key; exported to the environment for LangChain.
        file: The uploaded resume — either raw PDF bytes (what Gradio's
            ``File(type="binary")`` delivers) or a file-like object
            exposing ``.read()``.

    Returns:
        str: The model's response text (the YAML-formatted resume).

    Raises:
        ValueError: If the upload is empty or the PDF cannot be parsed.
    """
    # Set the API key for OpenAI
    os.environ['OPENAI_API_KEY'] = api_key

    # With Gradio's File(type="binary") the callback receives raw bytes,
    # not a file object — only call .read() on file-like inputs.
    if isinstance(file, (bytes, bytearray)):
        file_content = file
    else:
        file_content = file.read()

    # Check if the file content is not empty
    if not file_content:
        raise ValueError("The uploaded file is empty.")

    # Save the uploaded content to a temporary file, since PyPDFLoader
    # only accepts a filesystem path.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf', mode='wb+') as tmp_file:
        tmp_file.write(file_content)
        tmp_file.flush()
        os.fsync(tmp_file.fileno())  # Ensure data is written to disk
        temp_file_path = tmp_file.name

    try:
        loader = PyPDFLoader(temp_file_path)
        docs = loader.load_and_split()  # List of Document chunks from the PDF
    except Exception as e:
        # The original handler referenced PyPDF2.errors.PdfReaderError, but
        # PyPDF2 was never imported, so the except clause itself raised a
        # NameError. Wrap any loader failure in a ValueError instead.
        raise ValueError(f"An error occurred while processing the PDF: {e}") from e
    finally:
        # NamedTemporaryFile(delete=False) leaves the file behind; remove it
        # whether or not parsing succeeded.
        os.remove(temp_file_path)

    # load_and_split() returns Document objects, not strings — joining them
    # directly would raise TypeError; join their page_content instead.
    resume_text = " ".join(doc.page_content for doc in docs)

    template = """Format the provided resume to this YAML template:
    ---
    name: ''
    phoneNumbers:
    - ''
    websites:
    - ''
    emails:
    - ''
    dateOfBirth: ''
    addresses:
    - street: ''
      city: ''
      state: ''
      zip: ''
      country: ''
    summary: ''
    education:
    - school: ''
      degree: ''
      fieldOfStudy: ''
      startDate: ''
      endDate: ''
    workExperience:
    - company: ''
      position: ''
      startDate: ''
      endDate: ''
    skills:
    - name: ''
    certifications:
    - name: ''

    {chat_history}
    {human_input}"""

    prompt = PromptTemplate(
        input_variables=["chat_history", "human_input"],
        template=template
    )

    memory = ConversationBufferMemory(memory_key="chat_history")

    llm_chain = LLMChain(
        llm=OpenAIChat(model="gpt-3.5-turbo"),
        prompt=prompt,
        verbose=True,
        memory=memory,
    )

    # LLMChain.predict returns the generated text directly as a str, not a
    # dict — indexing the result with ['output_text'] raised TypeError.
    return llm_chain.predict(human_input=resume_text)

def on_file_upload(filename, file_content):
    """Validate an uploaded file, surfacing an error dialog if it is empty.

    Args:
        filename: Name of the uploaded file (unused beyond the callback
            signature Gradio expects).
        file_content: Raw content of the upload; falsy means empty/missing.

    Raises:
        gr.Error: When no usable file content was provided.
    """
    if not file_content:
        # gr.Interface has no `alert` attribute — calling it raised
        # AttributeError. Raising gr.Error is the supported way to show
        # an error popup from a Gradio event handler.
        raise gr.Error("Please upload a valid PDF resume.")

def main():
    """Wire up the Gradio UI and start the app server."""
    # Input widgets: API key as text, resume as a raw-bytes file upload.
    api_key_box = Textbox(label="Enter your OpenAI API Key")
    # Use 'binary' type to receive the file's content directly as a binary object
    resume_upload = File(label="Upload your PDF resume", type="binary")

    # Output widget: the formatted YAML text.
    yaml_box = Textbox(label="Formatted Resume in YAML")

    demo = gr.Interface(
        fn=format_resume_to_yaml,
        inputs=[api_key_box, resume_upload],
        outputs=yaml_box,
        title="Resume to YAML Formatter",
        description="Upload a PDF resume and enter your OpenAI API key to get it formatted to a YAML template.",
    )
    demo.launch(debug=True)

# Script entry point: only launch the UI when executed directly, not on import.
if __name__ == "__main__":
    main()