Update app.py

app.py CHANGED
@@ -1,15 +1,12 @@
-import gradio as gr
-import os
-import io
-import PyPDF2
 from langchain_openai import ChatOpenAI
-
-# Corrected import statement for PromptTemplate
 from langchain_core.prompts import PromptTemplate
-
 from langchain.chains import LLMChain
 from langchain.memory import ConversationBufferMemory
-
+
+import gradio as gr
+import os
+import io
+import PyPDF2
 
 def extract_text_from_pdf_binary(pdf_binary):
     text = ""
@@ -23,41 +20,56 @@ def extract_text_from_pdf_binary(pdf_binary):
             text += page_text
     return text
 
+def parse_resume_text_into_fields(resume_text):
+    # Dummy function for illustration. You'll need to implement parsing logic based on your resume format.
+    # This should return a dictionary with keys matching those expected by your PromptTemplate.
+    return {
+        "name": "John Doe",
+        "phoneNumbers": "+1 234 567 890",
+        "websites": "www.johndoe.com",
+        "emails": "[email protected]",
+        "dateOfBirth": "1990-01-01",
+        "addresses": "123 Main St, Anytown, USA",
+        "summary": "Experienced software engineer...",
+        "education": "B.S. in Computer Science from XYZ University",
+        "workExperience": "Software Engineer at ABC Corp...",
+        "skills": "Python, Java, SQL",
+        "certifications": "Certified Kubernetes Administrator",
+        # Add any additional fields as necessary.
+    }
+
 def format_resume_to_yaml(api_key, file_content, filter_option="full"):
     os.environ['OPENAI_API_KEY'] = api_key
     if not file_content:
         raise ValueError("The uploaded file is empty.")
     resume_text = extract_text_from_pdf_binary(file_content)
+
+    # Extract and parse resume text into fields
+    resume_fields = parse_resume_text_into_fields(resume_text)
 
     # Define the YAML template here, before it's used
-    template = """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        "skills": "{skills}\n",
-        "certifications": "{certifications}\n",
-    }
-
-    # Use the selected filter option to pick the appropriate template
-    filtered_template = sections.get(filter_option, template)
+    template = """---
+{name}
+{phoneNumbers}
+{websites}
+{emails}
+{dateOfBirth}
+{addresses}
+{summary}
+{education}
+{workExperience}
+{skills}
+{certifications}
+{chat_history}
+{human_input}
+"""
+
+    # Generate a dictionary for the input_variables expected by the PromptTemplate
+    input_variables_dict = {key: resume_fields[key] for key in resume_fields.keys() if key in template}
 
     prompt = PromptTemplate(
-        input_variables=
-        template=
+        input_variables=list(input_variables_dict.keys()),
+        template=template
     )
 
     memory = ConversationBufferMemory(memory_key="chat_history")
@@ -69,42 +81,6 @@ def format_resume_to_yaml(api_key, file_content, filter_option="full"):
         memory=memory,
     )
 
-
+    # The predict method expects a dictionary of variables to replace in the template
+    res = llm_chain.predict(human_input=resume_text, **input_variables_dict)
     return res
-
-def main():
-    # Define sections before using it for filter_options
-    sections = {
-        "full": "Complete template",
-        "name": "Only the name section",
-        "phoneNumbers": "Only the phone numbers section",
-        "websites": "Only the websites section",
-        "emails": "Only the emails section",
-        "dateOfBirth": "Only the date of birth section",
-        "addresses": "Only the addresses section",
-        "summary": "Only the summary section",
-        "education": "Only the education section",
-        "workExperience": "Only the work experience section",
-        "skills": "Only the skills section",
-        "certifications": "Only the certifications section",
-        # Add more sections as needed
-    }
-
-    input_api_key = Textbox(label="Enter your OpenAI API Key")
-    input_pdf_file = File(label="Upload your PDF resume", type="binary")
-    filter_options = Dropdown(label="Filter", choices=["full", "name"] + list(sections.keys()))
-
-    output_yaml = Textbox(label="Formatted Resume in YAML")
-
-    iface = gr.Interface(
-        fn=format_resume_to_yaml,
-        inputs=[input_api_key, input_pdf_file, filter_options],
-        outputs=output_yaml,
-        title="Resume to YAML Formatter",
-        description="Upload a PDF resume, enter your OpenAI API key, and choose a section to filter the output. (Full format by default)",
-    )
-
-    iface.launch(debug=True, share=True)
-
-if __name__ == "__main__":
-    main()
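
Note: the hunk headers above fold the body of extract_text_from_pdf_binary (old lines 16-22 / new lines 13-19), so the PDF-reading logic itself is not visible in this diff. A minimal sketch of what that helper plausibly looks like, given the io and PyPDF2 imports and the visible context lines; this is a guess at the folded code, not the committed body:

def extract_text_from_pdf_binary(pdf_binary):
    text = ""
    # Wrap the raw bytes in a file-like object so PyPDF2 can read them.
    reader = PyPDF2.PdfReader(io.BytesIO(pdf_binary))
    for page in reader.pages:
        page_text = page.extract_text()
        # extract_text() can return None or "" for image-only pages.
        if page_text:
            text += page_text
    return text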
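Likewise, the construction of llm_chain (new lines 76-80) is folded between the last two hunks; only its trailing memory=memory, and closing parenthesis are visible. Given the imports shown, it is presumably an LLMChain built from the prompt and memory. A hedged sketch; the model name is an assumption, not something shown in the diff:

llm_chain = LLMChain(
    llm=ChatOpenAI(model="gpt-3.5-turbo"),  # model choice assumed; not visible in the diff
    prompt=prompt,
    memory=memory,
)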
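Finally, since this commit deletes main() and the Gradio interface, app.py no longer launches anything on its own. One minimal, hypothetical way to exercise the remaining format_resume_to_yaml function directly; the API key and file path below are placeholders:

if __name__ == "__main__":
    # Placeholders: substitute a real OpenAI key and an actual PDF path.
    with open("resume.pdf", "rb") as f:
        print(format_resume_to_yaml(api_key="sk-...", file_content=f.read()))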