Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,11 +3,16 @@ import os
|
|
3 |
import io
|
4 |
import PyPDF2
|
5 |
from langchain_openai import ChatOpenAI
|
|
|
6 |
from langchain.chains import LLMChain
|
7 |
from langchain.memory import ConversationBufferMemory
|
8 |
from langchain import PromptTemplate
|
|
|
|
|
|
|
9 |
from gradio.components import File, Textbox, Dropdown
|
10 |
|
|
|
11 |
def extract_text_from_pdf_binary(pdf_binary):
|
12 |
text = ""
|
13 |
pdf_data = io.BytesIO(pdf_binary)
|
@@ -17,85 +22,88 @@ def extract_text_from_pdf_binary(pdf_binary):
|
|
17 |
for page in range(num_pages):
|
18 |
current_page = reader.pages[page]
|
19 |
page_text = current_page.extract_text()
|
20 |
-
if page_text:
|
21 |
text += page_text
|
22 |
return text
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
Assumes `section_text` is a string containing the text for the section.
|
28 |
-
|
29 |
-
Parameters:
|
30 |
-
- section_text: The extracted text for the section.
|
31 |
-
- section_name: The name of the section.
|
32 |
-
|
33 |
-
Returns:
|
34 |
-
- A string representing the section information in YAML format.
|
35 |
-
"""
|
36 |
-
# For a more complex formatting based on the content's structure,
|
37 |
-
# you'd need to parse and transform `section_text` accordingly.
|
38 |
-
yaml_output = f"{section_name}:\n"
|
39 |
-
for line in section_text.split('\n'):
|
40 |
-
if line.strip(): # Avoid adding empty lines
|
41 |
-
yaml_output += f" - {line.strip()}\n"
|
42 |
-
return yaml_output
|
43 |
-
|
44 |
-
def format_resume_to_yaml(api_key, file_content, section):
|
45 |
os.environ['OPENAI_API_KEY'] = api_key
|
46 |
|
|
|
47 |
if not file_content:
|
48 |
raise ValueError("The uploaded file is empty.")
|
49 |
|
|
|
50 |
resume_text = extract_text_from_pdf_binary(file_content)
|
51 |
|
52 |
-
#
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
}
|
59 |
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
-
|
64 |
-
# Simple parsing logic to extract the section's content
|
65 |
-
# This is a very basic implementation; actual logic might need to account for various resume formats
|
66 |
-
try:
|
67 |
-
start_index = resume_text.index(header) + len(header)
|
68 |
-
section_text = resume_text[start_index:]
|
69 |
-
# Assuming sections are separated by two newlines, adjust based on actual resume format
|
70 |
-
end_index = section_text.find("\n\n")
|
71 |
-
if end_index != -1:
|
72 |
-
section_text = section_text[:end_index]
|
73 |
-
except ValueError:
|
74 |
-
section_text = "Section not found in the resume."
|
75 |
-
else:
|
76 |
-
# If "All" is selected, use the entire resume text
|
77 |
-
section_text = resume_text
|
78 |
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
-
|
|
|
82 |
|
83 |
|
84 |
def main():
|
85 |
input_api_key = Textbox(label="Enter your OpenAI API Key")
|
86 |
input_pdf_file = File(label="Upload your PDF resume", type="binary")
|
87 |
-
|
|
|
|
|
|
|
88 |
output_yaml = Textbox(label="Formatted Resume in YAML")
|
89 |
|
90 |
iface = gr.Interface(
|
91 |
fn=format_resume_to_yaml,
|
92 |
-
inputs=[input_api_key, input_pdf_file,
|
93 |
outputs=output_yaml,
|
94 |
title="Resume to YAML Formatter",
|
95 |
-
description="Upload a PDF resume, enter your OpenAI API key, and
|
96 |
)
|
97 |
|
98 |
iface.launch(debug=True, share=True)
|
99 |
|
|
|
100 |
if __name__ == "__main__":
|
101 |
main()
|
|
|
3 |
import io
|
4 |
import PyPDF2
|
5 |
from langchain_openai import ChatOpenAI
|
6 |
+
|
7 |
from langchain.chains import LLMChain
|
8 |
from langchain.memory import ConversationBufferMemory
|
9 |
from langchain import PromptTemplate
|
10 |
+
|
11 |
+
|
12 |
+
# Updated imports for Gradio components
|
13 |
from gradio.components import File, Textbox, Dropdown
|
14 |
|
15 |
+
|
16 |
def extract_text_from_pdf_binary(pdf_binary):
    """Extract all text from a PDF supplied as raw bytes.

    Parameters:
        pdf_binary: The PDF file content as bytes (e.g. from a Gradio
            File component with type="binary").

    Returns:
        A single string concatenating the extracted text of every page.
        Pages whose extraction yields nothing are skipped.
    """
    text = ""
    pdf_data = io.BytesIO(pdf_binary)
    # NOTE(review): the reader-setup lines were elided by the diff hunk in
    # the source view; reconstructed from the uses of `reader.pages` and
    # `num_pages` below — confirm against the original file.
    reader = PyPDF2.PdfReader(pdf_data)
    num_pages = len(reader.pages)

    for page in range(num_pages):
        current_page = reader.pages[page]
        page_text = current_page.extract_text()
        if page_text:  # extract_text() may return None or "" for image-only pages
            text += page_text
    return text
|
28 |
|
29 |
+
|
30 |
+
def format_resume_to_yaml(api_key, file_content, filter_option="full"):
    """Convert a PDF resume (raw bytes) into YAML text via an LLM.

    Parameters:
        api_key: OpenAI API key; exported to the environment so that
            ChatOpenAI picks it up.
        file_content: The uploaded PDF as bytes.
        filter_option: Which resume section to emit. "full" (the default)
            emits every section; unknown values fall back to "full".

    Returns:
        The LLM's YAML-formatted rendering of the (selected section of
        the) resume.

    Raises:
        ValueError: If file_content is empty.
    """
    # Set the API key for OpenAI (read by ChatOpenAI below).
    os.environ['OPENAI_API_KEY'] = api_key

    # Check that the uploaded file is not empty.
    if not file_content:
        raise ValueError("The uploaded file is empty.")

    # Extract text from the uploaded PDF binary.
    resume_text = extract_text_from_pdf_binary(file_content)

    # Section placeholders the model is asked to fill in. They are written
    # with doubled braces (f"{{{{...}}}}" -> "{{name}}") so PromptTemplate's
    # str.format renders them literally to the LLM instead of raising
    # KeyError for undeclared input variables; only {chat_history} and
    # {human_input} are real template variables.
    section_names = (
        "name", "phoneNumbers", "websites", "emails", "dateOfBirth",
        "addresses", "summary", "education", "workExperience", "skills",
        "certifications",
        # Add placeholders for other sections you want to filter
    )
    sections = {
        name: f"\n---\n{{{{{name}}}}}\n{{chat_history}}\n{{human_input}}\n"
        for name in section_names
    }
    # "full" concatenates every section placeholder into one template.
    sections["full"] = (
        "\n---\n"
        + "".join(f"{{{{{name}}}}}" for name in section_names)
        + "\n{chat_history}\n{human_input}\n"
    )

    # Use the selected filter option to pick the appropriate template,
    # falling back to the full template for unknown options.
    filtered_template = sections.get(filter_option, sections["full"])

    prompt = PromptTemplate(
        input_variables=["chat_history", "human_input"],
        template=filtered_template,
    )

    memory = ConversationBufferMemory(memory_key="chat_history")

    llm_chain = LLMChain(
        llm=ChatOpenAI(model="gpt-3.5-turbo"),
        prompt=prompt,
        verbose=True,
        memory=memory,
    )

    res = llm_chain.predict(human_input=resume_text)
    return res
|
86 |
|
87 |
|
88 |
def main():
    """Build and launch the Gradio interface for the resume formatter."""
    input_api_key = Textbox(label="Enter your OpenAI API Key")
    input_pdf_file = File(label="Upload your PDF resume", type="binary")

    # Dropdown for section filtering. The choices are listed explicitly:
    # the original read `sections.keys()`, but `sections` is local to
    # format_resume_to_yaml and undefined here (NameError at startup),
    # and "name" was duplicated in the list.
    filter_options = Dropdown(
        label="Filter",
        choices=[
            "full", "name", "phoneNumbers", "websites", "emails",
            "dateOfBirth", "addresses", "summary", "education",
            "workExperience", "skills", "certifications",
        ],
        value="full",  # matches format_resume_to_yaml's default
    )

    output_yaml = Textbox(label="Formatted Resume in YAML")

    iface = gr.Interface(
        fn=format_resume_to_yaml,
        inputs=[input_api_key, input_pdf_file, filter_options],
        outputs=output_yaml,
        title="Resume to YAML Formatter",
        description="Upload a PDF resume, enter your OpenAI API key, and choose a section to filter the output. (Full format by default)",
    )

    # share=True exposes a public Gradio link; debug=True surfaces errors.
    iface.launch(debug=True, share=True)
|
106 |
|
107 |
+
|
108 |
# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|