Update app.py
app.py CHANGED
@@ -2,16 +2,11 @@ import gradio as gr
 import os
 import io
 import PyPDF2
-#from langchain.llms import OpenAIChat
 from langchain_openai import ChatOpenAI
-
 from langchain.chains import LLMChain
 from langchain.memory import ConversationBufferMemory
 from langchain import PromptTemplate
-
-
-# Updated imports for Gradio components
-from gradio.components import File, Textbox
+from gradio.components import File, Textbox, Dropdown

 def extract_text_from_pdf_binary(pdf_binary):
     text = ""
@@ -22,87 +17,85 @@ def extract_text_from_pdf_binary(pdf_binary):
     for page in range(num_pages):
         current_page = reader.pages[page]
         page_text = current_page.extract_text()
-        if page_text:
+        if page_text:
             text += page_text
     return text

-def format_resume_to_yaml(api_key, file_content):
-
+def format_section_to_yaml(section_text, section_name):
+    """
+    Formats the extracted text for a specific section into a simple YAML structure.
+    Assumes `section_text` is a string containing the text for the section.
+
+    Parameters:
+    - section_text: The extracted text for the section.
+    - section_name: The name of the section.
+
+    Returns:
+    - A string representing the section information in YAML format.
+    """
+    # For a more complex formatting based on the content's structure,
+    # you'd need to parse and transform `section_text` accordingly.
+    yaml_output = f"{section_name}:\n"
+    for line in section_text.split('\n'):
+        if line.strip():  # Avoid adding empty lines
+            yaml_output += f"  - {line.strip()}\n"
+    return yaml_output
+
+def format_resume_to_yaml(api_key, file_content, section):
     os.environ['OPENAI_API_KEY'] = api_key

-    # Check if the file content is not empty
     if not file_content:
         raise ValueError("The uploaded file is empty.")

-    # Extract text from the uploaded PDF binary
     resume_text = extract_text_from_pdf_binary(file_content)

-[old lines 40-69, the opening and body of the triple-quoted YAML prompt template, are collapsed in this diff view]
-certifications:
-  - name: ''
-{chat_history}
-{human_input}"""
-
-    prompt = PromptTemplate(
-        input_variables=["chat_history", "human_input"],
-        template=template
-    )
-
-    memory = ConversationBufferMemory(memory_key="chat_history")
-
-    llm_chain = LLMChain(
-        llm=ChatOpenAI(model="gpt-3.5-turbo"),
-        prompt=prompt,
-        verbose=True,
-        memory=memory,
-    )
+    # Example keywords for section headers in resumes
+    section_headers = {
+        "All": None,  # No specific processing, use entire text
+        "Education": "Education",
+        "Work Experience": "Work Experience",
+        "Skills": "Skills",
+    }
+
+    header = section_headers.get(section)
+    section_text = ""
+
+    if header:
+        # Simple parsing logic to extract the section's content
+        # This is a very basic implementation; actual logic might need to account for various resume formats
+        try:
+            start_index = resume_text.index(header) + len(header)
+            section_text = resume_text[start_index:]
+            # Assuming sections are separated by two newlines, adjust based on actual resume format
+            end_index = section_text.find("\n\n")
+            if end_index != -1:
+                section_text = section_text[:end_index]
+        except ValueError:
+            section_text = "Section not found in the resume."
+    else:
+        # If "All" is selected, use the entire resume text
+        section_text = resume_text
+
+    formatted_output = format_section_to_yaml(section_text, section)
+
+    return formatted_output

-    res = llm_chain.predict(human_input=resume_text)
-    return res

 def main():
     input_api_key = Textbox(label="Enter your OpenAI API Key")
     input_pdf_file = File(label="Upload your PDF resume", type="binary")
+    input_section = Dropdown(label="Select Section", choices=["All", "Name", "Phone", "Education"], value="All")
     output_yaml = Textbox(label="Formatted Resume in YAML")

     iface = gr.Interface(
         fn=format_resume_to_yaml,
-        inputs=[input_api_key, input_pdf_file],
+        inputs=[input_api_key, input_pdf_file, input_section],
         outputs=output_yaml,
         title="Resume to YAML Formatter",
-        description="Upload a PDF resume […rest of the old description truncated in this diff view]
+        description="Upload a PDF resume, enter your OpenAI API key, and select the section you want to format to a YAML template.",
     )

     iface.launch(debug=True, share=True)

 if __name__ == "__main__":
-    main()
+    main()
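For quick reference, here is a minimal standalone sketch of the section-extraction and YAML-formatting logic this commit introduces. The sample resume text and the `extract_section` helper name are illustrative only; in the app itself this logic lives inline in `format_resume_to_yaml`:

# Illustrative sketch; mirrors the logic added in this commit, using assumed sample text.
def format_section_to_yaml(section_text, section_name):
    # One "- item" line per non-empty input line, under a "<section_name>:" key.
    yaml_output = f"{section_name}:\n"
    for line in section_text.split('\n'):
        if line.strip():
            yaml_output += f"  - {line.strip()}\n"
    return yaml_output

def extract_section(resume_text, header):
    # Hypothetical helper, same naive parsing as the new format_resume_to_yaml:
    # find the header keyword, then cut at the next blank line.
    try:
        start = resume_text.index(header) + len(header)
        section = resume_text[start:]
        end = section.find("\n\n")
        return section[:end] if end != -1 else section
    except ValueError:
        return "Section not found in the resume."

sample = "Education\nB.Sc. Computer Science, 2020\nM.Sc. Data Science, 2022\n\nSkills\nPython, SQL"
print(format_section_to_yaml(extract_section(sample, "Education"), "Education"))
# Education:
#   - B.Sc. Computer Science, 2020
#   - M.Sc. Data Science, 2022

Note that the new Dropdown also offers "Name" and "Phone", which have no entry in section_headers; those selections fall through to the else branch and format the entire resume text, the same as "All".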