barghavani committed
Commit 1d086f7 · verified · 1 parent: e0f8496

Update app.py

Files changed (1)
  1. app.py +48 -72
app.py CHANGED
@@ -1,15 +1,12 @@
-import gradio as gr
-import os
-import io
-import PyPDF2
 from langchain_openai import ChatOpenAI
-
-# Corrected import statement for PromptTemplate
 from langchain_core.prompts import PromptTemplate
-
 from langchain.chains import LLMChain
 from langchain.memory import ConversationBufferMemory
-from gradio.components import File, Textbox, Dropdown
+
+import gradio as gr
+import os
+import io
+import PyPDF2
 
 def extract_text_from_pdf_binary(pdf_binary):
     text = ""
@@ -23,41 +20,56 @@ def extract_text_from_pdf_binary(pdf_binary):
         text += page_text
     return text
 
+def parse_resume_text_into_fields(resume_text):
+    # Dummy function for illustration. You'll need to implement parsing logic based on your resume format.
+    # This should return a dictionary with keys matching those expected by your PromptTemplate.
+    return {
+        "name": "John Doe",
+        "phoneNumbers": "+1 234 567 890",
+        "websites": "www.johndoe.com",
+        "emails": "[email protected]",
+        "dateOfBirth": "1990-01-01",
+        "addresses": "123 Main St, Anytown, USA",
+        "summary": "Experienced software engineer...",
+        "education": "B.S. in Computer Science from XYZ University",
+        "workExperience": "Software Engineer at ABC Corp...",
+        "skills": "Python, Java, SQL",
+        "certifications": "Certified Kubernetes Administrator",
+        # Add any additional fields as necessary.
+    }
+
 def format_resume_to_yaml(api_key, file_content, filter_option="full"):
     os.environ['OPENAI_API_KEY'] = api_key
     if not file_content:
         raise ValueError("The uploaded file is empty.")
     resume_text = extract_text_from_pdf_binary(file_content)
+
+    # Extract and parse resume text into fields
+    resume_fields = parse_resume_text_into_fields(resume_text)
 
     # Define the YAML template here, before it's used
-    template = """
-    ---
-    {name}{phoneNumbers}{websites}{emails}{dateOfBirth}{addresses}{summary}{education}{workExperience}{skills}{certifications}
-    {chat_history}
-    {human_input}
-    """
-
-    sections = {
-        "full": template,
-        "name": "{name}\n",
-        "phoneNumbers": "{phoneNumbers}\n",
-        "websites": "{websites}\n",
-        "emails": "{emails}\n",
-        "dateOfBirth": "{dateOfBirth}\n",
-        "addresses": "{addresses}\n",
-        "summary": "{summary}\n",
-        "education": "{education}\n",
-        "workExperience": "{workExperience}\n",
-        "skills": "{skills}\n",
-        "certifications": "{certifications}\n",
-    }
-
-    # Use the selected filter option to pick the appropriate template
-    filtered_template = sections.get(filter_option, template)
+    template = """---
+    {name}
+    {phoneNumbers}
+    {websites}
+    {emails}
+    {dateOfBirth}
+    {addresses}
+    {summary}
+    {education}
+    {workExperience}
+    {skills}
+    {certifications}
+    {chat_history}
+    {human_input}
+    """
+
+    # Generate a dictionary for the input_variables expected by the PromptTemplate
+    input_variables_dict = {key: resume_fields[key] for key in resume_fields.keys() if key in template}
 
     prompt = PromptTemplate(
-        input_variables=["chat_history", "human_input"],
-        template=filtered_template
+        input_variables=list(input_variables_dict.keys()),
+        template=template
     )
 
     memory = ConversationBufferMemory(memory_key="chat_history")
@@ -69,42 +81,6 @@ def format_resume_to_yaml(api_key, file_content, filter_option="full"):
         memory=memory,
     )
 
-    res = llm_chain.predict(human_input=resume_text)
+    # The predict method expects a dictionary of variables to replace in the template
+    res = llm_chain.predict(human_input=resume_text, **input_variables_dict)
     return res
-
-def main():
-    # Define sections before using it for filter_options
-    sections = {
-        "full": "Complete template",
-        "name": "Only the name section",
-        "phoneNumbers": "Only the phone numbers section",
-        "websites": "Only the websites section",
-        "emails": "Only the emails section",
-        "dateOfBirth": "Only the date of birth section",
-        "addresses": "Only the addresses section",
-        "summary": "Only the summary section",
-        "education": "Only the education section",
-        "workExperience": "Only the work experience section",
-        "skills": "Only the skills section",
-        "certifications": "Only the certifications section",
-        # Add more sections as needed
-    }
-
-    input_api_key = Textbox(label="Enter your OpenAI API Key")
-    input_pdf_file = File(label="Upload your PDF resume", type="binary")
-    filter_options = Dropdown(label="Filter", choices=["full", "name"] + list(sections.keys()))
-
-    output_yaml = Textbox(label="Formatted Resume in YAML")
-
-    iface = gr.Interface(
-        fn=format_resume_to_yaml,
-        inputs=[input_api_key, input_pdf_file, filter_options],
-        outputs=output_yaml,
-        title="Resume to YAML Formatter",
-        description="Upload a PDF resume, enter your OpenAI API key, and choose a section to filter the output. (Full format by default)",
-    )
-
-    iface.launch(debug=True, share=True)
-
-if __name__ == "__main__":
-    main()
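
A note on the new input_variables_dict comprehension: `if key in template` is a plain substring test against the template string, not a lookup of the template's declared variables. It works here only because every field name appears verbatim inside its {placeholder}. A standalone sketch of that behavior (the "fax" field is hypothetical, included only to show what the check filters out):

# Standalone sketch of the filtering step added in this commit; the "fax"
# field is hypothetical and has no placeholder in the template.
template = """---
{name}
{phoneNumbers}
"""

resume_fields = {
    "name": "John Doe",
    "phoneNumbers": "+1 234 567 890",
    "fax": "n/a",  # hypothetical field with no placeholder in the template
}

# "name" passes because the substring "name" occurs inside "{name}";
# "fax" appears nowhere in the template string, so it is dropped.
input_variables_dict = {k: v for k, v in resume_fields.items() if k in template}
print(input_variables_dict)
# -> {'name': 'John Doe', 'phoneNumbers': '+1 234 567 890'}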
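
Separately, this commit deletes main() and the __main__ guard, so the new app.py defines the chain but never launches the Gradio UI, and filter_option stays in the signature of format_resume_to_yaml without being read now that the per-section templates are gone. A minimal sketch of how the interface could be re-attached at the bottom of app.py, reusing the labels from the removed main() (this wiring is not part of the commit):

# Sketch only: re-attach the Gradio UI this commit removed, reusing the
# labels from the deleted main(). Assumes it is appended to the new app.py,
# where format_resume_to_yaml is defined and gradio is imported as gr.
def main():
    iface = gr.Interface(
        fn=format_resume_to_yaml,
        inputs=[
            gr.Textbox(label="Enter your OpenAI API Key"),
            gr.File(label="Upload your PDF resume", type="binary"),
            # filter_option is still accepted by the function but ignored by
            # the new code path, so "full" is the only meaningful choice.
            gr.Dropdown(label="Filter", choices=["full"], value="full"),
        ],
        outputs=gr.Textbox(label="Formatted Resume in YAML"),
        title="Resume to YAML Formatter",
        description="Upload a PDF resume and enter your OpenAI API key.",
    )
    iface.launch(debug=True, share=True)

if __name__ == "__main__":
    main()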