barghavani committed on
Commit
050a351
·
verified ·
1 Parent(s): 3522502

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -50
app.py CHANGED
@@ -3,11 +3,16 @@ import os
3
  import io
4
  import PyPDF2
5
  from langchain_openai import ChatOpenAI
 
6
  from langchain.chains import LLMChain
7
  from langchain.memory import ConversationBufferMemory
8
  from langchain import PromptTemplate
 
 
 
9
  from gradio.components import File, Textbox, Dropdown
10
 
 
11
def extract_text_from_pdf_binary(pdf_binary):
    """Extract all text from a PDF supplied as raw bytes.

    Parameters:
        pdf_binary: the PDF file content as a ``bytes`` object.

    Returns:
        A single string with the concatenated text of every page.
        Pages for which PyPDF2 cannot extract text contribute nothing.
    """
    pdf_data = io.BytesIO(pdf_binary)
    reader = PyPDF2.PdfReader(pdf_data)
    text = ""
    # Iterate the pages directly instead of indexing with range(num_pages).
    for page in reader.pages:
        page_text = page.extract_text()
        if page_text:  # extract_text() may return None or "" for some pages
            text += page_text
    return text
23
 
24
def format_section_to_yaml(section_text, section_name):
    """Render a section's text as a minimal YAML block.

    Every non-blank line of ``section_text`` becomes one ``- item`` entry
    listed under a top-level key named ``section_name``.

    Parameters:
        section_text: the extracted text for the section.
        section_name: the name of the section (used as the YAML key).

    Returns:
        A string with the section information in simple YAML form.
    """
    stripped_lines = (raw.strip() for raw in section_text.split("\n"))
    body = "".join(f" - {item}\n" for item in stripped_lines if item)
    return f"{section_name}:\n" + body
43
-
44
def format_resume_to_yaml(api_key, file_content, section):
    """Extract one section of a PDF resume and format it as YAML.

    Parameters:
        api_key: OpenAI API key; exported to the environment for any
            downstream OpenAI/LangChain calls.
        file_content: raw bytes of the uploaded PDF resume.
        section: which section to extract; "All" (or any unknown value)
            uses the entire resume text.

    Returns:
        The selected section rendered by ``format_section_to_yaml``.

    Raises:
        ValueError: if ``file_content`` is empty.
    """
    import re  # local import so this fix needs no file-level changes

    os.environ['OPENAI_API_KEY'] = api_key

    if not file_content:
        raise ValueError("The uploaded file is empty.")

    resume_text = extract_text_from_pdf_binary(file_content)

    # Regex patterns (not literal strings) for the section headers.
    # NOTE: the original looked these up with str.index(), which treated
    # the "Education|EDUCATION|..." alternation as a literal string and
    # therefore never matched -- every Education request reported
    # "Section not found in the resume."
    section_headers = {
        "All": None,  # no filtering, use the entire text
        "Education": r"Education|EDUCATION|University|Academic Background|Educational Qualifications",
        "Work Experience": r"Work Experience",
        "Skills": r"Skills",
    }

    header = section_headers.get(section)
    section_text = ""

    if header:
        # Use a regex search so alternations like "Education|EDUCATION"
        # actually work.  This is still a basic heuristic; real resumes
        # vary widely in layout.
        match = re.search(header, resume_text)
        if match:
            section_text = resume_text[match.end():]
            # Assume sections are separated by a blank line; adjust if a
            # different resume layout is expected.
            end_index = section_text.find("\n\n")
            if end_index != -1:
                section_text = section_text[:end_index]
        else:
            section_text = "Section not found in the resume."
    else:
        # "All" (or any unknown section) falls back to the full text.
        section_text = resume_text

    return format_section_to_yaml(section_text, section)
 
82
 
83
 
84
def main():
    """Build and launch the Gradio interface for the resume formatter."""
    input_api_key = Textbox(label="Enter your OpenAI API Key")
    input_pdf_file = File(label="Upload your PDF resume", type="binary")
    # Offer exactly the sections format_resume_to_yaml knows how to parse.
    # The original listed "Name"/"Phone" (unsupported -- they silently
    # fell back to the whole document) and omitted the supported
    # "Work Experience" and "Skills" sections.
    input_section = Dropdown(
        label="Select Section",
        choices=["All", "Education", "Work Experience", "Skills"],
        value="All",
    )
    output_yaml = Textbox(label="Formatted Resume in YAML")

    # NOTE(review): `gr` is presumably bound by an `import gradio as gr`
    # above the visible hunk (the diff starts at file line 3) -- confirm.
    iface = gr.Interface(
        fn=format_resume_to_yaml,
        inputs=[input_api_key, input_pdf_file, input_section],
        outputs=output_yaml,
        title="Resume to YAML Formatter",
        description="Upload a PDF resume, enter your OpenAI API key, and select the section you want to format to a YAML template.",
    )

    iface.launch(debug=True, share=True)


if __name__ == "__main__":
    main()
 
3
  import io
4
  import PyPDF2
5
  from langchain_openai import ChatOpenAI
6
+
7
  from langchain.chains import LLMChain
8
  from langchain.memory import ConversationBufferMemory
9
  from langchain import PromptTemplate
10
+
11
+
12
+ # Updated imports for Gradio components
13
  from gradio.components import File, Textbox, Dropdown
14
 
15
+
16
def extract_text_from_pdf_binary(pdf_binary):
    """Return the concatenated text of every page in a PDF byte string."""
    pdf_data = io.BytesIO(pdf_binary)
    reader = PyPDF2.PdfReader(pdf_data)
    num_pages = len(reader.pages)
    collected = []
    for index in range(num_pages):
        extracted = reader.pages[index].extract_text()
        # extract_text() can return None or an empty string; skip those.
        if extracted:
            collected.append(extracted)
    return "".join(collected)
28
 
29
+
30
def format_resume_to_yaml(api_key, file_content, filter_option="full"):
    """Convert a PDF resume to YAML via an LLM, optionally filtered to one section.

    Parameters:
        api_key: OpenAI API key, exported through the environment.
        file_content: raw bytes of the uploaded PDF resume.
        filter_option: which section of the YAML skeleton to request;
            "full" (default, also the fallback for unknown values) asks
            for every section.

    Returns:
        The model's response text (the resume rendered as YAML).

    Raises:
        ValueError: if ``file_content`` is empty.
    """
    # Set the API key for OpenAI.
    os.environ['OPENAI_API_KEY'] = api_key

    if not file_content:
        raise ValueError("The uploaded file is empty.")

    # Extract text from the uploaded PDF binary.
    resume_text = extract_text_from_pdf_binary(file_content)

    # YAML skeleton shown to the model.  The section names are written
    # with doubled braces ({{name}}) so PromptTemplate treats them as
    # literal text; the only real template variables are chat_history and
    # human_input.  (The original used single braces, which made
    # PromptTemplate expect a value for every section name and fail when
    # the prompt was formatted.)
    template = """
---
{{name}}{{phoneNumbers}}{{websites}}{{emails}}{{dateOfBirth}}{{addresses}}{{summary}}{{education}}{{workExperience}}{{skills}}{{certifications}}
{chat_history}
{human_input}
"""

    # Per-section skeletons.  Each one still includes chat_history and
    # human_input so the resume text actually reaches the model (the
    # original per-section templates contained neither, so the model
    # never saw the resume when a filter was selected).
    section_names = [
        "name", "phoneNumbers", "websites", "emails", "dateOfBirth",
        "addresses", "summary", "education", "workExperience",
        "skills", "certifications",
    ]
    sections = {
        name: f"{{{{{name}}}}}\n{{chat_history}}\n{{human_input}}\n"
        for name in section_names
    }
    sections["full"] = template

    # Unknown filter options fall back to the full template.
    filtered_template = sections.get(filter_option, template)

    prompt = PromptTemplate(
        input_variables=["chat_history", "human_input"],
        template=filtered_template,
    )

    memory = ConversationBufferMemory(memory_key="chat_history")

    llm_chain = LLMChain(
        llm=ChatOpenAI(model="gpt-3.5-turbo"),
        prompt=prompt,
        verbose=True,
        memory=memory,
    )

    return llm_chain.predict(human_input=resume_text)
86
 
87
 
88
def main():
    """Build and launch the Gradio interface for the resume formatter."""
    input_api_key = Textbox(label="Enter your OpenAI API Key")
    input_pdf_file = File(label="Upload your PDF resume", type="binary")

    # The original built the choices from `sections.keys()`, but
    # `sections` is a local variable inside format_resume_to_yaml, so
    # main() raised NameError at startup.  List the supported filters
    # explicitly instead (keep in sync with format_resume_to_yaml).
    filter_options = Dropdown(
        label="Filter",
        choices=[
            "full", "name", "phoneNumbers", "websites", "emails",
            "dateOfBirth", "addresses", "summary", "education",
            "workExperience", "skills", "certifications",
        ],
        value="full",
    )

    output_yaml = Textbox(label="Formatted Resume in YAML")

    # NOTE(review): `gr` is presumably bound by an `import gradio as gr`
    # above the visible hunk (the diff starts at file line 3) -- confirm.
    iface = gr.Interface(
        fn=format_resume_to_yaml,
        inputs=[input_api_key, input_pdf_file, filter_options],
        outputs=output_yaml,
        title="Resume to YAML Formatter",
        description="Upload a PDF resume, enter your OpenAI API key, and choose a section to filter the output. (Full format by default)",
    )

    iface.launch(debug=True, share=True)


if __name__ == "__main__":
    main()