barghavani committed on
Commit
8bac8c2
·
verified ·
1 Parent(s): 2957d1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -64
app.py CHANGED
@@ -2,16 +2,11 @@ import gradio as gr
2
  import os
3
  import io
4
  import PyPDF2
5
- #from langchain.llms import OpenAIChat
6
  from langchain_openai import ChatOpenAI
7
-
8
  from langchain.chains import LLMChain
9
  from langchain.memory import ConversationBufferMemory
10
  from langchain import PromptTemplate
11
-
12
-
13
- # Updated imports for Gradio components
14
- from gradio.components import File, Textbox
15
 
16
  def extract_text_from_pdf_binary(pdf_binary):
17
  text = ""
@@ -22,87 +17,85 @@ def extract_text_from_pdf_binary(pdf_binary):
22
  for page in range(num_pages):
23
  current_page = reader.pages[page]
24
  page_text = current_page.extract_text()
25
- if page_text: # Check if page_text is not None or empty
26
  text += page_text
27
  return text
28
 
29
- def format_resume_to_yaml(api_key, file_content):
30
- # Set the API key for OpenAI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  os.environ['OPENAI_API_KEY'] = api_key
32
 
33
- # Check if the file content is not empty
34
  if not file_content:
35
  raise ValueError("The uploaded file is empty.")
36
 
37
- # Extract text from the uploaded PDF binary
38
  resume_text = extract_text_from_pdf_binary(file_content)
39
 
40
- template = """Format the provided resume to this YAML template:
41
- ---
42
- name: ''
43
- phoneNumbers:
44
- - ''
45
- websites:
46
- - ''
47
- emails:
48
- - ''
49
- dateOfBirth: ''
50
- addresses:
51
- - street: ''
52
- city: ''
53
- state: ''
54
- zip: ''
55
- country: ''
56
- summary: ''
57
- education:
58
- - school: ''
59
- degree: ''
60
- fieldOfStudy: ''
61
- startDate: ''
62
- endDate: ''
63
- workExperience:
64
- - company: ''
65
- position: ''
66
- startDate: ''
67
- endDate: ''
68
- skills:
69
- - name: ''
70
- certifications:
71
- - name: ''
72
- {chat_history}
73
- {human_input}"""
74
-
75
- prompt = PromptTemplate(
76
- input_variables=["chat_history", "human_input"],
77
- template=template
78
- )
79
-
80
- memory = ConversationBufferMemory(memory_key="chat_history")
81
-
82
- llm_chain = LLMChain(
83
- llm=ChatOpenAI(model="gpt-3.5-turbo"),
84
- prompt=prompt,
85
- verbose=True,
86
- memory=memory,
87
- )
88
 
89
- res = llm_chain.predict(human_input=resume_text)
90
- return res
91
 
92
  def main():
93
  input_api_key = Textbox(label="Enter your OpenAI API Key")
94
  input_pdf_file = File(label="Upload your PDF resume", type="binary")
 
95
  output_yaml = Textbox(label="Formatted Resume in YAML")
96
 
97
  iface = gr.Interface(
98
  fn=format_resume_to_yaml,
99
- inputs=[input_api_key, input_pdf_file],
100
  outputs=output_yaml,
101
  title="Resume to YAML Formatter",
102
- description="Upload a PDF resume and enter your OpenAI API key to get it formatted to a YAML template.",
103
  )
104
 
105
  iface.launch(debug=True, share=True)
106
 
107
  if __name__ == "__main__":
108
- main()
 
2
  import os
3
  import io
4
  import PyPDF2
 
5
  from langchain_openai import ChatOpenAI
 
6
  from langchain.chains import LLMChain
7
  from langchain.memory import ConversationBufferMemory
8
  from langchain import PromptTemplate
9
+ from gradio.components import File, Textbox, Dropdown
 
 
 
10
 
11
  def extract_text_from_pdf_binary(pdf_binary):
12
  text = ""
 
17
  for page in range(num_pages):
18
  current_page = reader.pages[page]
19
  page_text = current_page.extract_text()
20
+ if page_text:
21
  text += page_text
22
  return text
23
 
24
def format_section_to_yaml(section_text, section_name):
    """
    Format the extracted text of one resume section as a simple YAML list.

    Every non-blank line of `section_text` becomes one list item under the
    key `section_name`. Items are written as double-quoted scalars so that
    resume text containing YAML-significant characters (": ", "#", a leading
    "*", "&", "-", "[", "{", ...) cannot break or change the structure of
    the output.

    Parameters:
    - section_text: The extracted text for the section (str; None is treated
      as empty).
    - section_name: The name of the section, used verbatim as the YAML key.

    Returns:
    - A string representing the section information in YAML format. A section
      without any non-blank line is rendered as an explicit empty list.
    """
    # Local import keeps this block self-contained; JSON string syntax is a
    # valid YAML double-quoted scalar, so json.dumps does the escaping.
    import json

    stripped_lines = (line.strip() for line in (section_text or "").split('\n'))
    items = [line for line in stripped_lines if line]  # Avoid adding empty lines

    if not items:
        # A bare "key:" would parse as null; emit an empty list instead.
        return f"{section_name}: []\n"

    rendered_items = "".join(
        f" - {json.dumps(item, ensure_ascii=False)}\n" for item in items
    )
    return f"{section_name}:\n{rendered_items}"
43
+
44
def format_resume_to_yaml(api_key, file_content, section):
    """
    Extract the text of a PDF resume and render one section of it as YAML.

    Parameters:
    - api_key: OpenAI API key. When non-empty it is exported as the
      OPENAI_API_KEY environment variable; nothing in this function calls
      OpenAI itself.
    - file_content: Binary content of the uploaded PDF.
    - section: Name of the section to format. "All" (or an empty selection)
      formats the whole resume. Names without a dedicated keyword are
      searched for verbatim, so every dropdown choice maps to a real lookup.

    Returns:
    - The selected section formatted by `format_section_to_yaml`.

    Raises:
    - ValueError: if `file_content` is empty.
    """
    import re  # local import keeps this block self-contained

    # Assigning None to os.environ raises TypeError, so only export real keys.
    if api_key:
        os.environ['OPENAI_API_KEY'] = api_key

    if not file_content:
        raise ValueError("The uploaded file is empty.")

    resume_text = extract_text_from_pdf_binary(file_content)

    # A cleared dropdown is handled like "All".
    section = section or "All"

    # Example keywords for section headers in resumes
    section_headers = {
        "All": None,  # No specific processing, use entire text
        "Education": "Education",
        "Work Experience": "Work Experience",
        "Skills": "Skills",
    }

    # Unknown section names fall back to being their own header keyword
    # instead of silently dumping the entire resume under that label.
    header = section_headers.get(section, section)

    if not header:
        # If "All" is selected, use the entire resume text
        section_text = resume_text
    else:
        # Simple parsing logic to extract the section's content.
        # This is a very basic implementation; actual logic might need to
        # account for various resume formats.
        # Headers are matched case-insensitively ("EDUCATION", "education").
        found = re.search(re.escape(header), resume_text, re.IGNORECASE)
        if found is None:
            section_text = "Section not found in the resume."
        else:
            # Drop a trailing colon and the blank space after the header first;
            # otherwise a blank line right after the header would match the
            # terminator at offset 0 and yield an empty section.
            remainder = resume_text[found.end():].lstrip(" \t\r\n:")
            # Assuming sections are separated by two newlines, adjust based on
            # actual resume format.
            section_text, _, _ = remainder.partition("\n\n")

    return format_section_to_yaml(section_text, section)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
 
 
83
 
84
def main():
    """
    Build the Gradio interface around `format_resume_to_yaml` and launch it.

    The interface takes an OpenAI API key, a PDF upload (passed to the handler
    as binary content) and a section selector, and shows the YAML result in a
    text box. The app is launched with `debug=True` and `share=True`.
    """
    # Mask the key in the browser instead of showing the secret in clear text.
    input_api_key = Textbox(label="Enter your OpenAI API Key", type="password")
    input_pdf_file = File(label="Upload your PDF resume", type="binary")
    # Choices mirror the section names format_resume_to_yaml has header
    # keywords for, so every option selects a real section.
    input_section = Dropdown(
        label="Select Section",
        choices=["All", "Education", "Work Experience", "Skills"],
        value="All",
    )
    output_yaml = Textbox(label="Formatted Resume in YAML")

    iface = gr.Interface(
        fn=format_resume_to_yaml,
        inputs=[input_api_key, input_pdf_file, input_section],
        outputs=output_yaml,
        title="Resume to YAML Formatter",
        description="Upload a PDF resume, enter your OpenAI API key, and select the section you want to format to a YAML template.",
    )

    iface.launch(debug=True, share=True)


if __name__ == "__main__":
    main()