Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,11 +3,16 @@ import os
|
|
3 |
import io
|
4 |
import PyPDF2
|
5 |
from langchain_openai import ChatOpenAI
|
|
|
6 |
from langchain.chains import LLMChain
|
7 |
from langchain.memory import ConversationBufferMemory
|
8 |
from langchain import PromptTemplate
|
|
|
|
|
|
|
9 |
from gradio.components import File, Textbox, Dropdown
|
10 |
|
|
|
11 |
def extract_text_from_pdf_binary(pdf_binary):
|
12 |
text = ""
|
13 |
pdf_data = io.BytesIO(pdf_binary)
|
@@ -17,85 +22,88 @@ def extract_text_from_pdf_binary(pdf_binary):
|
|
17 |
for page in range(num_pages):
|
18 |
current_page = reader.pages[page]
|
19 |
page_text = current_page.extract_text()
|
20 |
-
if page_text:
|
21 |
text += page_text
|
22 |
return text
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
Assumes `section_text` is a string containing the text for the section.
|
28 |
-
|
29 |
-
Parameters:
|
30 |
-
- section_text: The extracted text for the section.
|
31 |
-
- section_name: The name of the section.
|
32 |
-
|
33 |
-
Returns:
|
34 |
-
- A string representing the section information in YAML format.
|
35 |
-
"""
|
36 |
-
# For a more complex formatting based on the content's structure,
|
37 |
-
# you'd need to parse and transform `section_text` accordingly.
|
38 |
-
yaml_output = f"{section_name}:\n"
|
39 |
-
for line in section_text.split('\n'):
|
40 |
-
if line.strip(): # Avoid adding empty lines
|
41 |
-
yaml_output += f" - {line.strip()}\n"
|
42 |
-
return yaml_output
|
43 |
-
|
44 |
-
def format_resume_to_yaml(api_key, file_content, section):
|
45 |
os.environ['OPENAI_API_KEY'] = api_key
|
46 |
|
|
|
47 |
if not file_content:
|
48 |
raise ValueError("The uploaded file is empty.")
|
49 |
|
|
|
50 |
resume_text = extract_text_from_pdf_binary(file_content)
|
51 |
|
52 |
-
#
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
}
|
59 |
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
-
|
64 |
-
# Simple parsing logic to extract the section's content
|
65 |
-
# This is a very basic implementation; actual logic might need to account for various resume formats
|
66 |
-
try:
|
67 |
-
start_index = resume_text.index(header) + len(header)
|
68 |
-
section_text = resume_text[start_index:]
|
69 |
-
# Assuming sections are separated by two newlines, adjust based on actual resume format
|
70 |
-
end_index = section_text.find("\n\n")
|
71 |
-
if end_index != -1:
|
72 |
-
section_text = section_text[:end_index]
|
73 |
-
except ValueError:
|
74 |
-
section_text = "Section not found in the resume."
|
75 |
-
else:
|
76 |
-
# If "All" is selected, use the entire resume text
|
77 |
-
section_text = resume_text
|
78 |
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
-
|
|
|
82 |
|
83 |
|
84 |
def main():
|
85 |
input_api_key = Textbox(label="Enter your OpenAI API Key")
|
86 |
input_pdf_file = File(label="Upload your PDF resume", type="binary")
|
87 |
-
|
|
|
|
|
|
|
88 |
output_yaml = Textbox(label="Formatted Resume in YAML")
|
89 |
|
90 |
iface = gr.Interface(
|
91 |
fn=format_resume_to_yaml,
|
92 |
-
inputs=[input_api_key, input_pdf_file,
|
93 |
outputs=output_yaml,
|
94 |
title="Resume to YAML Formatter",
|
95 |
-
description="Upload a PDF resume, enter your OpenAI API key, and
|
96 |
)
|
97 |
|
98 |
iface.launch(debug=True, share=True)
|
99 |
|
|
|
100 |
if __name__ == "__main__":
|
101 |
main()
|
|
|
3 |
import io
|
4 |
import PyPDF2
|
5 |
from langchain_openai import ChatOpenAI
|
6 |
+
|
7 |
from langchain.chains import LLMChain
|
8 |
from langchain.memory import ConversationBufferMemory
|
9 |
from langchain import PromptTemplate
|
10 |
+
|
11 |
+
|
12 |
+
# Updated imports for Gradio components
|
13 |
from gradio.components import File, Textbox, Dropdown
|
14 |
|
15 |
+
|
16 |
def extract_text_from_pdf_binary(pdf_binary):
    """Extract all text from a PDF supplied as raw bytes.

    Parameters:
        pdf_binary: The PDF file content as bytes (e.g. from a Gradio
            File component with type="binary").

    Returns:
        A single string concatenating the extracted text of every page.
        Pages whose extraction yields nothing are skipped.
    """
    text = ""
    pdf_data = io.BytesIO(pdf_binary)
    # NOTE(review): the reader-setup lines were elided by the diff hunk in
    # the source view; reconstructed from the uses of `reader.pages` and
    # `num_pages` below — confirm against the original file.
    reader = PyPDF2.PdfReader(pdf_data)
    num_pages = len(reader.pages)

    for page in range(num_pages):
        current_page = reader.pages[page]
        page_text = current_page.extract_text()
        if page_text:  # extract_text() may return None or "" for image-only pages
            text += page_text
    return text
|
28 |
|
29 |
+
|
30 |
+
def format_resume_to_yaml(api_key, file_content, filter_option="full"):
    """Convert a PDF resume (raw bytes) into YAML text via an LLM.

    Parameters:
        api_key: OpenAI API key; exported to the environment so that
            ChatOpenAI picks it up.
        file_content: The uploaded PDF as bytes.
        filter_option: Which resume section to emit. "full" (the default)
            emits every section; unknown values fall back to "full".

    Returns:
        The LLM's YAML-formatted rendering of the (selected section of
        the) resume.

    Raises:
        ValueError: If file_content is empty.
    """
    # Set the API key for OpenAI (read by ChatOpenAI below).
    os.environ['OPENAI_API_KEY'] = api_key

    # Check that the uploaded file is not empty.
    if not file_content:
        raise ValueError("The uploaded file is empty.")

    # Extract text from the uploaded PDF binary.
    resume_text = extract_text_from_pdf_binary(file_content)

    # Section placeholders the model is asked to fill in. They are written
    # with doubled braces (f"{{{{...}}}}" -> "{{name}}") so PromptTemplate's
    # str.format renders them literally to the LLM instead of raising
    # KeyError for undeclared input variables; only {chat_history} and
    # {human_input} are real template variables.
    section_names = (
        "name", "phoneNumbers", "websites", "emails", "dateOfBirth",
        "addresses", "summary", "education", "workExperience", "skills",
        "certifications",
        # Add placeholders for other sections you want to filter
    )
    sections = {
        name: f"\n---\n{{{{{name}}}}}\n{{chat_history}}\n{{human_input}}\n"
        for name in section_names
    }
    # "full" concatenates every section placeholder into one template.
    sections["full"] = (
        "\n---\n"
        + "".join(f"{{{{{name}}}}}" for name in section_names)
        + "\n{chat_history}\n{human_input}\n"
    )

    # Use the selected filter option to pick the appropriate template,
    # falling back to the full template for unknown options.
    filtered_template = sections.get(filter_option, sections["full"])

    prompt = PromptTemplate(
        input_variables=["chat_history", "human_input"],
        template=filtered_template,
    )

    memory = ConversationBufferMemory(memory_key="chat_history")

    llm_chain = LLMChain(
        llm=ChatOpenAI(model="gpt-3.5-turbo"),
        prompt=prompt,
        verbose=True,
        memory=memory,
    )

    res = llm_chain.predict(human_input=resume_text)
    return res
|
86 |
|
87 |
|
88 |
def main():
    """Build and launch the Gradio interface for the resume formatter."""
    input_api_key = Textbox(label="Enter your OpenAI API Key")
    input_pdf_file = File(label="Upload your PDF resume", type="binary")

    # Dropdown for section filtering. The choices are listed explicitly:
    # the original read `sections.keys()`, but `sections` is local to
    # format_resume_to_yaml and undefined here (NameError at startup),
    # and "name" was duplicated in the list.
    filter_options = Dropdown(
        label="Filter",
        choices=[
            "full", "name", "phoneNumbers", "websites", "emails",
            "dateOfBirth", "addresses", "summary", "education",
            "workExperience", "skills", "certifications",
        ],
        value="full",  # matches format_resume_to_yaml's default
    )

    output_yaml = Textbox(label="Formatted Resume in YAML")

    iface = gr.Interface(
        fn=format_resume_to_yaml,
        inputs=[input_api_key, input_pdf_file, filter_options],
        outputs=output_yaml,
        title="Resume to YAML Formatter",
        description="Upload a PDF resume, enter your OpenAI API key, and choose a section to filter the output. (Full format by default)",
    )

    # share=True exposes a public Gradio link; debug=True surfaces errors.
    iface.launch(debug=True, share=True)
|
106 |
|
107 |
+
|
108 |
# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|