Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,42 +1,38 @@
|
|
1 |
import gradio as gr
|
2 |
-
|
|
|
|
|
3 |
from langchain.llms import OpenAIChat
|
4 |
from langchain.chains import LLMChain
|
5 |
from langchain.memory import ConversationBufferMemory
|
6 |
from langchain import PromptTemplate
|
7 |
-
import os
|
8 |
-
import tempfile
|
9 |
|
10 |
# Updated imports for Gradio components
|
11 |
from gradio.components import File, Textbox
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
-
def format_resume_to_yaml(api_key,
|
15 |
# Set the API key for OpenAI
|
16 |
os.environ['OPENAI_API_KEY'] = api_key
|
17 |
|
18 |
-
file_content = file.read()
|
19 |
-
|
20 |
# Check if the file content is not empty
|
21 |
if not file_content:
|
22 |
raise ValueError("The uploaded file is empty.")
|
23 |
|
24 |
-
#
|
25 |
-
|
26 |
-
tmp_file.write(file_content)
|
27 |
-
tmp_file.flush()
|
28 |
-
os.fsync(tmp_file.fileno()) # Ensure data is written to disk
|
29 |
-
temp_file_path = tmp_file.name
|
30 |
-
|
31 |
-
# Now we can use PyPDFLoader with the path to the temporary file
|
32 |
-
try:
|
33 |
-
loader = PyPDFLoader(temp_file_path)
|
34 |
-
docs = loader.load_and_split() # This will return a list of text chunks from the PDF
|
35 |
-
except (IOError, PyPDF2.errors.PdfReaderError) as e: # Handle potential PDF reading errors
|
36 |
-
raise ValueError(f"An error occurred while processing the PDF: {e}")
|
37 |
-
|
38 |
-
# Combine the text chunks into a single string
|
39 |
-
resume_text = " ".join(docs)
|
40 |
|
41 |
template = """Format the provided resume to this YAML template:
|
42 |
---
|
@@ -70,7 +66,6 @@ def format_resume_to_yaml(api_key, file):
|
|
70 |
- name: ''
|
71 |
certifications:
|
72 |
- name: ''
|
73 |
-
|
74 |
{chat_history}
|
75 |
{human_input}"""
|
76 |
|
@@ -91,13 +86,8 @@ def format_resume_to_yaml(api_key, file):
|
|
91 |
res = llm_chain.predict(human_input=resume_text)
|
92 |
return res['output_text']
|
93 |
|
94 |
-
def on_file_upload(filename, file_content):
|
95 |
-
if not file_content:
|
96 |
-
gr.Interface.alert(title="Error", message="Please upload a valid PDF resume.")
|
97 |
-
|
98 |
def main():
|
99 |
input_api_key = Textbox(label="Enter your OpenAI API Key")
|
100 |
-
# Use 'binary' type to receive the file's content directly as a binary object
|
101 |
input_pdf_file = File(label="Upload your PDF resume", type="binary")
|
102 |
output_yaml = Textbox(label="Formatted Resume in YAML")
|
103 |
|
@@ -113,5 +103,3 @@ def main():
|
|
113 |
|
114 |
if __name__ == "__main__":
|
115 |
main()
|
116 |
-
|
117 |
-
|
|
|
1 |
import gradio as gr
|
2 |
+
import os
|
3 |
+
import io
|
4 |
+
import PyPDF2
|
5 |
from langchain.llms import OpenAIChat
|
6 |
from langchain.chains import LLMChain
|
7 |
from langchain.memory import ConversationBufferMemory
|
8 |
from langchain import PromptTemplate
|
|
|
|
|
9 |
|
10 |
# Updated imports for Gradio components
|
11 |
from gradio.components import File, Textbox
|
12 |
|
13 |
+
def extract_text_from_pdf_binary(pdf_binary):
    """Extract all text from a PDF supplied as raw bytes.

    Args:
        pdf_binary: The PDF file content as a bytes-like object
            (e.g. what Gradio's File component with type="binary" provides).

    Returns:
        str: The concatenated text of every page. Pages for which
        PyPDF2 returns None or an empty string are skipped, so the
        result is "" for an image-only (non-text) PDF.
    """
    pdf_data = io.BytesIO(pdf_binary)
    reader = PyPDF2.PdfReader(pdf_data)

    # Iterate the pages directly instead of indexing by range(len(...)),
    # and collect per-page strings for a single join rather than
    # repeated string += (which is quadratic in the worst case).
    pages_text = []
    for page in reader.pages:
        page_text = page.extract_text()
        if page_text:  # extract_text() may return None or "" for a page
            pages_text.append(page_text)
    return "".join(pages_text)
|
25 |
|
26 |
+
def format_resume_to_yaml(api_key, file_content):
|
27 |
# Set the API key for OpenAI
|
28 |
os.environ['OPENAI_API_KEY'] = api_key
|
29 |
|
|
|
|
|
30 |
# Check if the file content is not empty
|
31 |
if not file_content:
|
32 |
raise ValueError("The uploaded file is empty.")
|
33 |
|
34 |
+
# Extract text from the uploaded PDF binary
|
35 |
+
resume_text = extract_text_from_pdf_binary(file_content)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
template = """Format the provided resume to this YAML template:
|
38 |
---
|
|
|
66 |
- name: ''
|
67 |
certifications:
|
68 |
- name: ''
|
|
|
69 |
{chat_history}
|
70 |
{human_input}"""
|
71 |
|
|
|
86 |
res = llm_chain.predict(human_input=resume_text)
|
87 |
return res['output_text']
|
88 |
|
|
|
|
|
|
|
|
|
89 |
def main():
|
90 |
input_api_key = Textbox(label="Enter your OpenAI API Key")
|
|
|
91 |
input_pdf_file = File(label="Upload your PDF resume", type="binary")
|
92 |
output_yaml = Textbox(label="Formatted Resume in YAML")
|
93 |
|
|
|
103 |
|
104 |
if __name__ == "__main__":
|
105 |
main()
|
|
|
|