barghavani committed on
Commit
9a1694c
·
verified ·
1 Parent(s): e249cd1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -30
app.py CHANGED
@@ -1,42 +1,38 @@
1
  import gradio as gr
2
- from langchain.document_loaders import PyPDFLoader
 
 
3
  from langchain.llms import OpenAIChat
4
  from langchain.chains import LLMChain
5
  from langchain.memory import ConversationBufferMemory
6
  from langchain import PromptTemplate
7
- import os
8
- import tempfile
9
 
10
  # Updated imports for Gradio components
11
  from gradio.components import File, Textbox
12
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
- def format_resume_to_yaml(api_key, file):
15
  # Set the API key for OpenAI
16
  os.environ['OPENAI_API_KEY'] = api_key
17
 
18
- file_content = file.read()
19
-
20
  # Check if the file content is not empty
21
  if not file_content:
22
  raise ValueError("The uploaded file is empty.")
23
 
24
- # Save the uploaded file content to a temporary file
25
- with tempfile.NamedTemporaryFile(delete=False, mode='wb+') as tmp_file:
26
- tmp_file.write(file_content)
27
- tmp_file.flush()
28
- os.fsync(tmp_file.fileno()) # Ensure data is written to disk
29
- temp_file_path = tmp_file.name
30
-
31
- # Now we can use PyPDFLoader with the path to the temporary file
32
- try:
33
- loader = PyPDFLoader(temp_file_path)
34
- docs = loader.load_and_split() # This will return a list of text chunks from the PDF
35
- except (IOError, PyPDF2.errors.PdfReaderError) as e: # Handle potential PDF reading errors
36
- raise ValueError(f"An error occurred while processing the PDF: {e}")
37
-
38
- # Combine the text chunks into a single string
39
- resume_text = " ".join(docs)
40
 
41
  template = """Format the provided resume to this YAML template:
42
  ---
@@ -70,7 +66,6 @@ def format_resume_to_yaml(api_key, file):
70
  - name: ''
71
  certifications:
72
  - name: ''
73
-
74
  {chat_history}
75
  {human_input}"""
76
 
@@ -91,13 +86,8 @@ def format_resume_to_yaml(api_key, file):
91
  res = llm_chain.predict(human_input=resume_text)
92
  return res['output_text']
93
 
94
- def on_file_upload(filename, file_content):
95
- if not file_content:
96
- gr.Interface.alert(title="Error", message="Please upload a valid PDF resume.")
97
-
98
  def main():
99
  input_api_key = Textbox(label="Enter your OpenAI API Key")
100
- # Use 'binary' type to receive the file's content directly as a binary object
101
  input_pdf_file = File(label="Upload your PDF resume", type="binary")
102
  output_yaml = Textbox(label="Formatted Resume in YAML")
103
 
@@ -113,5 +103,3 @@ def main():
113
 
114
  if __name__ == "__main__":
115
  main()
116
-
117
-
 
1
  import gradio as gr
2
+ import os
3
+ import io
4
+ import PyPDF2
5
  from langchain.llms import OpenAIChat
6
  from langchain.chains import LLMChain
7
  from langchain.memory import ConversationBufferMemory
8
  from langchain import PromptTemplate
 
 
9
 
10
  # Updated imports for Gradio components
11
  from gradio.components import File, Textbox
12
 
13
def extract_text_from_pdf_binary(pdf_binary):
    """Return the concatenated text of every page of an in-memory PDF.

    Args:
        pdf_binary: The raw bytes of a PDF document.

    Returns:
        The text extracted from each page, joined in page order with no
        separator. Pages whose extraction yields None or an empty string
        contribute nothing to the result.
    """
    # Wrap the bytes in a file-like object so PdfReader can read them
    # without writing a temporary file to disk.
    reader = PyPDF2.PdfReader(io.BytesIO(pdf_binary))

    # Iterate the pages directly and join once, instead of indexing by
    # position and growing the string with repeated `+=`.
    return "".join(page.extract_text() or "" for page in reader.pages)
25
 
26
+ def format_resume_to_yaml(api_key, file_content):
27
  # Set the API key for OpenAI
28
  os.environ['OPENAI_API_KEY'] = api_key
29
 
 
 
30
  # Check if the file content is not empty
31
  if not file_content:
32
  raise ValueError("The uploaded file is empty.")
33
 
34
+ # Extract text from the uploaded PDF binary
35
+ resume_text = extract_text_from_pdf_binary(file_content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  template = """Format the provided resume to this YAML template:
38
  ---
 
66
  - name: ''
67
  certifications:
68
  - name: ''
 
69
  {chat_history}
70
  {human_input}"""
71
 
 
86
  res = llm_chain.predict(human_input=resume_text)
87
  return res['output_text']
88
 
 
 
 
 
89
  def main():
90
  input_api_key = Textbox(label="Enter your OpenAI API Key")
 
91
  input_pdf_file = File(label="Upload your PDF resume", type="binary")
92
  output_yaml = Textbox(label="Formatted Resume in YAML")
93
 
 
103
 
104
  if __name__ == "__main__":
105
  main()