barghavani committed on
Commit
da7bb3f
·
verified ·
1 Parent(s): 0a46070

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -107
app.py CHANGED
@@ -1,115 +1,115 @@
1
  import gradio as gr
2
- import io
3
- import PyPDF2
4
  from langchain.llms import OpenAIChat
5
  from langchain.chains import LLMChain
6
  from langchain.memory import ConversationBufferMemory
7
  from langchain import PromptTemplate
 
 
8
 
 
 
9
 
10
import os  # BUG FIX: 'os' was used below but never imported anywhere in this file

# SECURITY FIX: the original line hardcoded a live OpenAI API key directly in
# source. Any key committed to a repository must be treated as compromised and
# revoked immediately. Read the key from the environment instead; leave any
# externally supplied value untouched.
os.environ.setdefault('OPENAI_API_KEY', '')

# Prompt instructing the model to reshape free-form resume text into a fixed
# YAML schema. {chat_history} and {human_input} are filled in by the chain.
template = """Format the provided resume to this YAML template:
---
name: ''
phoneNumbers:
- ''
websites:
- ''
emails:
- ''
dateOfBirth: ''
addresses:
- street: ''
city: ''
state: ''
zip: ''
country: ''
summary: ''
education:
- school: ''
degree: ''
fieldOfStudy: ''
startDate: ''
endDate: ''
workExperience:
- company: ''
position: ''
startDate: ''
endDate: ''
skills:
- name: ''
certifications:
- name: ''

{chat_history}
{human_input}"""

prompt = PromptTemplate(
    input_variables=["chat_history", "human_input"],
    template=template
)

# Conversation memory keyed to match the {chat_history} slot in the template.
memory = ConversationBufferMemory(memory_key="chat_history")

llm_chain = LLMChain(
    llm=OpenAIChat(model="gpt-3.5-turbo"),
    prompt=prompt,
    verbose=True,
    memory=memory,
)
60
def extract_text_from_binary(file_path):
    """Return the concatenated text of every page of the PDF at *file_path*."""
    # Read the whole file as raw bytes, then hand PyPDF2 an in-memory buffer.
    with open(file_path, 'rb') as fh:
        buffer = io.BytesIO(fh.read())

    reader = PyPDF2.PdfReader(buffer)

    # Pages with no extractable text yield None or '' — skip those.
    chunks = (page.extract_text() for page in reader.pages)
    return "".join(chunk for chunk in chunks if chunk)
77
-
78
def format_resume_to_yaml(resume):
    """Run the resume text through the module-level LLM chain.

    OPENAI_API_KEY must already be set in the environment by the caller.
    Returns the model's YAML-formatted output as a string.
    """
    return llm_chain.predict(human_input=resume)
82
-
83
def process_pdf(file, open_ai_key):
    """Extract text from an uploaded PDF resume and return it formatted as YAML.

    Parameters:
        file: uploaded file object exposing .read() (Gradio File input).
        open_ai_key: OpenAI API key entered by the user.

    Returns:
        The YAML-formatted resume produced by the LLM chain.
    """
    # BUG FIX: this function used os.environ but 'os' was never imported
    # anywhere in the file; import it locally so the fix is self-contained.
    import os

    # Set the key before any OpenAI call is made.
    os.environ['OPENAI_API_KEY'] = open_ai_key

    # Persist the upload so the PyPDF2-based extractor can re-open it by path.
    temp_pdf_path = "/tmp/uploaded_resume.pdf"
    with open(temp_pdf_path, 'wb') as f:
        f.write(file.read())  # read the uploaded file as bytes

    try:
        extracted_text = extract_text_from_binary(temp_pdf_path)
        return format_resume_to_yaml(extracted_text)
    finally:
        # BUG FIX: don't leave the user's resume lying around in /tmp.
        try:
            os.remove(temp_pdf_path)
        except OSError:
            pass
99
-
100
-
101
-
102
# --- Gradio UI: upload a PDF resume, get it back as YAML --------------------
with gr.Blocks() as demo:
    gr.Markdown("### Upload a Resume in PDF Format")
    with gr.Row():
        pdf_file = gr.File(label="Select a PDF file", type="file")
        openai_key_input = gr.Textbox(label="OpenAI API Key", type="password")
    format_button = gr.Button("Format Resume")
    output_textbox = gr.Textbox(label="Formatted Resume in YAML", lines=20)

    # Wire the button: (pdf, key) -> process_pdf -> YAML text box.
    format_button.click(fn=process_pdf,
                        inputs=[pdf_file, openai_key_input],
                        outputs=[output_textbox])

demo.launch(debug=True)
 
 
 
 
 
 
1
  import gradio as gr
2
+ from langchain.document_loaders import PyPDFLoader
 
3
  from langchain.llms import OpenAIChat
4
  from langchain.chains import LLMChain
5
  from langchain.memory import ConversationBufferMemory
6
  from langchain import PromptTemplate
7
+ import os
8
+ import tempfile
9
 
10
+ # Updated imports for Gradio components
11
+ from gradio.components import File, Textbox
12
 
13
+
14
def format_resume_to_yaml(api_key, file):
    """Extract text from an uploaded PDF resume and reformat it as YAML.

    Parameters:
        api_key: OpenAI API key entered by the user.
        file: uploaded file object exposing .read() (Gradio File input).

    Returns:
        The model's YAML-formatted resume as a string.

    Raises:
        ValueError: if the upload is empty or the PDF cannot be parsed.
    """
    # Make the key available to the OpenAI client before any call is made.
    os.environ['OPENAI_API_KEY'] = api_key

    file_content = file.read()
    if not file_content:
        raise ValueError("The uploaded file is empty.")

    # PyPDFLoader needs a filesystem path, so spill the upload to a temp file.
    with tempfile.NamedTemporaryFile(delete=False, mode='wb+') as tmp_file:
        tmp_file.write(file_content)
        tmp_file.flush()
        os.fsync(tmp_file.fileno())  # ensure data is on disk before reopening
        temp_file_path = tmp_file.name

    try:
        loader = PyPDFLoader(temp_file_path)
        docs = loader.load_and_split()  # list of Document chunks from the PDF
    except Exception as e:
        # BUG FIX: the original caught PyPDF2.errors.PdfReaderError, but
        # PyPDF2 is never imported in this file (the except clause itself
        # raised NameError) and the attribute name is misspelled (the real
        # one is PdfReadError). Catch broadly and re-raise as ValueError.
        raise ValueError(f"An error occurred while processing the PDF: {e}")
    finally:
        # BUG FIX: the temporary file was never removed; clean it up.
        try:
            os.remove(temp_file_path)
        except OSError:
            pass

    # BUG FIX: load_and_split() returns Document objects, not strings, so
    # " ".join(docs) raised TypeError. Join each chunk's page_content instead.
    resume_text = " ".join(doc.page_content for doc in docs)

    # Prompt instructing the model to reshape the resume into a fixed YAML
    # schema; {chat_history} and {human_input} are filled in by the chain.
    template = """Format the provided resume to this YAML template:
---
name: ''
phoneNumbers:
- ''
websites:
- ''
emails:
- ''
dateOfBirth: ''
addresses:
- street: ''
city: ''
state: ''
zip: ''
country: ''
summary: ''
education:
- school: ''
degree: ''
fieldOfStudy: ''
startDate: ''
endDate: ''
workExperience:
- company: ''
position: ''
startDate: ''
endDate: ''
skills:
- name: ''
certifications:
- name: ''

{chat_history}
{human_input}"""

    prompt = PromptTemplate(
        input_variables=["chat_history", "human_input"],
        template=template
    )

    memory = ConversationBufferMemory(memory_key="chat_history")

    llm_chain = LLMChain(
        llm=OpenAIChat(model="gpt-3.5-turbo"),
        prompt=prompt,
        verbose=True,
        memory=memory,
    )

    # BUG FIX: LLMChain.predict() returns the output string directly, so
    # the original res['output_text'] indexing raised TypeError.
    return llm_chain.predict(human_input=resume_text)
94
def on_file_upload(filename, file_content):
    """Validate an uploaded resume; surface a UI error when it is empty.

    Parameters:
        filename: name of the uploaded file (unused beyond the event hook).
        file_content: raw bytes of the upload.

    Raises:
        gr.Error: when the upload is empty, so Gradio shows the message.
    """
    # BUG FIX: the original called gr.Interface.alert(), which does not exist
    # in the Gradio API (it would raise AttributeError). Raising gr.Error is
    # the supported way to display an error message to the user.
    if not file_content:
        raise gr.Error("Please upload a valid PDF resume.")
97
+
98
def main():
    """Build and launch the resume-to-YAML Gradio interface."""
    input_api_key = Textbox(label="Enter your OpenAI API Key")
    input_pdf_file = File(label="Upload your PDF resume")
    output_yaml = Textbox(label="Formatted Resume in YAML")

    # BUG FIX: the original constructed a second File component inline with an
    # unsupported 'upload_event' keyword (gr.File has no such constructor
    # parameter, so building the Interface raised TypeError) and left the
    # input_pdf_file defined above unused. Reuse the component built above.
    iface = gr.Interface(
        fn=format_resume_to_yaml,
        inputs=[input_api_key, input_pdf_file],
        outputs=output_yaml,
        title="Resume to YAML Formatter",
        description="Upload a PDF resume and enter your OpenAI API key to get it formatted to a YAML template.",
    )

    iface.launch(debug=True)


if __name__ == "__main__":
    main()