# Hugging Face Space: formats an uploaded PDF resume into a YAML template
# using an OpenAI chat model via LangChain.
import os
import tempfile

import gradio as gr
# Updated imports for Gradio components
from gradio.components import File, Textbox
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain.document_loaders import PyPDFLoader
from langchain.llms import OpenAIChat
from langchain.memory import ConversationBufferMemory
def format_resume_to_yaml(api_key, file):
    """Extract the text of an uploaded PDF resume and ask an LLM to
    reformat it into a fixed YAML template.

    Args:
        api_key: OpenAI API key; exported to the environment so LangChain
            picks it up.
        file: The uploaded resume. Gradio's ``File(type="binary")``
            component delivers raw ``bytes``; a file-like object with
            ``.read()`` is also accepted for robustness.

    Returns:
        The model's YAML-formatted resume as a string.

    Raises:
        ValueError: If the upload is empty or the PDF cannot be parsed.
    """
    os.environ['OPENAI_API_KEY'] = api_key

    # With type="binary" Gradio hands the handler raw bytes, not a file
    # object — calling .read() on bytes would crash. Accept both shapes.
    if isinstance(file, (bytes, bytearray)):
        file_content = bytes(file)
    else:
        file_content = file.read()

    if not file_content:
        raise ValueError("The uploaded file is empty.")

    # PyPDFLoader needs a real path on disk, so spill the bytes to a
    # temporary file first.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf", mode='wb+') as tmp_file:
        tmp_file.write(file_content)
        tmp_file.flush()
        os.fsync(tmp_file.fileno())  # ensure data is on disk before reopening
        temp_file_path = tmp_file.name

    try:
        loader = PyPDFLoader(temp_file_path)
        docs = loader.load_and_split()  # list of Document chunks
    except Exception as e:
        # The original caught PyPDF2.errors.PdfReaderError without ever
        # importing PyPDF2, which itself raised NameError; catch broadly
        # and re-raise with context instead.
        raise ValueError(f"An error occurred while processing the PDF: {e}") from e
    finally:
        # NamedTemporaryFile(delete=False) leaves the file behind; always
        # clean it up, even when parsing fails.
        try:
            os.remove(temp_file_path)
        except OSError:
            pass

    # load_and_split returns Document objects, not plain strings — join
    # their page_content rather than the objects themselves.
    resume_text = " ".join(doc.page_content for doc in docs)

    template = """Format the provided resume to this YAML template:
---
name: ''
phoneNumbers:
- ''
websites:
- ''
emails:
- ''
dateOfBirth: ''
addresses:
- street: ''
  city: ''
  state: ''
  zip: ''
  country: ''
summary: ''
education:
- school: ''
  degree: ''
  fieldOfStudy: ''
  startDate: ''
  endDate: ''
workExperience:
- company: ''
  position: ''
  startDate: ''
  endDate: ''
skills:
- name: ''
certifications:
- name: ''
{chat_history}
{human_input}"""

    prompt = PromptTemplate(
        input_variables=["chat_history", "human_input"],
        template=template,
    )
    memory = ConversationBufferMemory(memory_key="chat_history")
    llm_chain = LLMChain(
        llm=OpenAIChat(model="gpt-3.5-turbo"),
        prompt=prompt,
        verbose=True,
        memory=memory,
    )

    # LLMChain.predict returns the completion string directly; the original
    # indexed it like a dict (res['output_text']), which raises TypeError.
    return llm_chain.predict(human_input=resume_text)
def on_file_upload(filename, file_content):
    """Validate an uploaded file and warn the user when it is empty.

    Args:
        filename: Name of the uploaded file (supplied by the upload event).
        file_content: Raw bytes of the upload; falsy means nothing usable
            was received.
    """
    if not file_content:
        # gr.Interface.alert does not exist in the Gradio API and would
        # raise AttributeError; gr.Warning is the supported way to surface
        # a popup message to the user.
        gr.Warning("Please upload a valid PDF resume.")
def main():
    """Build and launch the Gradio interface for the resume formatter."""
    api_key_box = Textbox(label="Enter your OpenAI API Key")
    # 'binary' delivers the uploaded file's raw content straight to the handler.
    resume_upload = File(label="Upload your PDF resume", type="binary")
    yaml_output = Textbox(label="Formatted Resume in YAML")

    demo = gr.Interface(
        fn=format_resume_to_yaml,
        inputs=[api_key_box, resume_upload],
        outputs=yaml_output,
        title="Resume to YAML Formatter",
        description=(
            "Upload a PDF resume and enter your OpenAI API key to get it "
            "formatted to a YAML template."
        ),
    )
    demo.launch(debug=True)
# Launch the app only when this module is executed as a script.
if __name__ == "__main__":
    main()