"""Streamlit app that anonymises a resume PDF with an LLM and offers the result as a PDF.

Flow: read the PDF text -> ask a Groq-hosted model to strip personal details ->
render the returned Markdown to a PDF -> offer it for download.
"""

import os
import tempfile
from io import BytesIO

import markdown2
import streamlit as st
# from pypdf import PdfReader, PdfWriter
# from pdf2image import convert_from_path
from langchain_community.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain.chains.llm import LLMChain
from weasyprint import HTML

# Instructions sent to the model ahead of the resume text.
# The text is kept verbatim; the trailing backslashes only wrap the source line
# and do not insert newlines into the string.
# NOTE(review): the prompt contains typos ("exail", "writen") that are preserved
# on purpose here -- change them deliberately if the prompt is revised.
_SYSTEM_PROMPT = """ You are an AI assistant designed to remove and format resume data. \
When provided with extracted text from a PDF resume, your task is to remove personal information \
and certain details while maintaining the professional content and structure. \n\
Follow the guidelines below: \
Keep projects, experience, technical skills as it is without any change. \
Remove Salutations: Mr, Mrs, Ms, etc. \
Remove Names: All instances of the candidate's names. \
Remove Gender: Any mention of gender. \
Remove Age/D.O.B./Astrology Info: Any references to age, date of birth, or astrological signs. \
Remove Links of personal accounts for example: exail id, github url, linkedin url and all the other \
urls except the project and experience urls. \
Remove email address, mobile number, or any other information that has personal identity. \
Anonymize Location: Replace specific locations with more general terms \
(e.g., "Willing to relocate, currently based in Leicester"). \
Anonymize Education Institutions: Replace the names of educational institutions/schools with \
"top university (e.g. highly reputable university on the global stage) or top school" if applicable. \
Anonymize Language Skills: Replace specific languages with regional groupings for multilingual \
candidates (e.g., "proficient in multiple European languages"). \
Remove Hobbies and INTERESTS: Remove specific details related to hobbies and interests \
Anonymize Other Fields: Make specific removals as needed to protect the candidate's identity. \
Remove professional summary, objective, agenda and all these type of sections. \
Keep only related skills ACHIEVEMENTS, awards and certificate which are writen by you. \
Ensure the remaining sections and information are formatted properly to maintain the professional \
appearance of the resume. \
Ensure proper formatting of the resume with proper content justifications, add markdown, add bullet \
points and spacing wherever required. \
Return the output of resume content only. \
Don't include any notes or comments. """
# Disabled guideline: Remove achievements, awards and certificates that are not
# related to professional work.

# Slot into which the extracted resume text is substituted.
_USER_PROMPT_TEMPLATE = """ {resume_text} """


def format_string(input_string: str) -> str:
    """Return the part of *input_string* that follows its first ":".

    If the string contains no ":", it is returned unchanged.

    NOTE(review): presumably this strips a model preamble such as
    "Here is the resume:". A response without a preamble that still contains a
    colon will lose everything up to that colon -- confirm this is intended.
    """
    _, separator, remainder = input_string.partition(":")
    return remainder if separator else input_string


def save_uploaded_file(uploadedfile) -> str:
    """Write an uploaded file into the system temp directory and return its path.

    Args:
        uploadedfile: Object exposing ``name`` and ``getbuffer()`` (the value
            returned by ``st.file_uploader`` in ``main``).

    Returns:
        Absolute path of the saved copy.
    """
    # The name is supplied by the client; keep only its final component so a
    # name containing directory separators cannot escape the temp directory.
    safe_name = os.path.basename(uploadedfile.name)
    save_path = os.path.join(tempfile.gettempdir(), safe_name)
    with open(save_path, "wb") as f:
        f.write(uploadedfile.getbuffer())
    return save_path


def read_pdf(file_path: str) -> str:
    """Extract the text of the PDF at *file_path*.

    Each loaded document's text is prefixed with a space and followed by a
    blank line; the pieces are concatenated in document order.
    """
    loader = PyPDFLoader(file_path)
    # load() is used instead of load_and_split(): the latter runs a text
    # splitter whose overlapping chunks would repeat text once re-joined.
    pages = loader.load()
    return "".join(f" {page.page_content}\n\n" for page in pages)


def generate_pdf_from_markup(markup_text: str) -> str:
    """Render Markdown text to a PDF file and return the file's path.

    The PDF is written to a uniquely named temporary file so that concurrent
    sessions never share (or overwrite) one another's output. The caller is
    responsible for deleting the file when it is no longer needed.
    """
    html_content = markdown2.markdown(markup_text)

    fd, pdf_path = tempfile.mkstemp(suffix=".pdf")
    os.close(fd)  # weasyprint opens the path itself
    try:
        HTML(string=html_content).write_pdf(pdf_path)
    except Exception:
        # Do not leave an empty/partial file behind when rendering fails.
        os.remove(pdf_path)
        raise
    return pdf_path


def parse_resume(data: str) -> dict:
    """Send resume text to the LLM for anonymisation and return the chain output.

    Args:
        data: Plain text extracted from the resume.

    Returns:
        The mapping returned by ``LLMChain.invoke``; the generated resume is
        read from its ``"text"`` key by ``handle_pdf``.
    """
    llm = ChatGroq(api_key=os.getenv("GROQ_API_KEY"), model="llama3-70b-8192")
    prompt_template = PromptTemplate(
        input_variables=["resume_text"],
        template=_SYSTEM_PROMPT + _USER_PROMPT_TEMPLATE,
    )
    anonymize_chain = LLMChain(llm=llm, prompt=prompt_template)
    return anonymize_chain.invoke({"resume_text": data})


def handle_pdf(file_path: str) -> None:
    """Anonymise the resume at *file_path*, display it, and offer a PDF download."""
    with st.spinner("Parsing Resume..."):
        data = read_pdf(file_path)
        modified_data = parse_resume(data)
        formatted_data = format_string(modified_data["text"])

    st.write(formatted_data)

    if not formatted_data:
        st.warning("No resume content was returned.")
        return

    with st.spinner("Generating PDF..."):
        pdf_path = generate_pdf_from_markup(formatted_data)

    # Load the bytes into memory, then remove the temporary file so generated
    # resumes do not accumulate in the temp directory.
    try:
        with open(pdf_path, "rb") as f:
            pdf_bytes = f.read()
    finally:
        os.remove(pdf_path)

    st.success("PDF generated successfully.")
    st.download_button(
        label="Download PDF",
        data=pdf_bytes,
        file_name="generated.pdf",
        mime="application/pdf",
    )


def main() -> None:
    """Render the page: choose the demo PDF or upload one, then process it."""
    st.title("Resume Parser")

    option = st.radio(
        "Choose an option:",
        ("Use Demo PDF", "Browse Files"),
    )

    if option == "Use Demo PDF":
        demo_pdf_path = "demo.pdf"
        st.info("You have selected the demo PDF.")
        if st.button("Click to go with Demo pdf"):
            if os.path.exists(demo_pdf_path):
                handle_pdf(demo_pdf_path)
            else:
                st.error(f"Demo file not found: {demo_pdf_path}")
    elif option == "Browse Files":
        uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
        if uploaded_file is not None:
            original_file_path = save_uploaded_file(uploaded_file)
            st.success(f"File saved at {original_file_path}")
            # NOTE(review): Streamlit re-executes the script on every widget
            # interaction, so this branch repeats the LLM call on each rerun
            # (including a click on the download button) -- consider caching.
            handle_pdf(original_file_path)


if __name__ == "__main__":
    main()