"""Gradio app that drafts a job-application email from LinkedIn and company data."""

import logging
import os
from typing import Any, Dict, Optional, Tuple, Union

import gradio as gr
import requests
from openai import OpenAI  # currently unused in this module; kept for compatibility

# Configure logging
logging.basicConfig(level=logging.INFO)

# Fetch API keys from environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PROXYCURL_API_KEY = os.getenv("PROXYCURL_API_KEY")
FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY")


def sanitize_data(data, default_value=""):
    """Return *data* stripped of surrounding whitespace, or *default_value*.

    Args:
        data: Any value; only non-blank strings are considered usable.
        default_value: Returned when *data* is not a string or is blank.

    Returns:
        The stripped string, or *default_value*.
    """
    if isinstance(data, str):
        stripped = data.strip()
        if stripped:
            return stripped
    return default_value


def fetch_linkedin_data(linkedin_url) -> Optional[Any]:
    """Fetch a LinkedIn profile through the Proxycurl API.

    Args:
        linkedin_url: Profile URL passed to the API as the ``url`` parameter.

    Returns:
        The decoded JSON response on HTTP 200, otherwise ``None`` (missing
        API key, non-200 status, network error, or undecodable body).
    """
    # Read at call time so a key exported after import is still honoured.
    api_key = os.getenv("PROXYCURL_API_KEY")
    if not api_key:
        # Without this guard the request would be sent with "Bearer None".
        logging.error("PROXYCURL_API_KEY is not set; cannot fetch LinkedIn data.")
        return None

    headers = {'Authorization': f'Bearer {api_key}'}
    api_endpoint = 'https://nubela.co/proxycurl/api/v2/linkedin'

    logging.info("Fetching LinkedIn data...")
    try:
        response = requests.get(
            api_endpoint,
            params={'url': linkedin_url},
            headers=headers,
            timeout=10,
        )
        if response.status_code != 200:
            logging.error("Error fetching LinkedIn data: %s", response.text)
            return None
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a 200 response whose body is not valid JSON.
        logging.error("Exception during LinkedIn data fetch: %s", e)
        return None

    logging.info("LinkedIn data fetched successfully.")
    return payload


def fetch_company_info(company_url) -> Optional[Any]:
    """Request company information for *company_url* from the Firecrawl API.

    Args:
        company_url: URL submitted to the crawl endpoint.

    Returns:
        The decoded JSON response on HTTP 200, otherwise ``None`` (missing
        API key, non-200 status, network error, or undecodable body).
    """
    api_key = os.getenv("FIRECRAWL_API_KEY")
    if not api_key:
        # Without this guard the request would be sent with "Bearer None".
        logging.error("FIRECRAWL_API_KEY is not set; cannot fetch company information.")
        return None

    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json',
    }
    api_endpoint = 'https://api.firecrawl.dev/v1/crawl'
    # NOTE(review): this endpoint presumably starts an asynchronous crawl job
    # and returns a job descriptor rather than page content, so keys such as
    # 'mission' or 'goal' may never be present downstream - confirm against
    # the Firecrawl API documentation.
    data = {
        "url": company_url,
        "limit": 100,
        "scrapeOptions": {
            "formats": ["markdown", "html"],
        },
    }

    logging.info("Fetching company information...")
    try:
        response = requests.post(api_endpoint, json=data, headers=headers, timeout=15)
        if response.status_code != 200:
            logging.error("Error fetching company information: %s", response.text)
            return None
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a 200 response whose body is not valid JSON.
        logging.error("Exception during company info fetch: %s", e)
        return None

    logging.info("Company information fetched successfully.")
    return payload


def structure_email(user_data, linkedin_info, company_info):
    """Build the email text, substituting defaults for any missing field.

    Args:
        user_data: Mapping with at least ``'name'``, ``'role'`` and
            ``'company_url'``.
        linkedin_info: Mapping read for ``'current_role'``, ``'skills'`` and
            ``'industry'``; ``None`` is treated as empty.
        company_info: Mapping read for ``'company_name'``, ``'mission'`` and
            ``'goal'``; ``None`` is treated as empty.

    Returns:
        The complete email body as a single string.
    """
    # NOTE(review): the keys read from linkedin_info / company_info are
    # assumed; verify them against the actual API response schemas.
    linkedin_info = linkedin_info or {}
    company_info = company_info or {}

    applicant_role = sanitize_data(user_data['role'])
    applicant_name = sanitize_data(user_data['name'])

    # Where two different defaults appear, the first applies when the key is
    # absent and the second when the key is present but blank or not a string.
    linkedin_role = sanitize_data(linkedin_info.get('current_role'), applicant_role)
    linkedin_skills = sanitize_data(
        linkedin_info.get('skills', 'relevant skills'),
        "skills relevant to this position",
    )
    linkedin_industry = sanitize_data(linkedin_info.get('industry'), 'the industry')

    # The user-supplied "Company URL or Name" field takes precedence; a blank
    # result falls back to a generic phrase so the sentence still reads.
    company_name = sanitize_data(
        user_data['company_url'] or company_info.get('company_name'),
        'the company',
    )
    company_mission = sanitize_data(
        company_info.get('mission'),
        f"{company_name}'s mission",
    )
    company_goal = sanitize_data(
        company_info.get('goal', 'achieving excellence'),
        "the company's goals",
    )

    # Construct the email with fully sanitized and available data
    email_body = (
        f"Dear Hiring Manager,\n\n"
        f"I am writing to express my interest in the {applicant_role} position at {company_name}. "
        f"{company_mission} aligns closely with my professional experience in {linkedin_industry}. "
        f"As a {linkedin_role}, I have developed expertise in {linkedin_skills}, which are highly relevant to this role.\n\n"
        f"My background in {linkedin_skills} will contribute significantly to {company_goal}. "
        f"I am eager to bring my expertise to {company_name} and collaborate with your team.\n\n"
        f"I would appreciate the opportunity to discuss how my background aligns with the needs of your organization. "
        f"Thank you for your time and consideration. I look forward to the possibility of contributing to your team.\n\n"
        f"Best regards,\n{applicant_name}"
    )
    return email_body


def validate_email(email_content, user_data):
    """Check that the email mentions every required keyword (case-insensitive).

    The applicant's name and role are compared in sanitised form, matching
    how ``structure_email`` embeds them; empty values are not required.

    Args:
        email_content: The generated email text.
        user_data: Mapping with ``'name'`` and ``'role'``.

    Returns:
        ``True`` when no required keyword is missing, else ``False``.
    """
    logging.info("Validating email content...")

    # Basic components we want to check in the email
    required_keywords = [
        sanitize_data(user_data['name']),
        sanitize_data(user_data['role']),
        "skills",
        "experience",
        "contribute",
        "Best regards",
    ]

    haystack = email_content.lower()
    missing_elements = [
        keyword
        for keyword in required_keywords
        if keyword and keyword.lower() not in haystack
    ]

    if missing_elements:
        logging.info("Missing elements: %s", missing_elements)
        return False

    logging.info("Email content validation passed.")
    return True


class Agent:
    """Agent whose behaviour is selected by its ``name``.

    Attributes are set directly from the constructor arguments:
    ``name``, ``instructions`` and ``user_data``.
    """

    def __init__(self, name, instructions, user_data):
        self.name = name
        self.instructions = instructions
        self.user_data = user_data

    def act(self):
        """Run the action associated with this agent's name.

        Returns:
            * ``"Data Collection Agent"``: ``(linkedin_info, company_info)``,
              or ``(None, None)`` if either fetch yielded nothing.
            * ``"Email Generation Agent"``: the email text; here
              ``user_data`` must hold the keys ``'user_data'``,
              ``'linkedin_info'`` and ``'company_info'``.
            * any other name: ``None``.
        """
        if self.name == "Data Collection Agent":
            linkedin_info = fetch_linkedin_data(self.user_data['linkedin_url'])
            company_info = fetch_company_info(self.user_data['company_url'])
            if linkedin_info and company_info:
                return linkedin_info, company_info
            return None, None

        if self.name == "Email Generation Agent":
            return structure_email(
                self.user_data['user_data'],
                self.user_data['linkedin_info'],
                self.user_data['company_info'],
            )

        return None


class Swarm:
    """Ordered collection of agents; only the first one is run."""

    def __init__(self):
        self.agents = []

    def add_agent(self, agent):
        """Append *agent* to the list of managed agents."""
        self.agents.append(agent)

    def run(self) -> Union[str, Tuple[Any, Any]]:
        """Run the first agent, which is expected to collect data.

        Returns:
            ``(linkedin_info, company_info)`` on success, or an error
            message string when either piece of data is missing. Callers
            must check for the string before unpacking.
        """
        # The data collection agent acts first
        linkedin_info, company_info = self.agents[0].act()
        if not linkedin_info or not company_info:
            return "Error: Could not retrieve data for LinkedIn or company information."
        return linkedin_info, company_info


def run_agent(name, email, phone, linkedin_url, company_url, role):
    """Collect data, generate the email and validate it (Gradio callback).

    Args:
        name: Applicant's name.
        email: Applicant's email address.
        phone: Applicant's phone number.
        linkedin_url: LinkedIn profile URL to look up.
        company_url: Company URL or name.
        role: Role being applied for.

    Returns:
        The generated email, or an error message string if data collection
        or validation fails.
    """
    user_data: Dict[str, Any] = {
        "name": name,
        "email": email,
        "phone": phone,
        "linkedin_url": linkedin_url,
        "company_url": company_url,
        "role": role,
    }

    # Initialize Swarm and add the Data Collection Agent
    email_swarm = Swarm()
    data_collection_agent = Agent(
        "Data Collection Agent",
        "Collect user inputs and relevant data",
        user_data,
    )
    email_swarm.add_agent(data_collection_agent)

    # Swarm.run() returns an error string on failure; check before unpacking,
    # otherwise tuple-unpacking the string raises ValueError.
    collected = email_swarm.run()
    if isinstance(collected, str):
        return collected
    linkedin_info, company_info = collected

    agent_data = {
        "user_data": user_data,
        "linkedin_info": linkedin_info,
        "company_info": company_info,
    }
    email_agent = Agent("Email Generation Agent", "Generate the email content", agent_data)
    email_content = email_agent.act()

    # Validate, regenerating up to max_iterations times.
    # NOTE: structure_email is deterministic, so a regenerated email is
    # identical to the previous one; the loop is kept for compatibility.
    max_iterations = 3
    for i in range(max_iterations):
        if validate_email(email_content, user_data):
            return email_content
        logging.info("Iteration %d: Refining email...", i + 1)
        email_content = structure_email(user_data, linkedin_info, company_info)

    return f"Unable to generate a valid email after {max_iterations} attempts."


# Set up the Gradio interface
final_interface = gr.Interface(
    fn=run_agent,
    inputs=[
        gr.Textbox(label="Name"),
        gr.Textbox(label="Email"),
        gr.Textbox(label="Phone Number"),
        gr.Textbox(label="LinkedIn Profile URL"),
        gr.Textbox(label="Company URL or Name"),
        gr.Textbox(label="Role Being Applied For"),
    ],
    outputs="text",
    title="Email Writing AI Agent",
    description="Autonomously generate a professional email tailored to the job application.",
)

if __name__ == "__main__":
    final_interface.launch()