from swarmai.agents.AgentBase import AgentBase
from swarmai.utils.ai_engines import LanchainGoogleEngine, GPTConversEngine
from swarmai.utils.task_queue.Task import Task
from swarmai.utils.PromptFactory import PromptFactory
from langchain.utilities import ApifyWrapper


class CrunchbaseSearcher(AgentBase):
    """Very custom agent that can search for companies on Crunchbase and analyse them.

    The agent handles a single task type, ``Task.TaskTypes.crunchbase_search``:
    it asks the chat model for a short Google query, restricts that query to
    ``crunchbase.com/organization`` pages, scrapes every returned link through
    the ``epctex/crunchbase-scraper`` Apify actor and sends the concatenated
    result to the swarm.
    """

    # Marker the search engine puts in the first source when nothing was found.
    _NO_RESULT_MARKER = "No good Google Search Result was found"

    # Characters stripped from the model-generated query: ", newline, tab, ', ’.
    _QUERY_STRIP_TABLE = str.maketrans("", "", "\"\n\t'’")

    def __init__(self, agent_id, agent_type, swarm, logger):
        """Create the search/thinking engines and register the supported task type."""
        super().__init__(agent_id, agent_type, swarm, logger)
        self.search_engine = LanchainGoogleEngine("gpt-3.5-turbo", 0.5, 1000)
        self.thinking_engine = GPTConversEngine("gpt-3.5-turbo", 0.5, 1000)

        # Dispatch table: task type -> bound method that performs it.
        self.TASK_METHODS = {
            Task.TaskTypes.crunchbase_search: self.domain_specific_search,
        }

        self.apify_engine = ApifyWrapper()

    def perform_task(self) -> bool:
        """Run the handler registered for ``self.task`` and store its output in ``self.result``.

        Returns:
            True when the handler finished without raising, False otherwise.
            Failures are logged at ``error`` level and never propagate.
        """
        self.step = "perform_task"
        try:
            # self.task is already taken in the beginning of the cycle in AgentBase
            if not isinstance(self.task, Task):
                raise TypeError(f"Task is not of type Task, but {type(self.task)}")

            task_type = self.task.task_type
            if task_type not in self.TASK_METHODS:
                raise ValueError(
                    f"Task type {task_type} is not supported by the agent {self.agent_id} of type {self.agent_type}"
                )

            self.result = self.TASK_METHODS[task_type](self.task.task_description)
            return True
        except Exception as e:
            # self.task may be the very thing that is broken (e.g. not a Task),
            # so read the description defensively: the handler must not raise.
            task_description = getattr(self.task, "task_description", None)
            self.log(
                message=f"Agent {self.agent_id} of type {self.agent_type} failed to perform the task {task_description} with error {e}",
                level="error",
            )
            return False

    def share(self):
        """Intentionally a no-op for this agent."""
        pass

    def domain_specific_search(self, task_description):
        """Find Crunchbase organisations relevant to ``task_description`` and scrape them.

        Args:
            task_description: free-text description of the task to research.

        Returns:
            The concatenated textual scrape results for every link found, or
            ``None`` when the search produced no usable links.
        """
        self.step = "crunchbase_search"

        prompt = (
            f"based on the task description:\n{task_description}\n\ngenerate a short google search query under 5 words to find relevant companies on Crunchbase"
        )
        conversation = [
            {"role": "user", "content": prompt},
        ]

        search_query = self.thinking_engine.call_model(conversation)
        # Strip quotes/whitespace control characters in one pass, then drop the
        # word "crunchbase" because the site: filter below already scopes the query.
        search_query = (
            search_query.lower()
            .translate(self._QUERY_STRIP_TABLE)
            .replace("crunchbase", "")
        )
        search_query += " site:crunchbase.com/organization"

        # getting the relevant links:
        sources = self.search_engine.search_sources(search_query, n=5)
        nothing_found = not sources or (
            "Result" in sources[0]
            and sources[0]["Result"] == self._NO_RESULT_MARKER
        )
        if nothing_found:
            self.log(
                message=f"Agent {self.agent_id} of type {self.agent_type} failed to find any relevant links for the task {task_description}",
                level="error",
            )
            return None

        links = [item["link"] for item in sources]
        company_infos = "".join(self._get_crunchbase_data(link) for link in links)

        self._send_data_to_swarm(company_infos)
        self.log(
            message=f"Agent {self.agent_id} of type {self.agent_type} search:\n{task_description}\n\nand got:\n{company_infos}",
            level="info",
        )

        return company_infos

    def _get_crunchbase_data(self, url):
        """Scrape one Crunchbase URL through Apify and return the ``repr`` of the loaded items."""
        loader = self.apify_engine.call_actor(
            actor_id="epctex/crunchbase-scraper",
            run_input={
                "startUrls": [url],
                "proxy": {"useApifyProxy": True},
            },
            dataset_mapping_function=self._crunchbase_dataset_mapping_function,
        )
        return repr(loader.load())

    def _crunchbase_dataset_mapping_function(self, parsed_data):
        """Reduce one scraped dataset item to the handful of fields the agent reports.

        Args:
            parsed_data: one dataset item as a dict; nested sections that are
                missing or ``None`` are treated as empty.

        Returns:
            A dict with the keys ``title``, ``short_description``, ``website``,
            ``country`` and ``value_usd``; a value is ``None`` when the item does
            not provide it.
        """
        # NOTE(review): this returns a plain dict rather than a LangChain
        # Document -- confirm the loader accepts that before changing it.

        # ``x.get(key) or {}`` also covers keys that are present but null, which
        # ``x.get(key, {})`` would pass through as None and crash on.
        properties = parsed_data.get("properties") or {}
        cards = parsed_data.get("cards") or {}
        company = cards.get("company_about_fields2") or {}
        funding_summary = cards.get("funding_rounds_summary") or {}
        funding_total = funding_summary.get("funding_total") or {}
        website = company.get("website") or {}
        locations = company.get("location_identifiers") or []

        # First location entry tagged as a country, if any.
        country = next(
            (
                location.get("value")
                for location in locations
                if location.get("location_type") == "country"
            ),
            None,
        )

        return {
            "title": properties.get("title"),
            "short_description": properties.get("short_description"),
            "website": website.get("value"),
            "country": country,
            "value_usd": funding_total.get("value_usd"),
        }