from typing import List

from llama_index.core.vector_stores import (
    MetadataFilter,
    MetadataFilters,
)
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.agent.openai import OpenAIAgent
from llama_index.llms.openai import OpenAI
from llama_index.core.query_engine import CitationQueryEngine
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.multi_modal_llms.openai import OpenAIMultiModal
from llama_index.core import Settings

from core.chat.chatstore import ChatStore
from core.multimodal import MultimodalQueryEngine
from config import GPTBOT_CONFIG
from core.prompt import (
    SYSTEM_BOT_TEMPLATE,
    ADDITIONAL_INFORMATIONS,
    SYSTEM_BOT_GENERAL_TEMPLATE,
    SYSTEM_BOT_IMAGE_TEMPLATE,
)
from core.parser import join_list


class Engine:
    """Factory for the query engines and the OpenAI chat agent used by the bot.

    Instantiating this class also assigns the LLM and the embedding model to
    the process-wide LlamaIndex ``Settings`` object.
    """

    # Values previously repeated inline in several methods.
    EMBEDDING_MODEL = "text-embedding-3-large"
    MULTIMODAL_MODEL = "gpt-4o-mini"
    MULTIMODAL_MAX_NEW_TOKENS = 4096
    SIMILARITY_TOP_K = 10

    def __init__(self):
        """Create the LLM and chat store and register global LlamaIndex settings."""
        self.llm = OpenAI(
            temperature=GPTBOT_CONFIG.temperature,
            model=GPTBOT_CONFIG.model,
            max_tokens=GPTBOT_CONFIG.max_tokens,
            api_key=GPTBOT_CONFIG.api_key,
        )
        self.chat_store = ChatStore()

        # Settings is global state: these assignments affect every LlamaIndex
        # component that relies on the default LLM / embedding model.
        Settings.llm = self.llm
        Settings.embed_model = OpenAIEmbedding(model=self.EMBEDDING_MODEL)

    def _build_multimodal_engine(self, retriever):
        """Wrap ``retriever`` in a ``MultimodalQueryEngine`` with a new multimodal LLM."""
        multimodal_llm = OpenAIMultiModal(
            model=self.MULTIMODAL_MODEL,
            max_new_tokens=self.MULTIMODAL_MAX_NEW_TOKENS,
        )
        return MultimodalQueryEngine(
            retriever=retriever,
            multi_modal_llm=multimodal_llm,
        )

    def get_citation_engine(self, titles: List, index):
        """Return a query engine whose retriever is filtered by ``title`` metadata.

        Args:
            titles: Titles to match. One ``title == <value>`` filter is built
                per entry and the filters are combined with ``"or"``.
            index: Object exposing ``as_retriever(**kwargs)``.

        Returns:
            A ``MultimodalQueryEngine`` built on the filtered retriever.

        Raises:
            TypeError: If ``titles`` is ``None``.
        """
        if titles is None:
            # Previously this surfaced as "'NoneType' object is not iterable"
            # from inside the comprehension below; same exception type, but
            # with a message that tells the caller what to fix.
            raise TypeError(
                "titles must be an iterable of titles, got None "
                "(titles is required when type_bot is not 'general')"
            )

        title_filters = MetadataFilters(
            filters=[
                MetadataFilter(key="title", value=title, operator="==")
                for title in titles
            ],
            condition="or",
        )
        retriever = index.as_retriever(
            similarity_top_k=self.SIMILARITY_TOP_K,
            filters=title_filters,
        )
        return self._build_multimodal_engine(retriever)

    def get_chat_engine(self, session_id, index, titles=None, type_bot="general"):
        """Build an ``OpenAIAgent`` with a single retrieval tool over ``index``.

        Args:
            session_id: Passed to ``ChatStore.initialize_memory_bot`` to obtain
                the agent's memory.
            index: Object exposing ``as_retriever(**kwargs)``.
            titles: Titles used to filter retrieval and to fill the additional
                information section of the system prompt. Required when
                ``type_bot`` is not ``"general"``; not used otherwise.
            type_bot: ``"general"`` retrieves from the whole index; any other
                value restricts retrieval to ``titles``.

        Returns:
            The configured ``OpenAIAgent``.

        Raises:
            TypeError: If ``type_bot`` is not ``"general"`` and ``titles`` is
                ``None``.
        """
        if type_bot == "general":
            retriever = index.as_retriever(similarity_top_k=self.SIMILARITY_TOP_K)
            query_engine = self._build_multimodal_engine(retriever)
        else:
            query_engine = self.get_citation_engine(titles, index)

        vector_tool = QueryEngineTool.from_defaults(
            query_engine=query_engine,
            name="vector_tool",
            description=(
                "Useful for retrieving specific context from the data from a book containing information about medicine"
            ),
        )

        if type_bot == "general":
            # NOTE(review): the template is used unformatted here but is passed
            # through .format(additional_information=...) in the other branch.
            # If it contains an {additional_information} placeholder, that
            # placeholder reaches the model verbatim - confirm this is intended.
            system_prompt = SYSTEM_BOT_IMAGE_TEMPLATE
        else:
            additional_information = ADDITIONAL_INFORMATIONS.format(
                titles=join_list(titles)
            )
            system_prompt = SYSTEM_BOT_IMAGE_TEMPLATE.format(
                additional_information=additional_information
            )

        return OpenAIAgent.from_tools(
            tools=[vector_tool],
            llm=self.llm,
            memory=self.chat_store.initialize_memory_bot(session_id),
            system_prompt=system_prompt,
        )