File size: 3,809 Bytes
198bfc2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
import os
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.runnables import ConfigurableFieldSpec
from langchain_community.chat_message_histories import PostgresChatMessageHistory
from langchain_openai import OpenAIEmbeddings
from langchain_postgres.vectorstores import PGVector
from langchain_community.vectorstores import Pinecone
import logging
from dotenv import load_dotenv
# Load variables from a local .env file before any os.getenv() reads below.
load_dotenv()

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Propagate the OpenAI key into the process environment only when it is set.
# The original unconditional assignment `os.environ[...] = os.getenv(...)`
# raises TypeError when the variable is missing (environ values must be str),
# and is a no-op self-assignment when it is present.
_openai_api_key = os.getenv("OPENAI_API_KEY")
if _openai_api_key:
    os.environ["OPENAI_API_KEY"] = _openai_api_key

# Connection string shared by the chat-history store and the PGVector store.
# NOTE(review): may be None if the variable is absent — downstream calls will
# fail at connect time; verify deployment always provides POSTGRES_URL.
POSTGRES_URL = os.getenv("POSTGRES_URL")
def create_postgres_chat_message_history(session_id, user_id):
    """Factory handed to RunnableWithMessageHistory for per-session history.

    Args:
        session_id: unique conversation identifier; keys the stored messages.
        user_id: accepted to satisfy the two-field ``history_factory_config``
            declared in ``prepare_prompt_and_chain_with_history`` but not
            used by the Postgres-backed store itself.

    Returns:
        A PostgresChatMessageHistory bound to ``POSTGRES_URL``.
    """
    return PostgresChatMessageHistory(
        connection_string=POSTGRES_URL,
        session_id=session_id,
    )
def prepare_prompt_and_chain_with_history():
    """Build a GPT-4o chat chain whose message history lives in Postgres.

    The chain expects two runtime inputs — ``data`` (the retrieved context)
    and ``input`` (the user question) — plus a config carrying ``user_id``
    and ``session_id``, which route history storage via
    ``create_postgres_chat_message_history``.

    Returns:
        RunnableWithMessageHistory wrapping ``prompt | llm``.
    """
    llm = ChatOpenAI(model="gpt-4o")
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                # Typos fixed ("hepls" -> "helps", "summerize" -> "summarize")
                # and a stray trailing double-quote removed from the original
                # prompt text.
                """You are an expert in data that helps to summarize the graph and extract information from that images. Answer the question based only on the following context, which can include text, images and tables:
** IMPORTANT INSTRUCTIONS -->
1. Reply in 50 words maximum.
2. Only answer the question related to the context else say you don't know the answer.""",
            ),
            # The original used a bare string here, which from_messages treats
            # as a human message template — made explicit for readability.
            ("human", "Here is the context to answer user's questions everytime --> {data}. "),
            MessagesPlaceholder(variable_name="history"),
            ("user", "{input}"),
        ]
    )
    runnable = prompt | llm
    with_message_history = RunnableWithMessageHistory(
        runnable,
        create_postgres_chat_message_history,
        input_messages_key="input",
        history_messages_key="history",
        # Both fields are forwarded to the history factory on every call.
        history_factory_config=[
            ConfigurableFieldSpec(
                id="user_id",
                annotation=str,
                name="User ID",
                description="Unique identifier for the user.",
                default="",
                is_shared=True,
            ),
            ConfigurableFieldSpec(
                id="session_id",
                annotation=str,
                name="Session ID",
                description="Unique identifier for the conversation.",
                default="",
                is_shared=True,
            ),
        ],
        verbose=True,
    )
    return with_message_history
def get_vectorstore_from_postgres(collection_name):
    """Return a PGVector store over ``collection_name`` using OpenAI embeddings.

    The store connects with the module-level ``POSTGRES_URL`` and stores
    metadata as JSONB.
    """
    embeddings = OpenAIEmbeddings()
    return PGVector(
        embeddings=embeddings,
        collection_name=collection_name,
        connection=POSTGRES_URL,
        use_jsonb=True,
    )
def get_vectorstore_from_pinecone(index_name):
    """Return a Pinecone vector store bound to an existing index.

    Uses OpenAI embeddings for query encoding; the index must already exist.
    """
    embeddings = OpenAIEmbeddings()
    return Pinecone.from_existing_index(index_name, embeddings)
def get_context_from_vectorstore(vectorstore, user_query):
    """Fetch the top-4 similar documents and concatenate them into a tagged
    context string.

    Args:
        vectorstore: any object exposing ``similarity_search(query, k=...)``
            returning documents with ``metadata`` (keys ``type`` and
            ``original_content``) and ``page_content``.
        user_query: text used as the similarity-search query.

    Returns:
        str: chunks joined in retrieval order, each prefixed with its kind
        tag: ``[text]``/``[table]`` carry ``metadata['original_content']``;
        ``[image]`` carries the document's ``page_content`` (its textual
        summary). Documents of any other type are skipped.
    """
    logging.info("Start postgres vector search......")
    relevant_docs = vectorstore.similarity_search(user_query, k=4)
    logging.info(relevant_docs)

    # Build parts in a list and join once instead of repeated string +=.
    # NOTE(review): the original also accumulated image payloads in a
    # `relevant_images` list that was never returned; that dead work is
    # removed here — restore it if a caller is meant to receive the images.
    parts = []
    for doc in relevant_docs:
        doc_type = doc.metadata['type']
        if doc_type == 'text':
            parts.append('[text]' + doc.metadata['original_content'])
        elif doc_type == 'table':
            parts.append('[table]' + doc.metadata['original_content'])
        elif doc_type == 'image':
            parts.append('[image]' + doc.page_content)
    return "".join(parts)