# Disabled prototype: Streamlit chatbot over uploaded PDF / text documents.
#
# Every statement below is commented out, so importing or running this file
# does nothing. The disabled code extracts text from uploaded files, splits it
# into chunks, writes the chunks to PostgreSQL, builds a vector store and a
# LangChain ConversationalRetrievalChain, and renders the chat history.
#
# Lines starting with "# NOTE(review):" are reviewer remarks about problems
# that should be resolved before this code is re-enabled; they are not part of
# the original program text.

# import streamlit as st
# from dotenv import load_dotenv
# from PyPDF2 import PdfReader
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain.embeddings import HuggingFaceInstructEmbeddings
# from langchain.vectorstores import FAISS
# from langchain.memory import ConversationBufferMemory
# from langchain.chains import ConversationalRetrievalChain
# from htmlTemplates import css, bot_template, user_template
# from langchain.llms import HuggingFaceHub
# import psycopg2
# from pgvector import PGVector
# NOTE(review): FAISS is imported but not referenced anywhere in this file.
# NOTE(review): get_vectorstore() uses PGVector.from_texts like a LangChain
#   vector store; confirm the `pgvector` package really exports PGVector
#   (LangChain ships its own class in langchain.vectorstores.pgvector).

# # Database connection parameters
# NOTE(review): credentials are hard-coded; consider reading them from the
#   environment instead (main() already calls load_dotenv()).
# DB_HOST = "localhost"
# DB_PORT = "5432"
# DB_NAME = "chatbot"
# DB_USER = "admin"
# DB_PASSWORD = "admin"

# # Function to establish a database connection
# def connect_to_postgresql():
#     return psycopg2.connect(
#         host=DB_HOST,
#         port=DB_PORT,
#         database=DB_NAME,
#         user=DB_USER,
#         password=DB_PASSWORD
#     )

# NOTE(review): the INSERT below binds the raw text chunk, not an embedding
#   vector, and pgvector documents its column type as `vector(n)` -- confirm
#   that `PG_VECTOR` is a valid type/function in the target database.
# def store_embeddings_in_postgresql(text_chunks, conn):
#     """Function to store embeddings in PostgreSQL using pgvector"""
#     # Create a cursor
#     cursor = conn.cursor()
#
#     try:
#         # Create a table if not exists
#         cursor.execute("""
#             CREATE TABLE IF NOT EXISTS embeddings (
#                 id SERIAL PRIMARY KEY,
#                 vector PG_VECTOR
#             )
#         """)
#
#         # Insert embeddings into the table
#         for text_chunk in text_chunks:
#             # To store embeddings in a 'vector' column in 'embeddings' table
#             cursor.execute("INSERT INTO embeddings (vector) VALUES (PG_VECTOR(%s))", (text_chunk,))
#
#         # Commit the transaction
#         conn.commit()
#         st.success("Embeddings stored successfully in PostgreSQL.")
#     except Exception as e:
#         # Rollback in case of an error
#         conn.rollback()
#         st.error(f"Error storing embeddings in PostgreSQL: {str(e)}")
#     finally:
#         # Close the cursor
#         cursor.close()

# def create_index_in_postgresql(conn):
#     """Function to create an index on the stored vectors using HNSW or IVFFlat"""
#     # Create a cursor
#     cursor = conn.cursor()
#
#     try:
#         # Create an index if not exists
#         cursor.execute("""
#             CREATE INDEX IF NOT EXISTS embeddings_index
#             ON embeddings
#             USING ivfflat (vector)
#         """)
#
#         # Commit the transaction
#         conn.commit()
#         st.success("Index created successfully in PostgreSQL.")
#     except Exception as e:
#         # Rollback in case of an error
#         conn.rollback()
#         st.error(f"Error creating index in PostgreSQL: {str(e)}")
#     finally:
#         # Close the cursor
#         cursor.close()

# def get_pdf_text(pdf):
#     """Upload pdf files and extract text"""
#     text = ""
#     pdf_reader = PdfReader(pdf)
#     for page in pdf_reader.pages:
#         text += page.extract_text()
#     return text

# NOTE(review): print(text) inside the loop looks like leftover debugging
#   output -- confirm it is not needed.
# def get_files(text_doc):
#     """Upload text files and extract text"""
#     text =""
#     for file in text_doc:
#         print(text)
#         if file.type == "text/plain":
#             # Read the text directly from the file
#             text += file.getvalue().decode("utf-8")
#         elif file.type == "application/pdf":
#             text += get_pdf_text(file)
#     return text

# def get_text_chunks(text):
#     """Create chunks of the extracted text"""
#     text_splitter = RecursiveCharacterTextSplitter(
#         chunk_size=900,
#         chunk_overlap=0,
#         separators="\n",
#         add_start_index = True,
#         length_function= len
#     )
#     chunks = text_splitter.split_text(text)
#     return chunks

# NOTE(review): verify that from_texts accepts `connection=`; LangChain's
#   PGVector is documented as taking a connection string -- TODO confirm.
# def get_vectorstore(text_chunks, conn):
#     """Create embeddings for the chunks and store them in a vectorstore"""
#     embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
#     vectorstore = PGVector.from_texts(texts=text_chunks, embedding=embeddings, connection=conn)
#     return vectorstore

# def get_conversation_chain(vectorstore):
#     llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.2, "max_length":1024})
#     memory = ConversationBufferMemory(
#         memory_key='chat_history', return_messages=True)
#     conversation_chain = ConversationalRetrievalChain.from_llm(
#         llm=llm,
#         retriever=vectorstore.as_retriever(),
#         memory=memory
#     )
#     return conversation_chain

# NOTE(review): main() calls this function with (user_question, conn), but
#   only one parameter is declared here.
# NOTE(review): st.session_state.conversation is initialised to None in main()
#   and only replaced after "Process" is clicked, so calling it fails when a
#   question is asked first.
# def handle_userinput(user_question):
#     response = st.session_state.conversation({'question': user_question})
#     st.session_state.chat_history = response['chat_history']
#
#     for i, message in enumerate(st.session_state.chat_history):
#         if i % 2 == 0:
#             st.write(user_template.replace(
#                 "{{MSG}}", message.content), unsafe_allow_html=True)
#         else:
#             st.write(bot_template.replace(
#                 "{{MSG}}", message.content), unsafe_allow_html=True)

# NOTE(review): the connection opened in main() is never closed in this file.
# def main():
#     load_dotenv()
#     st.set_page_config(page_title="ChatBot")
#     st.write(css, unsafe_allow_html=True)
#
#     if "conversation" not in st.session_state:
#         st.session_state.conversation = None
#     if "chat_history" not in st.session_state:
#         st.session_state.chat_history = None
#
#     # Connect to PostgreSQL
#     conn = connect_to_postgresql()
#
#     st.header("Chat Bot")
#     user_question = st.text_input("Ask a question:")
#     if user_question:
#         handle_userinput(user_question, conn)
#
#     with st.sidebar:
#         st.subheader("Your documents")
#         pdf_docs = st.file_uploader(
#             "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
#         if st.button("Process"):
#             with st.spinner("Processing"):
#                 # get text
#                 raw_text = get_files(pdf_docs)
#
#                 # get the text chunks
#                 text_chunks = get_text_chunks(raw_text)
#
#                 # store embeddings in PostgreSQL
#                 store_embeddings_in_postgresql(text_chunks, conn)
#
#                 # create vector store
#                 vectorstore = get_vectorstore(text_chunks, conn)
#
#                 # create index in PostgreSQL
#                 create_index_in_postgresql(conn)
#
#                 # create conversation chain
#                 st.session_state.conversation = get_conversation_chain(
#                     vectorstore)

# if __name__ == '__main__':
#     main()