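# # Streamlit chatbot: upload PDF / plain-text documents, split them into chunks,
# # embed the chunks with hkunlp/instructor-xl, store the vectors in PostgreSQL
# # via pgvector, and answer questions with a ConversationalRetrievalChain backed
# # by google/flan-t5-xxl on the HuggingFace Hub.
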
# import streamlit as st
# from dotenv import load_dotenv
# from PyPDF2 import PdfReader
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain.embeddings import HuggingFaceInstructEmbeddings
# from langchain.vectorstores import FAISS
# from langchain.memory import ConversationBufferMemory
# from langchain.chains import ConversationalRetrievalChain
# from htmlTemplates import css, bot_template, user_template
# from langchain.llms import HuggingFaceHub
# import psycopg2
# from langchain.vectorstores import PGVector


# # Database connection parameters
# DB_HOST = "localhost"
# DB_PORT = "5432"
# DB_NAME = "chatbot"
# DB_USER = "admin"
# DB_PASSWORD = "admin"
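# # Note: these connection parameters are hard-coded for a local setup; since
# # main() calls load_dotenv(), they could instead be read from environment variables.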


# def connect_to_postgresql():
#     """Establish a connection to the PostgreSQL database"""
#     return psycopg2.connect(
#         host=DB_HOST,
#         port=DB_PORT,
#         database=DB_NAME,
#         user=DB_USER,
#         password=DB_PASSWORD
#     )


# def store_embeddings_in_postgresql(text_chunks, conn):
#     """Embed the text chunks and store them in PostgreSQL using pgvector"""

#     # Embed the chunks (hkunlp/instructor-xl embeddings are 768-dimensional)
#     embeddings_model = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
#     vectors = embeddings_model.embed_documents(text_chunks)

#     # Create a cursor
#     cursor = conn.cursor()

#     try:
#         # Make sure the pgvector extension is available, then create the table if it does not exist
#         cursor.execute("CREATE EXTENSION IF NOT EXISTS vector")
#         cursor.execute("""
#             CREATE TABLE IF NOT EXISTS embeddings (
#                 id SERIAL PRIMARY KEY,
#                 content TEXT,
#                 embedding vector(768)
#             )
#         """)

#         # Insert each chunk together with its embedding
#         # (pgvector accepts the bracketed text representation, e.g. '[0.1, 0.2, ...]')
#         for text_chunk, vector in zip(text_chunks, vectors):
#             cursor.execute(
#                 "INSERT INTO embeddings (content, embedding) VALUES (%s, %s)",
#                 (text_chunk, str(vector)),
#             )

#         # Commit the transaction
#         conn.commit()
#         st.success("Embeddings stored successfully in PostgreSQL.")
#     except Exception as e:
#         # Roll back in case of an error
#         conn.rollback()
#         st.error(f"Error storing embeddings in PostgreSQL: {str(e)}")
#     finally:
#         # Close the cursor
#         cursor.close()


# def create_index_in_postgresql(conn):
#     """Function to create an index on the stored vectors using HNSW or IVFFIT"""

#     # Create a cursor
#     cursor = conn.cursor()

#     try:
#         # Create an index if not exists
#         cursor.execute("""
#             CREATE INDEX IF NOT EXISTS embeddings_index
#             ON embeddings
#             USING ivfflat (embedding vector_l2_ops)
#         """)

#         # Commit the transaction
#         conn.commit()
#         st.success("Index created successfully in PostgreSQL.")
#     except Exception as e:
#         # Rollback in case of an error
#         conn.rollback()
#         st.error(f"Error creating index in PostgreSQL: {str(e)}")
#     finally:
#         # Close the cursor
#         cursor.close()
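
# # The docstring above mentions HNSW as an alternative to IVFFlat: pgvector also
# # supports "USING hnsw (embedding vector_l2_ops)", which typically builds more
# # slowly but gives better recall and does not require the table to be populated first.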


# def get_pdf_text(pdf):
#     """Upload pdf files and extract text"""
#     text = ""
#     pdf_reader = PdfReader(pdf)
#     for page in pdf_reader.pages:
#         # extract_text() can return None for pages without extractable text
#         text += page.extract_text() or ""
#     return text


# def get_files(text_doc):
#     """Upload text files and extraxt text"""
#     text =""
#     for file in text_doc:
#         print(text)
#         if file.type == "text/plain":
#             # Read the text directly from the file
#             text += file.getvalue().decode("utf-8")
#         elif file.type == "application/pdf":
#             text += get_pdf_text(file)
#     return text

            
# def get_text_chunks(text):
#     """Create chunks of the extracted text"""
#     text_splitter = RecursiveCharacterTextSplitter(
#         chunk_size=900,
#         chunk_overlap=0,
#         separators=["\n"],
#         add_start_index=True,
#         length_function=len
#     )
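#     # With chunk_size=900 and chunk_overlap=0, each chunk holds at most 900
#     # characters (length_function=len) and consecutive chunks share no text;
#     # add_start_index records each chunk's offset in the original document.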
#     chunks = text_splitter.split_text(text)
#     return chunks


# def get_vectorstore(text_chunks):
#     """Create embeddings for the chunks and store them in a PGVector vectorstore"""
#     embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
#     # PGVector expects an SQLAlchemy-style connection string rather than a raw psycopg2 connection
#     connection_string = f"postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
#     vectorstore = PGVector.from_texts(texts=text_chunks, embedding=embeddings, connection_string=connection_string)
#     return vectorstore


# def get_conversation_chain(vectorstore):
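#     """Build a conversational retrieval chain over the vectorstore using Flan-T5 from the HuggingFace Hub"""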
#     llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.2, "max_length":1024})

#     memory = ConversationBufferMemory(
#         memory_key='chat_history', return_messages=True)
#     conversation_chain = ConversationalRetrievalChain.from_llm(
#         llm=llm,
#         retriever=vectorstore.as_retriever(),
#         memory=memory
#     )
#     return conversation_chain


# def handle_userinput(user_question):
#     response = st.session_state.conversation({'question': user_question})
#     st.session_state.chat_history = response['chat_history']

#     for i, message in enumerate(st.session_state.chat_history):
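#         # Messages alternate user / bot, so even indices use the user template and odd indices the bot template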
#         if i % 2 == 0:
#             st.write(user_template.replace(
#                 "{{MSG}}", message.content), unsafe_allow_html=True)
#         else:
#             st.write(bot_template.replace(
#                 "{{MSG}}", message.content), unsafe_allow_html=True)
            

# def main():
#     load_dotenv()
#     st.set_page_config(page_title="ChatBot")
#     st.write(css, unsafe_allow_html=True)

#     if "conversation" not in st.session_state:
#         st.session_state.conversation = None
#     if "chat_history" not in st.session_state:
#         st.session_state.chat_history = None

#     # Connect to PostgreSQL
#     conn = connect_to_postgresql()

#     st.header("Chat Bot")
#     user_question = st.text_input("Ask a question:")
#     if user_question:
#         handle_userinput(user_question)

#     with st.sidebar:
#         st.subheader("Your documents")
#         pdf_docs = st.file_uploader(
#             "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
#         if st.button("Process"):
#             with st.spinner("Processing"):
#                 # get text
#                 raw_text = get_files(pdf_docs)

#                 # get the text chunks
#                 text_chunks = get_text_chunks(raw_text)

#                 # store embeddings in PostgreSQL
#                 store_embeddings_in_postgresql(text_chunks, conn)

#                 # create vector store
#                 vectorstore = get_vectorstore(text_chunks)

#                 # create index in PostgreSQL
#                 create_index_in_postgresql(conn)

#                 # create conversation chain
#                 st.session_state.conversation = get_conversation_chain(
#                     vectorstore)

# if __name__ == '__main__':
#     main()
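
# # Once uncommented, the app would normally be launched with:
# #     streamlit run <path to this file>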