datascientist22 commited on
Commit
4e001cd
·
verified ·
1 Parent(s): df535b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -55
app.py CHANGED
@@ -1,6 +1,18 @@
1
  import streamlit as st
2
  import re
3
  import os
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  # Sidebar Style with Multicolored Background
6
  sidebar_bg_style = """
@@ -81,61 +93,66 @@ if 'chat_history' not in st.session_state:
81
 
82
  # Submit button for chat
83
  if st.button("Submit Query"):
84
- if query and url_input:
85
- # Blog loading logic based on user input URL
86
- loader = WebBaseLoader(
87
- web_paths=(url_input,), # Use the user-input URL
88
- bs_kwargs=dict(
89
- parse_only=bs4.SoupStrainer() # Adjust based on the user's URL structure
90
- ),
91
- )
92
- docs = loader.load()
93
-
94
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
95
- splits = text_splitter.split_documents(docs)
96
-
97
- # Define the embedding class
98
- class SentenceTransformerEmbedding:
99
- def __init__(self, model_name):
100
- self.model = SentenceTransformer(model_name)
101
-
102
- def embed_documents(self, texts):
103
- embeddings = self.model.encode(texts, convert_to_tensor=True)
104
- if isinstance(embeddings, torch.Tensor):
105
- return embeddings.cpu().detach().numpy().tolist() # Convert tensor to list
106
- return embeddings
107
-
108
- def embed_query(self, query):
109
- embedding = self.model.encode([query], convert_to_tensor=True)
110
- if isinstance(embedding, torch.Tensor):
111
- return embedding.cpu().detach().numpy().tolist()[0] # Convert tensor to list
112
- return embedding[0]
113
-
114
- # Initialize the embedding model
115
- embedding_model = SentenceTransformerEmbedding('all-MiniLM-L6-v2')
116
-
117
- # Initialize Chroma with the embedding class
118
- vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
119
-
120
- # Retrieve and generate using the relevant snippets of the blog
121
- retriever = vectorstore.as_retriever()
122
- prompt = hub.pull("rlm/rag-prompt")
123
-
124
- def format_docs(docs):
125
- return "\n\n".join(doc.page_content for doc in docs)
126
-
127
- rag_chain = (
128
- {"context": retriever | format_docs, "question": RunnablePassthrough()}
129
- | prompt
130
- | ChatGroq(model="llama3-8b-8192") # Replace `llm` with an appropriate language model
131
- | StrOutputParser()
132
- )
133
-
134
- # Generate the answer using the user's query
135
- result = rag_chain.invoke(query)
136
-
137
- # Store query and response in session for chat history
138
- st.session_state['chat_history'].append((query, result))
 
 
 
 
 
139
 
140
  # Display chat history
141
  for q, r in st.session_state['chat_history']:
 
1
import streamlit as st
import re
import os

import bs4
import torch
from sentence_transformers import SentenceTransformer

# NOTE(review): requires the `langchain`, `langchain-groq`, `chromadb`,
# `sentence-transformers` and `beautifulsoup4` packages to be installed.
from langchain import hub  # hub.pull(...) fetches shared prompts from the LangChain Hub
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import WebBaseLoader
from langchain.prompts import load_prompt
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
# BUG FIX: StrOutputParser / RunnablePassthrough do not live in
# `langchain.output_parsers` / `langchain.runnables` — those import paths
# raise ImportError. Their stable locations are under `langchain.schema`.
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
# BUG FIX: ChatGroq is not part of `langchain.chat_models`; it is provided
# by the separate `langchain-groq` integration package.
from langchain_groq import ChatGroq
# BUG FIX: removed `from langchain.embeddings import SentenceTransformerEmbedding`
# — no such class exists in langchain (ImportError at startup), and the app
# defines its own SentenceTransformerEmbedding adapter class locally anyway.
16
 
17
  # Sidebar Style with Multicolored Background
18
  sidebar_bg_style = """
 
93
 
94
# Submit button for chat: on click, build a one-shot RAG pipeline over the
# user-supplied blog URL and answer the user's query with it.
if st.button("Submit Query"):
    if query:
        if url_input:
            # Load the blog content from the user-input URL. An unfiltered
            # SoupStrainer keeps the whole page; narrow it per-site if needed.
            loader = WebBaseLoader(
                web_paths=(url_input,),
                bs_kwargs=dict(
                    parse_only=bs4.SoupStrainer()
                ),
            )
            docs = loader.load()

            # Chunk the page so each piece fits the embedding/LLM context;
            # 200-char overlap preserves continuity across chunk boundaries.
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
            splits = text_splitter.split_documents(docs)

            # Minimal adapter exposing the embedding interface Chroma expects
            # (embed_documents / embed_query) on top of sentence-transformers.
            class SentenceTransformerEmbedding:
                def __init__(self, model_name):
                    self.model = SentenceTransformer(model_name)

                def embed_documents(self, texts):
                    """Embed a batch of texts; return plain lists (Chroma cannot ingest tensors)."""
                    embeddings = self.model.encode(texts, convert_to_tensor=True)
                    if isinstance(embeddings, torch.Tensor):
                        return embeddings.cpu().detach().numpy().tolist()
                    return embeddings

                def embed_query(self, query):
                    """Embed a single query string; return one flat list of floats."""
                    embedding = self.model.encode([query], convert_to_tensor=True)
                    if isinstance(embedding, torch.Tensor):
                        return embedding.cpu().detach().numpy().tolist()[0]
                    return embedding[0]

            # Initialize the embedding model and index the chunks in Chroma.
            embedding_model = SentenceTransformerEmbedding('all-MiniLM-L6-v2')
            vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
            retriever = vectorstore.as_retriever()

            # BUG FIX: load_prompt() reads a *local* prompt file; "rlm/rag-prompt"
            # is a LangChain Hub identifier and must be fetched with hub.pull()
            # (as the previous revision of this file did). Local import keeps
            # this change self-contained.
            from langchain import hub
            prompt = hub.pull("rlm/rag-prompt")

            def format_docs(docs):
                # Concatenate retrieved chunks into one context string.
                return "\n\n".join(doc.page_content for doc in docs)

            # Retrieval-augmented chain: retrieve -> prompt -> Groq LLM -> text.
            rag_chain = (
                {"context": retriever | format_docs, "question": RunnablePassthrough()}
                | prompt
                | ChatGroq(model="llama3-8b-8192")  # requires GROQ_API_KEY in the environment
                | StrOutputParser()
            )

            # Generate the answer and record the turn for the chat-history display.
            result = rag_chain.invoke(query)
            st.session_state['chat_history'].append((query, result))
        else:
            st.warning("Please enter a valid URL.")
    else:
        st.warning("Please enter a question.")
156
 
157
  # Display chat history
158
  for q, r in st.session_state['chat_history']: