datascientist22 committed on
Commit
709bbfd
·
verified ·
1 Parent(s): 0953464

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -32
app.py CHANGED
@@ -132,50 +132,53 @@ if st.button("Submit Query"):
132
  elif not url_input:
133
  st.warning("Please enter a valid URL in the sidebar.")
134
  else:
135
- # Blog loading logic based on user input URL
136
- loader = WebBaseLoader(
137
- web_paths=(url_input,), # Use the user-input URL
138
- bs_kwargs=dict(
139
- parse_only=bs4.SoupStrainer() # Adjust based on the user's URL structure
140
- ),
141
- )
142
- docs = loader.load()
 
143
 
144
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=300)
145
- splits = text_splitter.split_documents(docs)
146
 
147
- # Initialize the embedding model
148
- embedding_model = SentenceTransformerEmbedding('all-MiniLM-L6-v2')
149
 
150
- # Initialize Chroma with the embedding class
151
- vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
152
 
153
- # Retrieve and generate using the relevant snippets of the blog
154
- retriever = vectorstore.as_retriever()
155
 
156
- # Retrieve relevant documents
157
- retrieved_docs = retriever.get_relevant_documents(query)
158
 
159
- # Format the retrieved documents
160
- def format_docs(docs):
161
- return "\n\n".join(doc.page_content for doc in docs)
162
 
163
- context = format_docs(retrieved_docs)
164
 
165
- # Initialize the language model
166
- custom_llm = CustomLanguageModel()
167
 
168
- # Initialize RAG chain using the prompt
169
- prompt = RAGPrompt()
170
 
171
- # Apply the prompt directly to the data (no chaining using `|`)
172
- prompt_data = prompt({"question": query, "context": context})
173
 
174
- # Generate the response using the language model, focusing on the answer from the retrieved context
175
- result = custom_llm.generate(prompt_data["question"], prompt_data["context"])
176
 
177
- # Store query and response in session for chat history
178
- st.session_state['chat_history'].append((query, result))
 
 
179
 
180
  # Display chat history
181
  for q, r in st.session_state['chat_history']:
 
132
  elif not url_input:
133
  st.warning("Please enter a valid URL in the sidebar.")
134
  else:
135
+ try:
136
+ # Blog loading logic based on user input URL
137
+ loader = WebBaseLoader(
138
+ web_paths=(url_input,), # Use the user-input URL
139
+ bs_kwargs=dict(
140
+ parse_only=bs4.SoupStrainer() # Adjust based on the user's URL structure
141
+ ),
142
+ )
143
+ docs = loader.load()
144
 
145
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=300)
146
+ splits = text_splitter.split_documents(docs)
147
 
148
+ # Initialize the embedding model
149
+ embedding_model = SentenceTransformerEmbedding('all-MiniLM-L6-v2')
150
 
151
+ # Initialize Chroma with the embedding class
152
+ vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
153
 
154
+ # Retrieve and generate using the relevant snippets of the blog
155
+ retriever = vectorstore.as_retriever()
156
 
157
+ # Retrieve relevant documents
158
+ retrieved_docs = retriever.get_relevant_documents(query)
159
 
160
+ # Format the retrieved documents
161
+ def format_docs(docs):
162
+ return "\n\n".join(doc.page_content for doc in docs)
163
 
164
+ context = format_docs(retrieved_docs)
165
 
166
+ # Initialize the language model
167
+ custom_llm = CustomLanguageModel()
168
 
169
+ # Initialize RAG chain using the prompt
170
+ prompt = RAGPrompt()
171
 
172
+ # Apply the prompt directly to the data (no chaining using `|`)
173
+ prompt_data = prompt({"question": query, "context": context})
174
 
175
+ # Generate the response using the language model, focusing on the answer from the retrieved context
176
+ result = custom_llm.generate(prompt_data["question"], prompt_data["context"])
177
 
178
+ # Store query and response in session for chat history
179
+ st.session_state['chat_history'].append((query, result))
180
+ except Exception as e:
181
+ st.error(f"An error occurred: {e}")
182
 
183
  # Display chat history
184
  for q, r in st.session_state['chat_history']: