Shreyas094 committed
Commit 806791d · verified · 1 parent: 5bab160

Update app.py

Files changed (1): app.py (+92 -86)
app.py CHANGED
@@ -116,38 +116,28 @@ def update_vectors(files, parser):
         label="Select documents to query"
     )
 
-def generate_chunked_response(prompt, model, max_tokens=1000, num_calls=3, temperature=0.2, should_stop=False):
+def generate_chunked_response(prompt, model, max_tokens=1000, num_calls=1, temperature=0.2, should_stop=False, continuation=False):
     print(f"Starting generate_chunked_response with {num_calls} calls")
     full_response = ""
-    continuation_prompt = prompt
-
-    for i in range(num_calls):
-        print(f"Starting API call {i+1}")
-        if should_stop:
-            print("Stop clicked, breaking loop")
-            break
-
-        if i > 0:
-            continuation_prompt = f"""
-            Previous response: {full_response}
-
-            Original query: {prompt}
-
-            Please continue the response from where you left off, maintaining coherence and avoiding repetition.
-            """
-
-        try:
-            if model == "@cf/meta/llama-3.1-8b-instruct":
-                # Cloudflare API logic
+    messages = [{"role": "user", "content": prompt}]
+
+    if continuation:
+        messages.insert(0, {"role": "system", "content": "This is a continuation of a previous response. Please continue from where you left off, maintaining coherence and avoiding repetition."})
+
+    if model == "@cf/meta/llama-3.1-8b-instruct":
+        # Cloudflare API logic
+        for i in range(num_calls):
+            print(f"Starting Cloudflare API call {i+1}")
+            if should_stop:
+                print("Stop clicked, breaking loop")
+                break
+            try:
                 response = requests.post(
                     f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/@cf/meta/llama-3.1-8b-instruct",
                     headers={"Authorization": f"Bearer {API_TOKEN}"},
                     json={
                         "stream": True,
-                        "messages": [
-                            {"role": "system", "content": "You are a friendly assistant"},
-                            {"role": "user", "content": continuation_prompt}
-                        ],
+                        "messages": messages,
                         "max_tokens": max_tokens,
                         "temperature": temperature
                     },
@@ -163,13 +153,22 @@ def generate_chunked_response(prompt, model, max_tokens=1000, num_calls=3, temperature=0.2, should_stop=False):
                                 json_data = json.loads(line.decode('utf-8').split('data: ')[1])
                                 chunk = json_data['response']
                                 full_response += chunk
+                                yield full_response
                             except json.JSONDecodeError:
                                 continue
-            else:
-                # Hugging Face API logic
-                client = InferenceClient(model, token=huggingface_token)
-                messages = [{"role": "user", "content": continuation_prompt}]
-
+                print(f"Cloudflare API call {i+1} completed")
+            except Exception as e:
+                print(f"Error in generating response from Cloudflare: {str(e)}")
+    else:
+        # Original Hugging Face API logic
+        client = InferenceClient(model, token=huggingface_token)
+
+        for i in range(num_calls):
+            print(f"Starting Hugging Face API call {i+1}")
+            if should_stop:
+                print("Stop clicked, breaking loop")
+                break
+            try:
                 for message in client.chat_completion(
                     messages=messages,
                     max_tokens=max_tokens,
@@ -182,17 +181,17 @@ def generate_chunked_response(prompt, model, max_tokens=1000, num_calls=3, temperature=0.2, should_stop=False):
                     if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                         chunk = message.choices[0].delta.content
                         full_response += chunk
-
-            print(f"API call {i+1} completed")
-        except Exception as e:
-            print(f"Error in generating response: {str(e)}")
-
-    # Clean up the response (existing code)
+                        yield full_response
+                print(f"Hugging Face API call {i+1} completed")
+            except Exception as e:
+                print(f"Error in generating response from Hugging Face: {str(e)}")
+
+    # Clean up the response
     clean_response = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', full_response, flags=re.DOTALL)
     clean_response = clean_response.replace("Using the following context:", "").strip()
     clean_response = clean_response.replace("Using the following context from the PDF documents:", "").strip()
-
-    # Remove duplicate paragraphs and sentences (existing code)
+
+    # Remove duplicate paragraphs and sentences
     paragraphs = clean_response.split('\n\n')
     unique_paragraphs = []
     for paragraph in paragraphs:
@@ -207,7 +206,7 @@ def generate_chunked_response(prompt, model, max_tokens=1000, num_calls=3, temperature=0.2, should_stop=False):
     final_response = '\n\n'.join(unique_paragraphs)
 
     print(f"Final clean response: {final_response[:100]}...")
-    return final_response
+    yield final_response
 
 def duckduckgo_search(query):
     with DDGS() as ddgs:
@@ -245,60 +244,28 @@ def retry_last_response(history, use_web_search, model, temperature, num_calls):
 
     return chatbot_interface(last_user_msg, history, use_web_search, model, temperature, num_calls)
 
-def respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
-    logging.info(f"User Query: {message}")
-    logging.info(f"Model Used: {model}")
-    logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
+def respond(message, history, use_web_search, model, temperature, num_calls, selected_docs, continuation=False):
+    if not message.strip():
+        return "", history
 
-    logging.info(f"Selected Documents: {selected_docs}")
+    history = history + [(message, "")]
 
     try:
         if use_web_search:
             for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
                 response = f"{main_content}\n\n{sources}"
-                first_line = response.split('\n')[0] if response else ''
-                logging.info(f"Generated Response (first line): {first_line}")
-                yield response
+                history[-1] = (message, response)
+                yield history, gr.update(visible=True)  # Make Continue Generation button visible
         else:
-            embed = get_embeddings()
-            if os.path.exists("faiss_database"):
-                database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
-                retriever = database.as_retriever()
-
-                # Filter relevant documents based on user selection
-                all_relevant_docs = retriever.get_relevant_documents(message)
-                relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
-
-                if not relevant_docs:
-                    yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
-                    return
-
-                context_str = "\n".join([doc.page_content for doc in relevant_docs])
-            else:
-                context_str = "No documents available."
-                yield "No documents available. Please upload PDF documents to answer questions."
-                return
-
-            if model == "@cf/meta/llama-3.1-8b-instruct":
-                # Use Cloudflare API
-                for partial_response in get_response_from_cloudflare(prompt="", context=context_str, query=message, num_calls=num_calls, temperature=temperature, search_type="pdf"):
-                    first_line = partial_response.split('\n')[0] if partial_response else ''
-                    logging.info(f"Generated Response (first line): {first_line}")
-                    yield partial_response
-            else:
-                # Use Hugging Face API
-                for partial_response in get_response_from_pdf(message, model, selected_docs, num_calls=num_calls, temperature=temperature):
-                    first_line = partial_response.split('\n')[0] if partial_response else ''
-                    logging.info(f"Generated Response (first line): {first_line}")
-                    yield partial_response
+            for partial_response in get_response_from_pdf(message, model, selected_docs, num_calls=num_calls, temperature=temperature, continuation=continuation):
+                history[-1] = (message, partial_response)
+                yield history, gr.update(visible=True)  # Make Continue Generation button visible
+    except gr.CancelledError:
+        yield history, gr.update(visible=False)
     except Exception as e:
-        logging.error(f"Error with {model}: {str(e)}")
-        if "microsoft/Phi-3-mini-4k-instruct" in model:
-            logging.info("Falling back to Mistral model due to Phi-3 error")
-            fallback_model = "mistralai/Mistral-7B-Instruct-v0.3"
-            yield from respond(message, history, fallback_model, temperature, num_calls, use_web_search, selected_docs)
-        else:
-            yield f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
+        logging.error(f"Unexpected error in respond: {str(e)}")
+        history[-1] = (message, f"An unexpected error occurred: {str(e)}")
+        yield history, gr.update(visible=False)
 
 logging.basicConfig(level=logging.DEBUG)
 
@@ -456,6 +423,31 @@ def vote(data: gr.LikeData):
     else:
         print(f"You downvoted this response: {data.value}")
 
+def continue_generation(history, model, temperature, num_calls, use_web_search, selected_docs):
+    if not history:
+        return history
+
+    last_user_msg = history[-1][0]
+    last_ai_response = history[-1][1]
+
+    continuation_prompt = f"""
+    Previous response: {last_ai_response}
+
+    Original query: {last_user_msg}
+
+    Please continue the response from where you left off, maintaining coherence and avoiding repetition.
+    """
+
+    try:
+        for response in respond(continuation_prompt, history[:-1], use_web_search, model, temperature, num_calls, selected_docs, continuation=True):
+            new_response = f"{last_ai_response}\n\n{response[-1][1]}"
+            history[-1] = (last_user_msg, new_response)
+            yield history
+    except Exception as e:
+        logging.error(f"Error in continue_generation: {str(e)}")
+        history[-1] = (last_user_msg, f"{last_ai_response}\n\nError continuing generation: {str(e)}")
+        yield history
+
 css = """
 /* Add your custom CSS here */
 """
@@ -481,7 +473,7 @@ demo = gr.ChatInterface(
         gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature"),
         gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Number of API Calls"),
         use_web_search,
-        document_selector  # Add the document selector to the chat interface
+        document_selector
     ],
     title="AI-powered Web Search and PDF Chat Assistant",
     description="Chat with your PDFs or use web search to answer questions.",
@@ -529,6 +521,19 @@ with demo:
                         inputs=[file_input, parser_dropdown],
                         outputs=[update_output, document_selector])
 
+    # Add the Continue Generation button
+    continue_btn = gr.Button("Continue Generation", visible=False)
+
+    # Add the click event for the Continue Generation button
+    continue_btn.click(continue_generation,
+                       inputs=[demo.chatbot,
+                               gr.Dropdown(choices=MODELS, label="Select Model"),
+                               gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature"),
+                               gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Number of API Calls"),
+                               use_web_search,
+                               document_selector],
+                       outputs=[demo.chatbot])
+
     gr.Markdown(
         """
        ## How to use
@@ -539,6 +544,7 @@ with demo:
        5. Toggle "Use Web Search" to switch between PDF chat and web search.
        6. Adjust Temperature and Number of API Calls to fine-tune the response generation.
        7. Use the provided examples or ask your own questions.
+       8. If a response is incomplete, click "Continue Generation" for more information.
        """
     )
 
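
The central change is that generate_chunked_response no longer rebuilds a continuation prompt between calls and returns a single final string; it sends one message list (optionally prefixed with a continuation system message) and yields the accumulated text after every streamed chunk, ending with the cleaned, de-duplicated text as its last yield. A minimal consumption sketch, assuming the function as committed above; stream_to_console is an illustrative helper, not part of app.py:

    def stream_to_console(prompt: str, model: str) -> str:
        # Drive the generator; each yield carries the full response so far,
        # so print only the suffix that is new since the previous yield.
        last = ""
        for partial in generate_chunked_response(prompt, model, num_calls=1):
            print(partial[len(last):], end="", flush=True)
            last = partial
        print()
        return last  # the final yield is the cleaned, de-duplicated response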
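
Likewise, respond now yields (history, gr.update(...)) pairs, so one streaming callback both refreshes the chatbot and reveals the hidden "Continue Generation" button. A stripped-down sketch of that Gradio pattern with tuple-style chat history; the handler and component names here are illustrative, not the app's own:

    import gradio as gr

    def stream_handler(message, history):
        history = history + [(message, "")]      # open a new chat turn
        for partial in ("Thinking", "Thinking...", "Done."):
            history[-1] = (message, partial)     # overwrite the bot side
            # The second output slot receives a component update,
            # which reveals the hidden button while streaming.
            yield history, gr.update(visible=True)

    with gr.Blocks() as app:
        chatbot = gr.Chatbot()
        msg = gr.Textbox()
        continue_btn = gr.Button("Continue Generation", visible=False)
        msg.submit(stream_handler, inputs=[msg, chatbot],
                   outputs=[chatbot, continue_btn])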
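
Finally, continue_generation re-drives respond with continuation=True, feeding it a prompt built from the last exchange and splicing whatever comes back onto the previous answer. The prompt-and-splice core restated as standalone helpers; the function names are hypothetical, but the templates mirror the diff:

    def build_continuation_prompt(last_user_msg: str, last_ai_response: str) -> str:
        # Same template as continue_generation above.
        return (
            f"Previous response: {last_ai_response}\n\n"
            f"Original query: {last_user_msg}\n\n"
            "Please continue the response from where you left off, "
            "maintaining coherence and avoiding repetition."
        )

    def splice_continuation(last_ai_response: str, continuation_text: str) -> str:
        # The continued text is appended after a blank line, as in the commit.
        return f"{last_ai_response}\n\n{continuation_text}"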
550