Shreyas094 committed on
Commit
4d152e0
·
verified ·
1 Parent(s): 8650279

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +160 -188
app.py CHANGED
@@ -20,14 +20,13 @@ from langchain_core.runnables import RunnableParallel, RunnablePassthrough
20
  from langchain_core.documents import Document
21
  from sklearn.feature_extraction.text import TfidfVectorizer
22
  from sklearn.metrics.pairwise import cosine_similarity
23
- from datetime import datetime
24
- from huggingface_hub.utils import HfHubHTTPError
25
 
26
  huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
27
 
28
  # Memory database to store question-answer pairs
29
  memory_database = {}
30
  conversation_history = []
 
31
 
32
  def load_and_split_document_basic(file):
33
  """Loads and splits the document into pages."""
@@ -101,25 +100,15 @@ def get_model(temperature, top_p, repetition_penalty):
101
  huggingfacehub_api_token=huggingface_token
102
  )
103
 
104
- def generate_chunked_response(model, prompt, max_tokens=200):
105
  full_response = ""
106
- total_length = len(prompt.split()) # Approximate token count of prompt
107
-
108
- while total_length < 7800: # Leave some margin
109
- try:
110
- chunk = model(prompt + full_response, max_new_tokens=min(200, 7800 - total_length))
111
- chunk = chunk.strip()
112
- if not chunk:
113
- break
114
  full_response += chunk
115
- total_length += len(chunk.split()) # Approximate token count
116
-
117
- if chunk.endswith((".", "!", "?")):
118
- break
119
- except Exception as e:
120
- print(f"Error generating response: {str(e)}")
121
  break
122
-
123
  return full_response.strip()
124
 
125
  def manage_conversation_history(question, answer, history, max_history=5):
@@ -197,10 +186,8 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
197
  print(f"Found {len(result_block)} results on this page")
198
  for result in result_block:
199
  link = result.find("a", href=True)
200
- title = result.find("h3")
201
- if link and title:
202
  link = link["href"]
203
- title = title.get_text()
204
  print(f"Processing link: {link}")
205
  try:
206
  webpage = session.get(link, headers=headers, timeout=timeout)
@@ -208,21 +195,20 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
208
  visible_text = extract_text_from_webpage(webpage.text)
209
  if len(visible_text) > max_chars_per_page:
210
  visible_text = visible_text[:max_chars_per_page] + "..."
211
- all_results.append({"link": link, "title": title, "text": visible_text})
212
  print(f"Successfully extracted text from {link}")
213
  except requests.exceptions.RequestException as e:
214
  print(f"Error retrieving webpage content: {e}")
215
- all_results.append({"link": link, "title": title, "text": None})
216
  else:
217
- print("No link or title found for this result")
218
- all_results.append({"link": None, "title": None, "text": None})
219
  start += len(result_block)
220
 
221
  print(f"Search completed. Total results: {len(all_results)}")
222
  print("Search results:")
223
  for i, result in enumerate(all_results, 1):
224
  print(f"Result {i}:")
225
- print(f" Title: {result['title']}")
226
  print(f" Link: {result['link']}")
227
  if result['text']:
228
  print(f" Text: {result['text'][:100]}...") # Print first 100 characters
@@ -232,61 +218,92 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
232
 
233
  if not all_results:
234
  print("No search results found. Returning a default message.")
235
- return [{"link": None, "title": "No Results", "text": "No information found in the web search results."}]
236
 
237
  return all_results
238
 
239
- def summarize_content(content, model):
240
- if content is None:
241
- return "No content available to summarize."
242
-
243
- summary_prompt = f"""
244
- You are a financial analyst and given a task to summarize the following news article in concise and coherent brief paragraph. Focus on the key points, main events, significant details and any point that could have major implications. Ensure the summary is informative and relevant to current news:
245
-
246
- {content[:3000]} # Limit input to avoid token limits
247
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  Summary:
249
  """
250
- summary = generate_chunked_response(model, summary_prompt, max_tokens=300) # Adjust max_tokens as needed
 
 
251
  return summary
252
 
253
- def rank_search_results(titles, summaries, model):
254
- if not titles or not summaries:
255
- print("No titles or summaries to rank.")
256
- return list(range(1, len(titles) + 1))
257
-
258
- ranking_prompt = (
259
- "Rank the following search results from a financial analyst perspective. "
260
- f"Assign a rank from 1 to {len(titles)} based on relevance, with 1 being the most relevant. "
261
- "Return only the numeric ranks in order, separated by commas.\n\n"
262
- "Titles and summaries:\n"
263
- )
264
 
265
- for i, (title, summary) in enumerate(zip(titles, summaries), 1):
266
- ranking_prompt += f"{i}. Title: {title}\nSummary: {summary}\n\n"
267
 
268
- ranking_prompt += "Ranks:"
 
 
 
 
 
 
 
 
 
269
 
270
- try:
271
- ranks_str = generate_chunked_response(model, ranking_prompt)
272
- print(f"Model output for ranking: {ranks_str}")
273
-
274
- if not ranks_str.strip():
275
- print("Model returned an empty string for ranking.")
276
- return list(range(1, len(titles) + 1))
277
-
278
- ranks = [float(rank.strip()) for rank in ranks_str.split(',') if rank.strip()]
279
-
280
- if len(ranks) != len(titles):
281
- print(f"Warning: Number of ranks ({len(ranks)}) does not match number of titles ({len(titles)})")
282
- return list(range(1, len(titles) + 1))
283
-
284
- return ranks
285
- except Exception as e:
286
- print(f"Error in ranking: {str(e)}. Using fallback ranking method.")
287
- return list(range(1, len(titles) + 1))
288
 
289
- def ask_question(question, temperature, top_p, repetition_penalty, web_search):
 
 
 
 
 
 
 
 
 
 
290
  global conversation_history
291
 
292
  if not question:
@@ -295,39 +312,24 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search):
295
  model = get_model(temperature, top_p, repetition_penalty)
296
  embed = get_embeddings()
297
 
 
 
 
 
 
 
298
  if web_search:
299
  search_results = google_search(question)
 
300
 
301
- processed_results = []
302
- for index, result in enumerate(search_results, start=1):
303
- if result["text"] is not None:
304
- try:
305
- summary = summarize_content(result["text"], model)
306
- processed_results.append({
307
- "title": result.get("title", f"Result {index}"),
308
- "summary": summary,
309
- "index": index
310
- })
311
- except Exception as e:
312
- print(f"Error processing search result {index}: {str(e)}")
313
- else:
314
- print(f"Skipping result {index} due to None content")
315
 
316
- if not processed_results:
317
- return "No valid search results found."
318
-
319
- print(f"Number of processed results: {len(processed_results)}")
320
-
321
- # For news requests, return the summaries directly
322
- if "news" in question.lower():
323
- news_response = "Here are the latest news summaries on this topic:\n\n"
324
- for result in processed_results[:5]: # Limit to top 5 results
325
- news_response += f"Title: {result['title']}\n\nSummary: {result['summary']}\n\n---\n\n"
326
- return news_response.strip()
327
-
328
- # For other questions, use the summaries as context
329
- context_str = "\n\n".join([f"Title: {r['title']}\nSummary: {r['summary']}"
330
- for r in processed_results])
331
 
332
  prompt_template = """
333
  Answer the question based on the following web search results:
@@ -335,17 +337,31 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search):
335
  {context}
336
  Current Question: {question}
337
  If the web search results don't contain relevant information, state that the information is not available in the search results.
338
- Provide a concise and direct answer to the question:
339
  """
340
  prompt_val = ChatPromptTemplate.from_template(prompt_template)
341
  formatted_prompt = prompt_val.format(context=context_str, question=question)
342
-
343
- answer = generate_chunked_response(model, formatted_prompt)
344
- else:
345
- if not os.path.exists("faiss_database"):
346
- return "No documents available. Please upload documents or enable web search to answer questions."
347
 
348
- database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
 
350
  history_str = "\n".join([f"Q: {item['question']}\nA: {item['answer']}" for item in conversation_history])
351
 
@@ -359,9 +375,26 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search):
359
  prompt_val = ChatPromptTemplate.from_template(prompt)
360
  formatted_prompt = prompt_val.format(history=history_str, context=context_str, question=question)
361
 
362
- answer = generate_chunked_response(model, formatted_prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
 
364
- if not web_search:
365
  memory_database[question] = answer
366
  conversation_history = manage_conversation_history(question, answer, conversation_history)
367
 
@@ -393,67 +426,6 @@ def update_vectors(files, use_recursive_splitter):
393
 
394
  return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files."
395
 
396
- def update_vector_db_with_search_results(search_results, ranks, current_date):
397
- embed = get_embeddings()
398
-
399
- documents = []
400
- for result, rank in zip(search_results, ranks):
401
- if result.get("summary"):
402
- doc = Document(
403
- page_content=result["summary"],
404
- metadata={
405
- "search_date": current_date,
406
- "search_title": result.get("title", ""),
407
- "search_content": result.get("content", ""),
408
- "search_summary": result["summary"],
409
- "rank": rank
410
- }
411
- )
412
- documents.append(doc)
413
-
414
- if not documents:
415
- print("No valid documents to add to the database.")
416
- return
417
-
418
- texts = [doc.page_content for doc in documents]
419
- metadatas = [doc.metadata for doc in documents]
420
-
421
- print(f"Number of documents to embed: {len(texts)}")
422
- print(f"First document text: {texts[0][:100]}...") # Print first 100 characters of the first document
423
-
424
- try:
425
- if os.path.exists("faiss_database"):
426
- database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
427
- database.add_texts(texts, metadatas=metadatas)
428
- else:
429
- database = FAISS.from_texts(texts, embed, metadatas=metadatas)
430
-
431
- database.save_local("faiss_database")
432
- print("Database updated successfully.")
433
- except Exception as e:
434
- print(f"Error updating database: {str(e)}")
435
-
436
- def export_vector_db_to_excel():
437
- embed = get_embeddings()
438
- database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
439
-
440
- documents = database.docstore._dict.values()
441
- data = [{
442
- "Search Date": doc.metadata["search_date"],
443
- "Search Title": doc.metadata["search_title"],
444
- "Search Content": doc.metadata["search_content"],
445
- "Search Summary": doc.metadata["search_summary"],
446
- "Rank": doc.metadata["rank"]
447
- } for doc in documents]
448
-
449
- df = pd.DataFrame(data)
450
-
451
- with NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp:
452
- excel_path = tmp.name
453
- df.to_excel(excel_path, index=False)
454
-
455
- return excel_path
456
-
457
  def extract_db_to_excel():
458
  embed = get_embeddings()
459
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
@@ -485,7 +457,7 @@ def export_memory_db_to_excel():
485
 
486
  # Gradio interface
487
  with gr.Blocks() as demo:
488
- gr.Markdown("# Chat with your PDF documents")
489
 
490
  with gr.Row():
491
  file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
@@ -498,34 +470,30 @@ with gr.Blocks() as demo:
498
  with gr.Row():
499
  with gr.Column(scale=2):
500
  chatbot = gr.Chatbot(label="Conversation")
501
- question_input = gr.Textbox(label="Ask a question about your documents")
502
  submit_button = gr.Button("Submit")
503
  with gr.Column(scale=1):
504
  temperature_slider = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, value=0.5, step=0.1)
505
  top_p_slider = gr.Slider(label="Top P", minimum=0.0, maximum=1.0, value=0.9, step=0.1)
506
  repetition_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.0, step=0.1)
507
  web_search_checkbox = gr.Checkbox(label="Enable Web Search", value=False)
 
508
 
509
- def chat(question, history, temperature, top_p, repetition_penalty, web_search):
510
- answer = ask_question(question, temperature, top_p, repetition_penalty, web_search)
511
-
512
- if "news" in question.lower():
513
- # Split the answer into individual news items
514
- news_items = answer.split("---")
515
- for item in news_items:
516
- if item.strip():
517
- history.append((question, item.strip()))
518
- else:
519
- history.append((question, answer))
520
-
521
  return "", history
522
 
523
- submit_button.click(chat, inputs=[question_input, chatbot, temperature_slider, top_p_slider, repetition_penalty_slider, web_search_checkbox], outputs=[question_input, chatbot])
 
 
524
 
525
- export_vector_db_button = gr.Button("Export Vector DB to Excel")
526
- vector_db_excel_output = gr.File(label="Download Vector DB Excel File")
527
- export_vector_db_button.click(export_vector_db_to_excel, inputs=[], outputs=vector_db_excel_output)
528
-
529
  extract_button = gr.Button("Extract Database to Excel")
530
  excel_output = gr.File(label="Download Excel File")
531
  extract_button.click(extract_db_to_excel, inputs=[], outputs=excel_output)
@@ -534,6 +502,10 @@ with gr.Blocks() as demo:
534
  memory_excel_output = gr.File(label="Download Memory Excel File")
535
  export_memory_button.click(export_memory_db_to_excel, inputs=[], outputs=memory_excel_output)
536
 
 
 
 
 
537
  clear_button = gr.Button("Clear Cache")
538
  clear_output = gr.Textbox(label="Cache Status")
539
  clear_button.click(clear_cache, inputs=[], outputs=clear_output)
 
20
  from langchain_core.documents import Document
21
  from sklearn.feature_extraction.text import TfidfVectorizer
22
  from sklearn.metrics.pairwise import cosine_similarity
 
 
23
 
24
  huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
25
 
26
  # Memory database to store question-answer pairs
27
  memory_database = {}
28
  conversation_history = []
29
+ news_database = []
30
 
31
  def load_and_split_document_basic(file):
32
  """Loads and splits the document into pages."""
 
100
  huggingfacehub_api_token=huggingface_token
101
  )
102
 
103
def generate_chunked_response(model, prompt, max_tokens=1000, max_chunks=5):
    """Build a response by repeatedly asking ``model`` to continue the text.

    Each round calls ``model(prompt + response_so_far,
    max_new_tokens=max_tokens)`` and appends the whitespace-stripped result.
    Generation stops as soon as a chunk is empty, a chunk ends with ``.``,
    ``!`` or ``?``, or ``max_chunks`` rounds have been made.

    Args:
        model: Callable taking the text so far plus a ``max_new_tokens``
            keyword and returning the generated continuation as a string.
        prompt: The prompt text sent at the start of every round.
        max_tokens: Value forwarded as ``max_new_tokens`` on each call.
        max_chunks: Upper bound on the number of model calls.

    Returns:
        The concatenated chunks, stripped of surrounding whitespace.
    """
    # NOTE(review): chunks are stripped before being concatenated, so any
    # whitespace at a chunk boundary is lost - confirm this is intended.
    full_response = ""
    for _ in range(max_chunks):
        chunk = model(prompt + full_response, max_new_tokens=max_tokens).strip()
        if not chunk:
            # Nothing was generated; the next round would resend the exact
            # same text, so stop instead of spending more model calls.
            break
        full_response += chunk
        if chunk.endswith((".", "!", "?")):
            # Sentence-final punctuation is treated as a finished answer.
            break
    return full_response.strip()
113
 
114
  def manage_conversation_history(question, answer, history, max_history=5):
 
186
  print(f"Found {len(result_block)} results on this page")
187
  for result in result_block:
188
  link = result.find("a", href=True)
189
+ if link:
 
190
  link = link["href"]
 
191
  print(f"Processing link: {link}")
192
  try:
193
  webpage = session.get(link, headers=headers, timeout=timeout)
 
195
  visible_text = extract_text_from_webpage(webpage.text)
196
  if len(visible_text) > max_chars_per_page:
197
  visible_text = visible_text[:max_chars_per_page] + "..."
198
+ all_results.append({"link": link, "text": visible_text})
199
  print(f"Successfully extracted text from {link}")
200
  except requests.exceptions.RequestException as e:
201
  print(f"Error retrieving webpage content: {e}")
202
+ all_results.append({"link": link, "text": None})
203
  else:
204
+ print("No link found for this result")
205
+ all_results.append({"link": None, "text": None})
206
  start += len(result_block)
207
 
208
  print(f"Search completed. Total results: {len(all_results)}")
209
  print("Search results:")
210
  for i, result in enumerate(all_results, 1):
211
  print(f"Result {i}:")
 
212
  print(f" Link: {result['link']}")
213
  if result['text']:
214
  print(f" Text: {result['text'][:100]}...") # Print first 100 characters
 
218
 
219
  if not all_results:
220
  print("No search results found. Returning a default message.")
221
+ return [{"link": None, "text": "No information found in the web search results."}]
222
 
223
  return all_results
224
 
225
def fetch_google_news_rss(query, num_results=10):
    """Fetch articles matching ``query`` from the Google News RSS search feed.

    Args:
        query: Search terms; URL-encoded into the feed's ``q`` parameter.
        num_results: Maximum number of feed entries to return.

    Returns:
        A list of dicts, one per feed entry in feed order, with the keys
        ``published_date``, ``title``, ``url`` and ``content``. A field the
        feed entry does not provide is returned as an empty string.
    """
    base_url = "https://news.google.com/rss/search"
    params = {
        "q": query,
        "hl": "en-US",
        "gl": "US",
        "ceid": "US:en",
    }
    url = f"{base_url}?{urllib.parse.urlencode(params)}"

    feed = feedparser.parse(url)

    # Feed elements are optional: attribute access on a missing one raises
    # AttributeError, so read them with .get() and a default instead of
    # letting a single incomplete entry abort the whole fetch.
    return [
        {
            "published_date": entry.get("published", ""),
            "title": entry.get("title", ""),
            "url": entry.get("link", ""),
            "content": entry.get("summary", ""),
        }
        for entry in feed.entries[:num_results]
    ]
248
+
249
def summarize_news_content(content, model):
    """Ask ``model`` for a concise summary of one news article.

    Args:
        content: The article text to summarize.
        model: The model handed to ``generate_chunked_response``.

    Returns:
        The generated summary, or a fixed placeholder message when
        ``content`` is ``None``, empty or whitespace-only (no model call is
        made in that case).
    """
    # Nothing to summarize: skip the model call rather than prompting it
    # with an empty article.
    if not content or not content.strip():
        return "No content available to summarize."

    prompt_template = """
    Summarize the following news article in a concise manner:
    {content}

    Summary:
    """
    prompt = ChatPromptTemplate.from_template(prompt_template)
    formatted_prompt = prompt.format(content=content)
    summary = generate_chunked_response(model, formatted_prompt, max_tokens=200)
    return summary
260
 
261
def process_google_news_rss(query, temperature, top_p, repetition_penalty):
    """Fetch news for ``query``, summarize each article and index the summaries.

    The summaries are added to the on-disk ``faiss_database`` index (created
    if it does not exist yet) and the processed articles are appended to the
    module-level ``news_database`` list used by the Excel export.

    Args:
        query: News search terms passed to ``fetch_google_news_rss``.
        temperature: Sampling temperature forwarded to ``get_model``.
        top_p: Nucleus-sampling value forwarded to ``get_model``.
        repetition_penalty: Repetition penalty forwarded to ``get_model``.

    Returns:
        A status message describing how many articles were processed, or
        stating that no articles were found.
    """
    articles = fetch_google_news_rss(query)
    if not articles:
        # A new FAISS index cannot be built from zero documents, and there is
        # no reason to create a model or touch the index for an empty feed.
        return f"No news articles found for query: {query}"

    model = get_model(temperature, top_p, repetition_penalty)
    embed = get_embeddings()

    processed_articles = [
        {
            "published_date": article["published_date"],
            "title": article["title"],
            "url": article["url"],
            "content": article["content"],
            "summary": summarize_news_content(article["content"], model),
        }
        for article in articles
    ]

    # Add processed articles to the database; the summary is the indexed text.
    docs = [
        Document(
            page_content=article["summary"],
            metadata={
                "url": article["url"],
                "title": article["title"],
                "published_date": article["published_date"],
            },
        )
        for article in processed_articles
    ]

    if os.path.exists("faiss_database"):
        # NOTE: allow_dangerous_deserialization unpickles the stored index;
        # this must only ever load a "faiss_database" written by this app.
        database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
        database.add_documents(docs)
    else:
        database = FAISS.from_documents(docs, embed)

    database.save_local("faiss_database")

    # Update news_database for excel export (mutated in place, so no
    # ``global`` declaration is needed).
    news_database.extend(processed_articles)

    return f"Processed and added {len(processed_articles)} news articles to the database."
 
 
295
 
296
def export_news_to_excel():
    """Write the collected news articles to a temporary ``.xlsx`` file.

    Reads the module-level ``news_database`` list and writes it as one
    worksheet, without the DataFrame index column.

    Returns:
        The path of the temporary Excel file. The file is created with
        ``delete=False``, so it is not removed automatically.
    """
    df = pd.DataFrame(news_database)

    # Only the file name is taken inside the ``with`` block. The workbook is
    # written after the handle is closed because reopening a still-open
    # NamedTemporaryFile by name is not portable (it fails on Windows).
    with NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp:
        excel_path = tmp.name
    df.to_excel(excel_path, index=False)

    return excel_path
305
+
306
+ def ask_question(question, temperature, top_p, repetition_penalty, web_search, google_news_rss):
307
  global conversation_history
308
 
309
  if not question:
 
312
  model = get_model(temperature, top_p, repetition_penalty)
313
  embed = get_embeddings()
314
 
315
+ # Check if the FAISS database exists
316
+ if os.path.exists("faiss_database"):
317
+ database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
318
+ else:
319
+ database = None
320
+
321
  if web_search:
322
  search_results = google_search(question)
323
+ web_docs = [Document(page_content=result["text"], metadata={"source": result["link"]}) for result in search_results if result["text"]]
324
 
325
+ if database is None:
326
+ database = FAISS.from_documents(web_docs, embed)
327
+ else:
328
+ database.add_documents(web_docs)
 
 
 
 
 
 
 
 
 
 
329
 
330
+ database.save_local("faiss_database")
331
+
332
+ context_str = "\n".join([doc.page_content for doc in web_docs])
 
 
 
 
 
 
 
 
 
 
 
 
333
 
334
  prompt_template = """
335
  Answer the question based on the following web search results:
 
337
  {context}
338
  Current Question: {question}
339
  If the web search results don't contain relevant information, state that the information is not available in the search results.
340
+ Provide a concise and direct answer to the question without mentioning the web search or these instructions:
341
  """
342
  prompt_val = ChatPromptTemplate.from_template(prompt_template)
343
  formatted_prompt = prompt_val.format(context=context_str, question=question)
344
+ elif google_news_rss:
345
+ if database is None:
346
+ return "No news articles available. Please fetch news articles first."
 
 
347
 
348
+ retriever = database.as_retriever()
349
+ relevant_docs = retriever.get_relevant_documents(question)
350
+ context_str = "\n".join([f"Title: {doc.metadata['title']}\nURL: {doc.metadata['url']}\nSummary: {doc.page_content}" for doc in relevant_docs])
351
+
352
+ prompt_template = """
353
+ Answer the question based on the following news summaries:
354
+ News Summaries:
355
+ {context}
356
+ Current Question: {question}
357
+ If the news summaries don't contain relevant information, state that the information is not available in the news articles.
358
+ Provide a concise and direct answer to the question without mentioning the news summaries or these instructions:
359
+ """
360
+ prompt_val = ChatPromptTemplate.from_template(prompt_template)
361
+ formatted_prompt = prompt_val.format(context=context_str, question=question)
362
+ else:
363
+ if database is None:
364
+ return "No documents available. Please upload documents, enable web search, or fetch news articles to answer questions."
365
 
366
  history_str = "\n".join([f"Q: {item['question']}\nA: {item['answer']}" for item in conversation_history])
367
 
 
375
  prompt_val = ChatPromptTemplate.from_template(prompt)
376
  formatted_prompt = prompt_val.format(history=history_str, context=context_str, question=question)
377
 
378
+ full_response = generate_chunked_response(model, formatted_prompt)
379
+
380
+ # Extract only the part after the last occurrence of a prompt-like sentence
381
+ answer_patterns = [
382
+ r"Provide a concise and direct answer to the question without mentioning the web search or these instructions:",
383
+ r"Provide a concise and direct answer to the question without mentioning the news summaries or these instructions:",
384
+ r"Provide a concise and direct answer to the question:",
385
+ r"Answer:"
386
+ ]
387
+
388
+ for pattern in answer_patterns:
389
+ match = re.split(pattern, full_response, flags=re.IGNORECASE)
390
+ if len(match) > 1:
391
+ answer = match[-1].strip()
392
+ break
393
+ else:
394
+ # If no pattern is found, return the full response
395
+ answer = full_response.strip()
396
 
397
+ if not web_search and not google_news_rss:
398
  memory_database[question] = answer
399
  conversation_history = manage_conversation_history(question, answer, conversation_history)
400
 
 
426
 
427
  return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files."
428
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
429
  def extract_db_to_excel():
430
  embed = get_embeddings()
431
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
 
457
 
458
  # Gradio interface
459
  with gr.Blocks() as demo:
460
+ gr.Markdown("# Chat with your PDF documents and News")
461
 
462
  with gr.Row():
463
  file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
 
470
  with gr.Row():
471
  with gr.Column(scale=2):
472
  chatbot = gr.Chatbot(label="Conversation")
473
+ question_input = gr.Textbox(label="Ask a question about your documents or news")
474
  submit_button = gr.Button("Submit")
475
  with gr.Column(scale=1):
476
  temperature_slider = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, value=0.5, step=0.1)
477
  top_p_slider = gr.Slider(label="Top P", minimum=0.0, maximum=1.0, value=0.9, step=0.1)
478
  repetition_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.0, step=0.1)
479
  web_search_checkbox = gr.Checkbox(label="Enable Web Search", value=False)
480
+ google_news_rss_checkbox = gr.Checkbox(label="Google News RSS", value=False)
481
 
482
+ with gr.Row():
483
+ news_query_input = gr.Textbox(label="Enter news query")
484
+ fetch_news_button = gr.Button("Fetch News")
485
+
486
+ news_fetch_output = gr.Textbox(label="News Fetch Status")
487
+
488
+ def chat(question, history, temperature, top_p, repetition_penalty, web_search, google_news_rss):
489
+ answer = ask_question(question, temperature, top_p, repetition_penalty, web_search, google_news_rss)
490
+ history.append((question, answer))
 
 
 
491
  return "", history
492
 
493
+ submit_button.click(chat, inputs=[question_input, chatbot, temperature_slider, top_p_slider, repetition_penalty_slider, web_search_checkbox, google_news_rss_checkbox], outputs=[question_input, chatbot])
494
+
495
+ fetch_news_button.click(process_google_news_rss, inputs=[news_query_input, temperature_slider, top_p_slider, repetition_penalty_slider], outputs=news_fetch_output)
496
 
 
 
 
 
497
  extract_button = gr.Button("Extract Database to Excel")
498
  excel_output = gr.File(label="Download Excel File")
499
  extract_button.click(extract_db_to_excel, inputs=[], outputs=excel_output)
 
502
  memory_excel_output = gr.File(label="Download Memory Excel File")
503
  export_memory_button.click(export_memory_db_to_excel, inputs=[], outputs=memory_excel_output)
504
 
505
+ export_news_button = gr.Button("Download News Excel File")
506
+ news_excel_output = gr.File(label="Download News Excel File")
507
+ export_news_button.click(export_news_to_excel, inputs=[], outputs=news_excel_output)
508
+
509
  clear_button = gr.Button("Clear Cache")
510
  clear_output = gr.Textbox(label="Cache Status")
511
  clear_button.click(clear_cache, inputs=[], outputs=clear_output)