from typing import final import gradio as gr import os import json from bs4 import BeautifulSoup import requests from huggingface_hub import InferenceClient from datetime import datetime, timedelta import json # Define global variables BOT_AVATAR = 'https://automatedstockmining.org/wp-content/uploads/2024/08/south-west-value-mining-logo.webp' client = InferenceClient(token=os.getenv("HF_TOKEN")) custom_css = ''' .gradio-container { font-family: 'Roboto', sans-serif; } .main-header { text-align: center; color: #4a4a4a; margin-bottom: 2rem; } .tab-header { font-size: 1.2rem; font-weight: bold; margin-bottom: 1rem; } .custom-chatbot { border-radius: 10px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); } .custom-button { background-color: #3498db; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer; transition: background-color 0.3s ease; } .custom-button:hover { background-color: #2980b9; } ''' def extract_text_from_webpage(html): soup = BeautifulSoup(html, "html.parser") # Extract visible text, removing unnecessary elements (e.g., scripts, styles) for script in soup(["script", "style"]): script.decompose() visible_text = soup.get_text(separator=" ", strip=True) return visible_text def search(query): term = query max_chars_per_page = 8000 all_results = [] with requests.Session() as session: try: # Send a search request to Google resp = session.get( url="https://www.google.com/search", headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"}, params={"q": term, "num": 4}, timeout=5 ) resp.raise_for_status() # Ensure the request was successful soup = BeautifulSoup(resp.text, "html.parser") result_block = soup.find_all("div", attrs={"class": "g"}) for result in result_block: link = result.find("a", href=True) if link: link = link["href"] try: # Fetch the webpage at the found link webpage = session.get(link, headers={"User-Agent": "Mozilla/5.0"}, timeout=5) webpage.raise_for_status() # Extract visible text from the webpage visible_text = extract_text_from_webpage(webpage.text) if len(visible_text) > max_chars_per_page: visible_text = visible_text[:max_chars_per_page] all_results.append({"link": link, "text": visible_text}) except requests.exceptions.RequestException as e: print(f"Failed to retrieve {link}: {e}") all_results.append({"link": link, "text": None}) except requests.exceptions.RequestException as e: print(f"Google search failed: {e}") return all_results def process_query(user_input, history): if len(history) > 0 and history[-1]['role'] == 'user' and history[-1]['content'].lower() == user_input.lower(): gr.Info('Searching the web for the latest data...', duration=4) else: # Append new user message to the history history.append({"role": "user", "content": user_input}) search_results = search(user_input) search_results_str = json.dumps(search_results) response = client.chat_completion( model="Qwen/Qwen2.5-72B-Instruct", messages=[{"role": "user", "content": f"YOU ARE IM.X, AN INVESTMENT CHATBOT BUILT BY automatedstockmining.org. Answer the user's request '{user_input}' using the following information: {search_results_str} and the chat history{history}. Provide a concise, direct answer in no more than 2-3 sentences. use the appropriate emojis for some of your responses"}], max_tokens=400, stream=False ) final_response = response.choices[0].message['content'] history.append({"role": "assistant", "content": final_response}) return history, "" # Clear the input box after sending the response def clear_history(): return [], "" # Return empty history and clear the input box # Function to undo the last user-bot message pair def undo_last(history): if len(history) >= 2: # Ensure that there's at least one user-bot message pair history.pop() # Remove the bot's response history.pop() # Remove the user's input return history, "" # Return updated history and clear the input box # Gradio UI setup theme = gr.themes.Citrus( primary_hue="blue", neutral_hue="slate", ) with gr.Blocks(theme = theme,css = custom_css) as demo: gr.Markdown("