from langchain.tools import tool
import requests
from pydantic import BaseModel, Field
import datetime
import pandas as pd
from langchain.prompts import MessagesPlaceholder

dataf = pd.read_csv("HW 1 newest version.csv")

# Import create_pandas_dataframe_agent from langchain_experimental.agents
from langchain_experimental.agents import create_pandas_dataframe_agent
from langchain.chat_models import ChatOpenAI
from langchain.agents.agent_types import AgentType

# Define the dataframe search tool
@tool
def dataframeagent(value: str) -> str:
    """
    Search the dataframe for the specified value and report column extremes.

    Parameters:
        value (str): The value to search for in all columns.

    Returns:
        str: A string representation of the dataframe (the per-row filter below is
        currently commented out, so the full table is shown) together with the
        highest and lowest rows for the specified financial columns.
    """
    # First, search the entire dataframe for the specified value
    # filtered_data = dataf[dataf.apply(lambda row: row.astype(str).str.contains(value, case=False).any(), axis=1)]
    # if filtered_data.empty:
    #     return f"No matches found for '{value}'."

    # Columns for finding highest and lowest values
    columns_to_check = ['Profit Margin', 'Operating Margin  (ttm)', 'Return on Assets  (ttm)',
                        'Return on Equity  (ttm)', 'Revenue  (ttm)', 'Revenue Per Share  (ttm)']

    result = [f"Search Results for '{value}':\n{dataf.to_string(index=False)}\n"]

    # Find and display highest and lowest values for each numerical column
    for column in columns_to_check:
        try:
            # Convert column to numeric (removing symbols like '%' and 'M' for millions)
            dataf[column] = pd.to_numeric(
                dataf[column].str.replace('%', '').str.replace('M', ''),
                errors='coerce')
            highest_row = dataf.loc[dataf[column].idxmax()]
            lowest_row = dataf.loc[dataf[column].idxmin()]
            result.append(f"Highest {column}:\n{highest_row.to_string()}\n")
            result.append(f"Lowest {column}:\n{lowest_row.to_string()}\n")
        except Exception as e:
            result.append(f"Error processing column {column}: {str(e)}\n")

    return "\n".join(result)
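# Hedged usage sketch (illustrative, not from the original script): the @tool decorator
# wraps dataframeagent as a LangChain tool, so in recent LangChain versions it is
# invoked with .invoke() and a dict of arguments rather than called directly.
# The search value "AAPL" is an assumption about what the CSV contains.
summary = dataframeagent.invoke({"value": "AAPL"})
print(summary[:500])  # preview the first part of the report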
import json
from pathlib import Path
import pandas as pd

example_filepath = "QA_summary_zh.csv"

# Read the CSV file
csv_data = pd.read_csv(example_filepath, encoding="utf-8")

# Convert the CSV to JSON records
json_data = csv_data.to_json(orient='records', force_ascii=False)
json_data

# Save the JSON data to a file
json_file_path = "QA_summary_zh.json"
with open(json_file_path, 'w', encoding='utf-8') as json_file:
    json_file.write(json_data)

data = json.loads(Path(json_file_path).read_text())

from langchain.document_loaders import JSONLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

file_path = 'QA_summary_zh.json'

# Define a jq schema that extracts the text content.
# Each JSON record is assumed to expose Question, Answer, and description fields.
jq_schema = '.[] | {Question: .Question , Answer: .Answer , description: .description }'

loader = JSONLoader(
    file_path=file_path,
    jq_schema=jq_schema,
    text_content=False)

# Load the documents
docs = loader.load()
print(docs)

all_splits = docs

import json
from pathlib import Path
import pandas as pd
import os
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

os.environ["OPENAI_API_KEY"] = "sk-..."  # replace with your own OpenAI API key

vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())

# Import necessary modules
from langchain import hub
from langchain.prompts import PromptTemplate
from langchain.schema import StrOutputParser
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

@tool
def FAQ(question: str) -> str:
    """Processes a question, retrieves relevant context from the vector store, and generates a response."""
    # Prompt template (Traditional Chinese): "You are a Traditional-Chinese assistant.
    # Below is relevant content retrieved from the knowledge base; answer the user's
    # question based on it."
    template = """
    您是一個繁體中文的助理,以下是從知識庫中檢索到的相關內容,請根據它們回答用戶的問題。
    內容:
    {context}
    問題: {question}
    """

    # Join the retrieved documents into a single context string
    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)

    # Initialize the language model
    llm = ChatOpenAI(temperature=0.0)

    # Initialize the retriever (assuming `vectorstore` is predefined)
    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 1})

    # Initialize the conversation memory and chain
    memory = ConversationBufferMemory()
    conversation = ConversationChain(
        llm=llm,
        memory=memory,
        verbose=True
    )

    # Retrieve documents using the retriever
    retrieved_docs = retriever.invoke(question)
    context = format_docs(retrieved_docs)

    # Prepare the prompt input
    prompt_input = {
        "context": context,
        "question": question,
    }

    # Format the prompt input as a string
    formatted_prompt_input = template.format(
        context=prompt_input["context"],
        question=prompt_input["question"],
    )

    # Use the conversation chain to process the formatted input
    response = conversation.predict(input=formatted_prompt_input)

    return response
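# Hedged usage sketch (illustrative): FAQ is also a LangChain tool, so it is called
# through .invoke(). The question below is an assumption about what QA_summary_zh.csv
# covers; it asks "How do I apply for a refund?" in Traditional Chinese.
faq_answer = FAQ.invoke({"question": "請問如何申請退款?"})
print(faq_answer)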
article_url """ base_url = "https://www.gresb.com/nl-en?s=" search_url = f"{base_url}{query.replace(' ', '+')}" # Select a random User-Agent header headers = { "User-Agent": random.choice(user_agents) } # Make a request to the search URL with headers response = requests.get(search_url, headers=headers) # Check if the request was successful if response.status_code == 200: # Parse the HTML content soup = BeautifulSoup(response.content, 'html.parser') # Extract search results (adjust the selector based on the website structure) results = soup.find_all('a', class_='overlay-link z-index-1') # Check if there are any results if results: # Get the first result's link article_url = results[0]['href'] # Fetch the HTML content of the article article_response = requests.get(article_url, headers=headers) if article_response.status_code == 200: # Extract the article text and return it with the URL article_text = extract_article_text(article_response.content) return f"Article Text: {article_text}\n\nArticle URL: {article_url}" else: return f"Failed to retrieve the article page. Status code: {article_response.status_code}" else: return "No search results found." else: return f"Failed to retrieve search results. Status code: {response.status_code}" def extract_article_text(html_content): soup = BeautifulSoup(html_content, 'html.parser') # Look for common article structures on GRESB's website article = soup.find('div', class_='wysiwyg') if article: paragraphs = article.find_all(['p', 'ul', 'blockquote', 'h2', 'h4']) # Includes
        # Join the text of the collected elements into a single article string
        article_text = "\n".join(p.get_text(strip=True) for p in paragraphs)
        return article_text
    return "No article content found."
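# Hedged usage sketch (illustrative): the original script ends before showing how the
# tools are used. gresb, like the other tools, is invoked with .invoke(); the query
# string is an assumption. One common way to let an LLM choose between the three tools
# is initialize_agent with the OPENAI_FUNCTIONS agent type, sketched below.
print(gresb.invoke({"query": "ESG real estate benchmark"})[:500])

from langchain.agents import initialize_agent

agent = initialize_agent(
    tools=[dataframeagent, FAQ, gresb],
    llm=ChatOpenAI(temperature=0.0),
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
)
agent.run("Which company has the highest Profit Margin?")  # illustrative question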