# NOTE: this file was exported from a notebook environment; stray cell-output
# markers ("Spaces:", "Runtime error") from the export have been removed.
# --- Imports and dataset loading ----------------------------------------
from langchain.tools import tool
import requests
from pydantic import BaseModel, Field
import datetime
import pandas as pd
from langchain.prompts import MessagesPlaceholder

# Fund financial-metrics dataset used by the `dataframeagent` tool below.
dataf = pd.read_csv("HW 1 newest version.csv")

# create_pandas_dataframe_agent lives in langchain_experimental, not langchain.
from langchain_experimental.agents import create_pandas_dataframe_agent
from langchain.chat_models import ChatOpenAI
from langchain.agents.agent_types import AgentType
def dataframeagent(value: str, df=None) -> str:
    """Render the fund dataframe and report the extreme rows per metric.

    Parameters
    ----------
    value : str
        The value the user searched for (echoed in the report header).
    df : pandas.DataFrame, optional
        Dataframe to analyse; defaults to the module-level ``dataf``.

    Returns
    -------
    str
        The full dataframe as text, followed by the rows holding the
        highest and lowest value of each numeric metric column (or an
        error note for columns that cannot be processed).
    """
    frame = dataf if df is None else df
    # Work on a copy: the original converted columns in place, which both
    # mutated the shared global and made a second call fail because `.str`
    # no longer applies once a column has become numeric.
    frame = frame.copy()

    # Metric columns for which we report the highest/lowest rows.
    columns_to_check = [
        'Profit Margin', 'Operating Margin (ttm)', 'Return on Assets (ttm)',
        'Return on Equity (ttm)', 'Revenue (ttm)', 'Revenue Per Share (ttm)',
    ]
    result = [f"Search Results for '{value}':\n{frame.to_string(index=False)}\n"]

    for column in columns_to_check:
        try:
            series = frame[column]
            # Strip '%' and 'M' (millions) markers before numeric conversion;
            # already-numeric columns pass through unchanged.
            if series.dtype == object:
                series = (series.str.replace('%', '', regex=False)
                                .str.replace('M', '', regex=False))
            numeric = pd.to_numeric(series, errors='coerce')
            highest_row = frame.loc[numeric.idxmax()]
            lowest_row = frame.loc[numeric.idxmin()]
            result.append(f"Highest {column}:\n{highest_row.to_string()}\n")
            result.append(f"Lowest {column}:\n{lowest_row.to_string()}\n")
        except Exception as e:
            # Keep going: one bad column should not abort the whole report.
            result.append(f"Error processing column {column}: {str(e)}\n")
    return "\n".join(result)
import json
from pathlib import Path
import pandas as pd

# Convert the Chinese FAQ CSV into a JSON file consumed by JSONLoader below.
example_filepath = "QA_summary_zh.csv"
csv_data = pd.read_csv(example_filepath, encoding="utf-8")
# force_ascii=False keeps the Chinese text readable in the JSON output.
json_data = csv_data.to_json(orient='records', force_ascii=False)

# Save the JSON data to a file next to the CSV.
json_file_path = "QA_summary_zh.json"
with open(json_file_path, 'w', encoding='utf-8') as json_file:
    json_file.write(json_data)

# Round-trip check: parse the file we just wrote.
data = json.loads(Path(json_file_path).read_text())
from langchain.document_loaders import JSONLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

file_path = 'QA_summary_zh.json'
# jq schema: pull the Question / Answer / description fields from each record.
jq_schema = '.[] | {Question: .Question , Answer: .Answer , description: .description }'

loader = JSONLoader(
    file_path=file_path,
    jq_schema=jq_schema,
    text_content=False,  # records are dicts, not plain strings
)

# Load the documents.
docs = loader.load()
print(docs)
# FAQ entries are short, so no further splitting is applied.
all_splits = docs
import json
from pathlib import Path
import pandas as pd
import os

from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# SECURITY: never hard-code API keys in source (the original embedded a live
# key here). Supply it via the environment instead:
#   export OPENAI_API_KEY=sk-...
if "OPENAI_API_KEY" not in os.environ:
    raise EnvironmentError("Set the OPENAI_API_KEY environment variable before running.")

# Embed the FAQ documents into an in-memory Chroma vector store.
vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())
# Imports for the FAQ retrieval-augmented tool.
from langchain import hub
from langchain.prompts import PromptTemplate
from langchain.schema import StrOutputParser
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage
from langchain_core.runnables import RunnablePassthrough, RunnableLambda


def FAQ(question: str) -> str:
    """Answer a Traditional-Chinese FAQ question using retrieved context.

    Retrieves the single most similar document from the module-level
    ``vectorstore``, formats it into a prompt, and asks the chat model.

    Parameters
    ----------
    question : str
        The user's question.

    Returns
    -------
    str
        The model's answer.
    """
    # Prompt template (kept in Traditional Chinese — it is sent to the model).
    template = """
    您是一個繁體中文的助理,以下是從知識庫中檢索到的相關內容,請根據它們回答用戶的問題。
    內容: {context}
    問題: {question}
    """

    def format_docs(docs):
        # Concatenate retrieved documents into a single context string.
        return "\n\n".join(doc.page_content for doc in docs)

    llm = ChatOpenAI(temperature=0.0)
    # k=1: only the single best-matching FAQ entry is used as context.
    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 1})

    # NOTE(review): a fresh memory/chain is created on every call, so there is
    # no cross-call conversation history — kept as-is to preserve behavior.
    memory = ConversationBufferMemory()
    conversation = ConversationChain(
        llm=llm,
        memory=memory,
        verbose=True,
    )

    # Retrieve context and fill the template.
    retrieved_docs = retriever.invoke(question)
    context = format_docs(retrieved_docs)
    formatted_prompt_input = template.format(context=context, question=question)

    # Run the conversation chain on the fully formatted prompt.
    response = conversation.predict(input=formatted_prompt_input)
    return response
import requests
from bs4 import BeautifulSoup
import random

# Pool of User-Agent headers; one is chosen at random per request so the
# scraper's traffic resembles a mix of ordinary browsers.
user_agents = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1",
]
def gresb(query: str) -> str:
    """Search gresb.com for `query` and return the first article's text.

    Parameters
    ----------
    query : str
        Search keywords (spaces are converted to '+').

    Returns
    -------
    str
        "Article Text: ...\\n\\nArticle URL: ..." on success, otherwise a
        human-readable status/error message.
    """
    base_url = "https://www.gresb.com/nl-en?s="
    search_url = f"{base_url}{query.replace(' ', '+')}"

    # Random User-Agent so repeated requests don't share a fingerprint.
    headers = {
        "User-Agent": random.choice(user_agents)
    }

    # timeout added: without it a stalled server would hang the agent forever.
    response = requests.get(search_url, headers=headers, timeout=10)
    if response.status_code != 200:
        return f"Failed to retrieve search results. Status code: {response.status_code}"

    # Parse the search-results page; the selector matches GRESB's card links.
    soup = BeautifulSoup(response.content, 'html.parser')
    results = soup.find_all('a', class_='overlay-link z-index-1')
    if not results:
        return "No search results found."

    # Follow the first result and extract its article body.
    article_url = results[0]['href']
    article_response = requests.get(article_url, headers=headers, timeout=10)
    if article_response.status_code != 200:
        return f"Failed to retrieve the article page. Status code: {article_response.status_code}"

    article_text = extract_article_text(article_response.content)
    return f"Article Text: {article_text}\n\nArticle URL: {article_url}"
def extract_article_text(html_content) -> str:
    """Extract readable article text from a GRESB article page.

    Parameters
    ----------
    html_content : bytes | str
        Raw HTML of the article page.

    Returns
    -------
    str
        The concatenated text of the article's paragraphs/headings, or a
        fallback message when the expected structure is absent.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    # GRESB articles render their body inside a div with class "wysiwyg".
    article = soup.find('div', class_='wysiwyg')
    if article:
        # Collect text-bearing tags: paragraphs, lists, quotes, sub-headings.
        paragraphs = article.find_all(['p', 'ul', 'blockquote', 'h2', 'h4'])
        return ' '.join(p.get_text() for p in paragraphs).strip()
    return "Article content not found in the provided structure."

# Example usage:
#   query = "london office"
#   print(gresb(query))
import os
import openai

# SECURITY: never hard-code API keys (the original embedded a live key here).
# The key must come from the environment.
if "OPENAI_API_KEY" not in os.environ:
    raise EnvironmentError("Set the OPENAI_API_KEY environment variable before running.")
openai.api_key = os.environ['OPENAI_API_KEY']

# Tools exposed to the agent.
tools = [gresb, dataframeagent, FAQ]

from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.tools.render import format_tool_to_openai_function
from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser

# Convert each tool to an OpenAI function-calling schema and bind to the model.
# NOTE(review): format_tool_to_openai_function expects LangChain tool objects;
# these are plain functions — confirm they are wrapped (e.g. with @tool).
functions = [format_tool_to_openai_function(f) for f in tools]
model = ChatOpenAI(temperature=0).bind(functions=functions)
def run_agent(user_input):
    """Drive the agent loop manually until it finishes or gives up.

    Parameters
    ----------
    user_input : str
        The user's query.

    Returns
    -------
    Any
        The agent's final ``return_values`` on success, or a Chinese
        error message (str) when the iteration limit is hit or an
        unknown tool is requested.
    """
    # (agent_action, observation) pairs fed back into the chain each turn.
    intermediate_steps = []
    max_iterations = 20  # guard against infinite tool-call loops
    iteration_count = 0

    while iteration_count < max_iterations:
        iteration_count += 1
        # Invoke the processing chain with the user input and history so far.
        result = agent_chain.invoke({
            "input": user_input,
            "intermediate_steps": intermediate_steps,
        })

        # AgentFinish means the model produced its final answer.
        if isinstance(result, AgentFinish):
            return result.return_values

        print(result.message_log)

        # Dispatch to the requested tool by name.
        tool = {
            "gresb": gresb,
            "dataframeagent": dataframeagent,
            "FAQ": FAQ,
        }.get(result.tool)

        if tool:
            # NOTE(review): plain functions have no .run(); this relies on the
            # tools being LangChain tool objects — confirm against `tools`.
            observation = tool.run(result.tool_input)
            intermediate_steps.append((result, observation))
        else:
            print(f"未找到合適的工具: {result.tool}")
            break

    # Iteration limit reached (or unknown tool) — report failure to the user.
    return "無法完成任務,請稍後再試。"
from langchain.prompts import MessagesPlaceholder, ChatPromptTemplate

# System prompt routing the model between the three tools.
# Fixed typo in the original prompt text: "foud" -> "fund".
prompt = ChatPromptTemplate.from_messages([
    ("system",
     """You are a helpful assistant. There are three tools to use based on different scenarios.
     1. gresb Tool:
     Usage Scenario: Use this tool when you need to search for fund information related to a specific area, city, or keyword on the GRESB website. It is ideal for searching fund details in specific locations such as "London office" or "Paris commercial real estate."
     2. dataframeagent Tool:
     Usage Scenario: This dataframe contains 'Fund Name', 'Region', 'Ticker','Profit Margin', 'Operating Margin (ttm)', 'Return on Assets (ttm)', 'Return on Equity (ttm)',
     'Revenue (ttm)', and 'Revenue Per Share (ttm)', choose one to search in the dataframe
     You have access to the following note: GRESB is not a fund.
     3. FAQ Tool
     Usage Scenario: use this tool to search for 綠建築標章申請審核認可及使用作業要點.
     example:「綠建築標章申請審核認可及使用作業要點」規定,修正重點為何?
     example:109年7月1日起申請綠建築標章評定有何改變?
     """),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])
from langchain.agents.format_scratchpad import format_to_openai_functions
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.agent import AgentFinish

# Chain: inject formatted scratchpad -> prompt -> model -> parse function calls.
agent_chain = RunnablePassthrough.assign(
    agent_scratchpad=lambda x: format_to_openai_functions(x["intermediate_steps"])
) | prompt | model | OpenAIFunctionsAgentOutputParser()

from langchain.memory import ConversationBufferMemory

# return_messages=True so MessagesPlaceholder("chat_history") receives
# message objects rather than a flat string.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

from langchain.agents import AgentExecutor

agent_executor = AgentExecutor(agent=agent_chain, tools=tools, verbose=True, memory=memory)
import gradio as gr


def process_input(user_input):
    """Run the agent on one Gradio submission and return its text output.

    Parameters
    ----------
    user_input : str
        Text entered by the user in the Gradio textbox.

    Returns
    -------
    str
        The agent's 'output' value, or a fallback message if absent.
    """
    # Clear history so every submission starts a fresh conversation.
    memory.clear()
    result = agent_executor.invoke({"input": user_input})
    # Extract the AIMessage content from the executor result.
    if 'output' in result:
        return result['output']
    return "No output found."


# Build the Gradio interface.
iface = gr.Interface(
    fn=process_input,                # handler function
    inputs="text",                   # user input type
    outputs="text",                  # output type
    title="TABC",                    # interface title
    description="The chatbot contains: Extracting YahooFinancial data, Scraping GRESB Website, and Retrieving 綠建築申請資料"
)

# Launch the interface.
iface.launch()