Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
import os
|
2 |
-
|
3 |
from langchain import PromptTemplate
|
4 |
from langchain.agents import initialize_agent, Tool
|
5 |
from langchain.agents import AgentType
|
@@ -20,14 +19,23 @@ import time
|
|
20 |
from duckduckgo_search import DDGS
|
21 |
from itertools import islice
|
22 |
|
23 |
-
# serper_api_key = os.environ.get('SERPER_API_KEY')
|
24 |
|
25 |
-
# 1. Tool for search
|
26 |
def search(query, max_retries=5):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
for attempt in range(max_retries):
|
28 |
try:
|
29 |
result = []
|
30 |
|
|
|
31 |
with DDGS() as ddgs:
|
32 |
response = ddgs.text(query, region='wt-wt', safesearch='Off', timelimit='y')
|
33 |
for r in islice(response, 20):
|
@@ -35,68 +43,40 @@ def search(query, max_retries=5):
|
|
35 |
return result
|
36 |
|
37 |
except requests.RequestException as e:
|
|
|
38 |
print(f"Attempt {attempt + 1} raised an error: {e}. Retrying...")
|
39 |
-
if attempt < max_retries - 1:
|
40 |
time.sleep(1)
|
41 |
|
42 |
-
except Exception as e:
|
|
|
43 |
print(f"An unexpected error occurred on attempt {attempt + 1}: {e}. Retrying...")
|
44 |
if attempt < max_retries - 1:
|
45 |
time.sleep(1)
|
46 |
|
47 |
else:
|
|
|
48 |
print("Max retries reached. Exiting...")
|
49 |
return None
|
50 |
|
51 |
-
|
52 |
-
|
53 |
-
# def search(query, max_retries=5):
|
54 |
-
# url = "https://google.serper.dev/search"
|
55 |
-
|
56 |
-
# payload = json.dumps({
|
57 |
-
# "q": query
|
58 |
-
# })
|
59 |
-
|
60 |
-
# headers = {
|
61 |
-
# 'X-API-KEY': serper_api_key,
|
62 |
-
# 'Content-Type': 'application/json'
|
63 |
-
# }
|
64 |
-
|
65 |
-
# for attempt in range(max_retries):
|
66 |
-
# try:
|
67 |
-
# response = requests.request("POST", url, headers=headers, data=payload, verify=False)
|
68 |
-
|
69 |
-
# # Check if response is successful (e.g., HTTP 200 OK)
|
70 |
-
# if response.status_code == 200:
|
71 |
-
# print(response.text)
|
72 |
-
# return response.text
|
73 |
-
# else:
|
74 |
-
# print(f"Attempt {attempt + 1} failed with status code {response.status_code}. Retrying...")
|
75 |
-
# if attempt < max_retries - 1: # no need to sleep on the last attempt
|
76 |
-
# time.sleep(1)
|
77 |
-
# else:
|
78 |
-
# print("Max retries reached. Exiting...")
|
79 |
-
|
80 |
-
# except requests.RequestException as e:
|
81 |
-
# print(f"Attempt {attempt + 1} raised an error: {e}. Retrying...")
|
82 |
-
# if attempt < max_retries - 1: # no need to sleep on the last attempt
|
83 |
-
# time.sleep(1)
|
84 |
-
# else:
|
85 |
-
# print("Max retries reached. Exiting...")
|
86 |
-
|
87 |
-
# return None
|
88 |
-
|
89 |
-
|
90 |
-
# 2. Tool for scraping
|
91 |
def scrape_website(objective: str, url: str):
|
92 |
-
|
93 |
-
|
94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
print("Scraping website...")
|
96 |
try:
|
|
|
97 |
article = NewsPlease.from_url(url)
|
98 |
print(f'{article.title} - {article.url}')
|
99 |
text = article.maintext
|
|
|
100 |
if len(text) > 10000:
|
101 |
output = summary(objective, text)
|
102 |
return output
|
@@ -105,61 +85,67 @@ def scrape_website(objective: str, url: str):
|
|
105 |
except:
|
106 |
pass
|
107 |
|
108 |
-
|
109 |
def summary(objective, content):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k-0613", streaming=True)
|
111 |
|
112 |
-
|
113 |
-
|
114 |
docs = text_splitter.create_documents([content])
|
|
|
115 |
map_prompt = """
|
116 |
Write a summary of the following text for {objective}:
|
117 |
"{text}"
|
118 |
SUMMARY:
|
119 |
"""
|
120 |
-
map_prompt_template = PromptTemplate(
|
121 |
-
template=map_prompt, input_variables=["text", "objective"])
|
122 |
-
|
123 |
-
summary_chain = load_summarize_chain(
|
124 |
-
llm=llm,
|
125 |
-
chain_type='map_reduce',
|
126 |
-
map_prompt=map_prompt_template,
|
127 |
-
combine_prompt=map_prompt_template,
|
128 |
-
verbose=True
|
129 |
-
)
|
130 |
|
131 |
-
|
|
|
132 |
|
|
|
133 |
return output
|
134 |
|
135 |
-
|
136 |
class ScrapeWebsiteInput(BaseModel):
|
137 |
-
"""Inputs for scrape_website"""
|
138 |
-
objective: str = Field(
|
139 |
-
description="The objective & task that users give to the agent")
|
140 |
url: str = Field(description="The url of the website to be scraped")
|
141 |
|
142 |
-
|
143 |
class ScrapeWebsiteTool(BaseTool):
|
|
|
|
|
|
|
144 |
name = "scrape_website"
|
145 |
description = "useful when you need to get data from a website url, passing both url and objective to the function; DO NOT make up any url, the url should only be from the search results"
|
146 |
args_schema: Type[BaseModel] = ScrapeWebsiteInput
|
147 |
|
148 |
def _run(self, objective: str, url: str):
|
|
|
149 |
return scrape_website(objective, url)
|
150 |
|
151 |
def _arun(self, url: str):
|
|
|
152 |
raise NotImplementedError("error here")
|
153 |
|
154 |
@cl.langchain_factory(use_async=False)
|
155 |
def run():
|
156 |
-
|
|
|
|
|
|
|
|
|
|
|
157 |
tools = [
|
158 |
-
Tool(
|
159 |
-
name="Search",
|
160 |
-
func=search,
|
161 |
-
description="useful for when you need to answer questions about current events, data. You should ask targeted questions"
|
162 |
-
),
|
163 |
ScrapeWebsiteTool(),
|
164 |
]
|
165 |
|
@@ -175,21 +161,14 @@ def run():
|
|
175 |
5/ In the final output, You should include all reference data & links to back up your research; You should include all reference data & links to back up your research
|
176 |
6/ In the final output, You should include all reference data & links to back up your research; You should include all reference data & links to back up your research"""
|
177 |
)
|
178 |
-
|
179 |
agent_kwargs = {
|
180 |
"extra_prompt_messages": [MessagesPlaceholder(variable_name="memory")],
|
181 |
"system_message": system_message,
|
182 |
}
|
183 |
|
|
|
184 |
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k-0613", streaming=True)
|
185 |
-
memory = ConversationSummaryBufferMemory(
|
186 |
-
|
187 |
-
|
188 |
-
return initialize_agent(
|
189 |
-
tools,
|
190 |
-
llm,
|
191 |
-
agent=AgentType.OPENAI_FUNCTIONS,
|
192 |
-
verbose=True,
|
193 |
-
agent_kwargs=agent_kwargs,
|
194 |
-
memory=memory,
|
195 |
-
)
|
|
|
1 |
import os
|
|
|
2 |
from langchain import PromptTemplate
|
3 |
from langchain.agents import initialize_agent, Tool
|
4 |
from langchain.agents import AgentType
|
|
|
19 |
from duckduckgo_search import DDGS
|
20 |
from itertools import islice
|
21 |
|
|
|
22 |
|
|
|
23 |
def search(query, max_retries=5):
|
24 |
+
"""
|
25 |
+
Search the given query using DuckDuckGo.
|
26 |
+
|
27 |
+
Args:
|
28 |
+
- query (str): The search query.
|
29 |
+
- max_retries (int): Maximum number of retries in case of request failure.
|
30 |
+
|
31 |
+
Returns:
|
32 |
+
- list[dict]: A list of search results with 'title' and 'url'.
|
33 |
+
"""
|
34 |
for attempt in range(max_retries):
|
35 |
try:
|
36 |
result = []
|
37 |
|
38 |
+
# Initialize the DuckDuckGo search object.
|
39 |
with DDGS() as ddgs:
|
40 |
response = ddgs.text(query, region='wt-wt', safesearch='Off', timelimit='y')
|
41 |
for r in islice(response, 20):
|
|
|
43 |
return result
|
44 |
|
45 |
except requests.RequestException as e:
|
46 |
+
# Handle request exceptions.
|
47 |
print(f"Attempt {attempt + 1} raised an error: {e}. Retrying...")
|
48 |
+
if attempt < max_retries - 1:
|
49 |
time.sleep(1)
|
50 |
|
51 |
+
except Exception as e:
|
52 |
+
# Handle other exceptions.
|
53 |
print(f"An unexpected error occurred on attempt {attempt + 1}: {e}. Retrying...")
|
54 |
if attempt < max_retries - 1:
|
55 |
time.sleep(1)
|
56 |
|
57 |
else:
|
58 |
+
# If max retries reached, exit the function.
|
59 |
print("Max retries reached. Exiting...")
|
60 |
return None
|
61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
def scrape_website(objective: str, url: str):
|
63 |
+
"""
|
64 |
+
Scrape and potentially summarize the content of a website based on a given objective.
|
65 |
|
66 |
+
Args:
|
67 |
+
- objective (str): The objective & task that users give to the agent.
|
68 |
+
- url (str): The URL of the website to be scraped.
|
69 |
+
|
70 |
+
Returns:
|
71 |
+
- str: Extracted or summarized content of the website.
|
72 |
+
"""
|
73 |
print("Scraping website...")
|
74 |
try:
|
75 |
+
# Use NewsPlease to scrape the website.
|
76 |
article = NewsPlease.from_url(url)
|
77 |
print(f'{article.title} - {article.url}')
|
78 |
text = article.maintext
|
79 |
+
# Summarize if content is too large.
|
80 |
if len(text) > 10000:
|
81 |
output = summary(objective, text)
|
82 |
return output
|
|
|
85 |
except:
|
86 |
pass
|
87 |
|
|
|
88 |
def summary(objective, content):
    """Summarize ``content`` with respect to ``objective`` via a map-reduce chain.

    Args:
        objective: Text substituted for ``{objective}`` in the prompt.
        content: Text to summarize; it is split into chunks first.

    Returns:
        The value returned by the summarize chain's ``run`` call.
    """
    chat_model = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k-0613", streaming=True)

    # Split on blank lines first, then single newlines; chunks are capped at
    # 10000 characters and overlap by 500.
    splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n"],
        chunk_size=10000,
        chunk_overlap=500,
    )
    chunks = splitter.create_documents([content])

    # The literal's lines are part of the prompt text, so they are reproduced
    # exactly as they appear in the source.
    prompt_text = """
Write a summary of the following text for {objective}:
"{text}"
SUMMARY:
"""
    prompt = PromptTemplate(
        template=prompt_text,
        input_variables=["text", "objective"],
    )

    # The same template is passed for both the map step and the combine step.
    chain = load_summarize_chain(
        llm=chat_model,
        chain_type='map_reduce',
        map_prompt=prompt,
        combine_prompt=prompt,
        verbose=True,
    )

    return chain.run(input_documents=chunks, objective=objective)
|
117 |
|
|
|
118 |
class ScrapeWebsiteInput(BaseModel):
    """Inputs for scrape_website function."""

    # Argument schema for the scraping tool: one field per parameter that
    # scrape_website(objective, url) takes.

    # What the user asked the agent to accomplish.
    objective: str = Field(
        description="The objective & task that users give to the agent",
    )

    # Address of the page to fetch.
    url: str = Field(
        description="The url of the website to be scraped",
    )
|
122 |
|
|
|
123 |
class ScrapeWebsiteTool(BaseTool):
    """Agent tool that exposes ``scrape_website`` under the name ``scrape_website``.

    Arguments are described by ``ScrapeWebsiteInput`` (``objective`` and
    ``url``). Only synchronous execution is supported.
    """

    name: str = "scrape_website"
    description: str = "useful when you need to get data from a website url, passing both url and objective to the function; DO NOT make up any url, the url should only be from the search results"
    args_schema: Type[BaseModel] = ScrapeWebsiteInput

    def _run(self, objective: str, url: str):
        """Scrape ``url`` for ``objective`` and return ``scrape_website``'s result."""
        return scrape_website(objective, url)

    def _arun(self, url: str, objective: str = ""):
        """Reject asynchronous use; this tool has no async implementation.

        ``objective`` is accepted (with a default, so existing ``_arun(url)``
        calls keep working) because the argument schema declares both fields.
        Without it, a call passing both schema arguments would fail with a
        ``TypeError`` instead of the intended ``NotImplementedError``.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("scrape_website does not support async")
|
138 |
|
139 |
@cl.langchain_factory(use_async=False)
|
140 |
def run():
|
141 |
+
"""
|
142 |
+
Initialize and return a langchain agent with search and scraping tools.
|
143 |
+
|
144 |
+
Returns:
|
145 |
+
- Agent: Initialized langchain agent.
|
146 |
+
"""
|
147 |
tools = [
|
148 |
+
Tool(name="Search", func=search, description="useful for when you need to answer questions about current events, data. You should ask targeted questions"),
|
|
|
|
|
|
|
|
|
149 |
ScrapeWebsiteTool(),
|
150 |
]
|
151 |
|
|
|
161 |
5/ In the final output, You should include all reference data & links to back up your research; You should include all reference data & links to back up your research
|
162 |
6/ In the final output, You should include all reference data & links to back up your research; You should include all reference data & links to back up your research"""
|
163 |
)
|
|
|
164 |
agent_kwargs = {
|
165 |
"extra_prompt_messages": [MessagesPlaceholder(variable_name="memory")],
|
166 |
"system_message": system_message,
|
167 |
}
|
168 |
|
169 |
+
# Initialize the ChatOpenAI model.
|
170 |
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k-0613", streaming=True)
|
171 |
+
memory = ConversationSummaryBufferMemory(memory_key="memory", return_messages=True, llm=llm)
|
172 |
+
|
173 |
+
# Initialize the agent with tools and other configurations.
|
174 |
+
return initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True, agent_kwargs=agent_kwargs, memory=memory)
|
|
|
|
|
|
|
|
|
|
|
|
|
|