snrspeaks committed
Commit f04acf1 · Parent: 860b13f

Update app.py

Files changed (1)
app.py +63 -84
app.py CHANGED
@@ -1,5 +1,4 @@
 import os
-
 from langchain import PromptTemplate
 from langchain.agents import initialize_agent, Tool
 from langchain.agents import AgentType
@@ -20,14 +19,23 @@ import time
 from duckduckgo_search import DDGS
 from itertools import islice
 
-# serper_api_key = os.environ.get('SERPER_API_KEY')
 
-# 1. Tool for search
 def search(query, max_retries=5):
+    """
+    Search the given query using DuckDuckGo.
+
+    Args:
+    - query (str): The search query.
+    - max_retries (int): Maximum number of retries in case of request failure.
+
+    Returns:
+    - list[dict]: A list of search results with 'title' and 'url'.
+    """
     for attempt in range(max_retries):
         try:
             result = []
 
+            # Initialize the DuckDuckGo search object.
             with DDGS() as ddgs:
                 response = ddgs.text(query, region='wt-wt', safesearch='Off', timelimit='y')
                 for r in islice(response, 20):
@@ -35,68 +43,40 @@ def search(query, max_retries=5):
             return result
 
         except requests.RequestException as e:
+            # Handle request exceptions.
             print(f"Attempt {attempt + 1} raised an error: {e}. Retrying...")
-            if attempt < max_retries - 1:  # no need to sleep on the last attempt
+            if attempt < max_retries - 1:
                 time.sleep(1)
 
-        except Exception as e:  # Generic error handling
+        except Exception as e:
+            # Handle other exceptions.
             print(f"An unexpected error occurred on attempt {attempt + 1}: {e}. Retrying...")
             if attempt < max_retries - 1:
                 time.sleep(1)
 
     else:
+        # If max retries reached, exit the function.
         print("Max retries reached. Exiting...")
         return None
 
-
-
-# def search(query, max_retries=5):
-#     url = "https://google.serper.dev/search"
-
-#     payload = json.dumps({
-#         "q": query
-#     })
-
-#     headers = {
-#         'X-API-KEY': serper_api_key,
-#         'Content-Type': 'application/json'
-#     }
-
-#     for attempt in range(max_retries):
-#         try:
-#             response = requests.request("POST", url, headers=headers, data=payload, verify=False)
-
-#             # Check if response is successful (e.g., HTTP 200 OK)
-#             if response.status_code == 200:
-#                 print(response.text)
-#                 return response.text
-#             else:
-#                 print(f"Attempt {attempt + 1} failed with status code {response.status_code}. Retrying...")
-#                 if attempt < max_retries - 1:  # no need to sleep on the last attempt
-#                     time.sleep(1)
-#                 else:
-#                     print("Max retries reached. Exiting...")
-
-#         except requests.RequestException as e:
-#             print(f"Attempt {attempt + 1} raised an error: {e}. Retrying...")
-#             if attempt < max_retries - 1:  # no need to sleep on the last attempt
-#                 time.sleep(1)
-#             else:
-#                 print("Max retries reached. Exiting...")
-
-#     return None
-
-
-# 2. Tool for scraping
 def scrape_website(objective: str, url: str):
-    # scrape website, and also will summarize the content based on objective if the content is too large
-    # objective is the original objective & task that user give to the agent, url is the url of the website to be scraped
-
+    """
+    Scrape and potentially summarize the content of a website based on a given objective.
+
+    Args:
+    - objective (str): The objective & task that users give to the agent.
+    - url (str): The URL of the website to be scraped.
+
+    Returns:
+    - str: Extracted or summarized content of the website.
+    """
     print("Scraping website...")
     try:
+        # Use NewsPlease to scrape the website.
         article = NewsPlease.from_url(url)
         print(f'{article.title} - {article.url}')
         text = article.maintext
+        # Summarize if content is too large.
         if len(text) > 10000:
             output = summary(objective, text)
             return output
@@ -105,61 +85,67 @@ def scrape_website(objective: str, url: str):
     except:
         pass
 
-
 def summary(objective, content):
+    """
+    Generate a summary for a given content based on the objective.
+
+    Args:
+    - objective (str): The objective for the summary.
+    - content (str): The content to be summarized.
+
+    Returns:
+    - str: Summarized content.
+    """
     llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k-0613", streaming=True)
 
-    text_splitter = RecursiveCharacterTextSplitter(
-        separators=["\n\n", "\n"], chunk_size=10000, chunk_overlap=500)
+    # Split the content into manageable chunks.
+    text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n"], chunk_size=10000, chunk_overlap=500)
     docs = text_splitter.create_documents([content])
+
     map_prompt = """
     Write a summary of the following text for {objective}:
     "{text}"
    SUMMARY:
    """
-    map_prompt_template = PromptTemplate(
-        template=map_prompt, input_variables=["text", "objective"])
-
-    summary_chain = load_summarize_chain(
-        llm=llm,
-        chain_type='map_reduce',
-        map_prompt=map_prompt_template,
-        combine_prompt=map_prompt_template,
-        verbose=True
-    )
+    map_prompt_template = PromptTemplate(template=map_prompt, input_variables=["text", "objective"])
 
-    output = summary_chain.run(input_documents=docs, objective=objective)
+    # Load the summary chain with necessary configurations.
+    summary_chain = load_summarize_chain(llm=llm, chain_type='map_reduce', map_prompt=map_prompt_template, combine_prompt=map_prompt_template, verbose=True)
 
+    output = summary_chain.run(input_documents=docs, objective=objective)
     return output
 
-
 class ScrapeWebsiteInput(BaseModel):
-    """Inputs for scrape_website"""
-    objective: str = Field(
-        description="The objective & task that users give to the agent")
+    """Inputs for scrape_website function."""
+    objective: str = Field(description="The objective & task that users give to the agent")
     url: str = Field(description="The url of the website to be scraped")
 
-
 class ScrapeWebsiteTool(BaseTool):
+    """
+    A tool that provides functionality to scrape a website.
+    """
     name = "scrape_website"
     description = "useful when you need to get data from a website url, passing both url and objective to the function; DO NOT make up any url, the url should only be from the search results"
     args_schema: Type[BaseModel] = ScrapeWebsiteInput
 
     def _run(self, objective: str, url: str):
+        """Runs the scrape_website function."""
         return scrape_website(objective, url)
 
     def _arun(self, url: str):
+        """Asynchronous version of _run. (Currently not implemented)"""
         raise NotImplementedError("error here")
 
 @cl.langchain_factory(use_async=False)
 def run():
-    # 3. Create langchain agent with the tools above
+    """
+    Initialize and return a langchain agent with search and scraping tools.
+
+    Returns:
+    - Agent: Initialized langchain agent.
+    """
     tools = [
-        Tool(
-            name="Search",
-            func=search,
-            description="useful for when you need to answer questions about current events, data. You should ask targeted questions"
-        ),
+        Tool(name="Search", func=search, description="useful for when you need to answer questions about current events, data. You should ask targeted questions"),
         ScrapeWebsiteTool(),
     ]
 
@@ -175,21 +161,14 @@ def run():
     5/ In the final output, You should include all reference data & links to back up your research; You should include all reference data & links to back up your research
     6/ In the final output, You should include all reference data & links to back up your research; You should include all reference data & links to back up your research"""
     )
-
     agent_kwargs = {
         "extra_prompt_messages": [MessagesPlaceholder(variable_name="memory")],
         "system_message": system_message,
     }
 
+    # Initialize the ChatOpenAI model.
     llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k-0613", streaming=True)
-    memory = ConversationSummaryBufferMemory(
-        memory_key="memory", return_messages=True, llm=llm)
-
-    return initialize_agent(
-        tools,
-        llm,
-        agent=AgentType.OPENAI_FUNCTIONS,
-        verbose=True,
-        agent_kwargs=agent_kwargs,
-        memory=memory,
-    )
+    memory = ConversationSummaryBufferMemory(memory_key="memory", return_messages=True, llm=llm)
+
+    # Initialize the agent with tools and other configurations.
+    return initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True, agent_kwargs=agent_kwargs, memory=memory)
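Sidenote: the two call paths this commit touches can be smoke-tested outside the Chainlit agent. First, the DuckDuckGo search path. A minimal sketch, assuming the duckduckgo_search release this commit targets, where DDGS is a context manager and DDGS.text() yields one dict per result (field names such as 'title' and 'href' vary across library versions; the query string is a hypothetical placeholder):

    from itertools import islice
    from duckduckgo_search import DDGS

    # Standalone check of the search call used in app.py.
    with DDGS() as ddgs:
        results = ddgs.text("langchain agents", region="wt-wt", safesearch="Off", timelimit="y")
        for r in islice(results, 5):
            # Each result is a plain dict; print it to inspect the available fields.
            print(r)

Second, the compacted map_reduce summarization. A sketch under the same legacy LangChain APIs the file imports (PromptTemplate, load_summarize_chain, RecursiveCharacterTextSplitter); the document text and objective are placeholders:

    from langchain import PromptTemplate
    from langchain.chains.summarize import load_summarize_chain
    from langchain.chat_models import ChatOpenAI
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k-0613")
    # Same splitter settings as summary() in app.py.
    text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n"], chunk_size=10000, chunk_overlap=500)
    docs = text_splitter.create_documents(["<long scraped text>"])
    prompt = PromptTemplate(template='Write a summary of the following text for {objective}:\n"{text}"\nSUMMARY:',
                            input_variables=["text", "objective"])
    chain = load_summarize_chain(llm=llm, chain_type="map_reduce", map_prompt=prompt, combine_prompt=prompt, verbose=True)
    print(chain.run(input_documents=docs, objective="<research objective>"))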