Spaces:
Running
Running
updates
Browse files
app.py
CHANGED
@@ -1,196 +1,20 @@
|
|
1 |
-
|
2 |
-
import os
|
3 |
from PIL import Image
|
4 |
import sys
|
5 |
|
6 |
-
from omegaconf import OmegaConf
|
7 |
-
import requests
|
8 |
-
from typing import Tuple
|
9 |
-
from bs4 import BeautifulSoup
|
10 |
-
|
11 |
import streamlit as st
|
12 |
from streamlit_pills import pills
|
13 |
|
14 |
-
from
|
15 |
-
load_dotenv(override=True)
|
16 |
|
17 |
-
from
|
18 |
-
from vectara_agent.agent import Agent, AgentStatusType
|
19 |
-
from vectara_agent.tools import ToolsFactory, VectaraToolFactory
|
20 |
-
from vectara_agent.tools_catalog import summarize_text
|
21 |
|
22 |
initial_prompt = "How can I help you today?"
|
23 |
|
24 |
-
# HTTP request headers passed to requests.get() when downloading a
# Hacker News item page (see get_story_text). The values imitate a desktop
# Firefox browser.
get_headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:98.0) "
        "Gecko/20100101 Firefox/98.0"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,*/*;q=0.8"
    ),
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "keep-alive",
}
|
31 |
-
|
32 |
-
|
33 |
-
def create_assistant_tools(cfg):
    """Build the list of tools handed to the Hacker News agent.

    The result concatenates, in this order: the locally defined Hacker News
    helper functions (each wrapped with ``ToolsFactory.create_tool``), the
    Tavily research tools, the factory's standard tools, its guardrail tools,
    and the Vectara RAG tool ``ask_hackernews``.

    Args:
        cfg: Configuration object. This function reads ``api_key``,
            ``customer_id``, ``corpus_id`` and ``tavily_api_key`` from it.

    Returns:
        A single list containing all agent tools.
    """
    hn_api_url = 'https://hacker-news.firebaseio.com/v0/'
    # Bound every outbound HTTP call so a stalled endpoint cannot block the
    # app indefinitely (requests waits forever when no timeout is given).
    request_timeout = 10

    class QueryHackerNews(BaseModel):
        query: str = Field(..., description="The user query.")

    vec_factory = VectaraToolFactory(
        vectara_api_key=cfg.api_key,
        vectara_customer_id=cfg.customer_id,
        vectara_corpus_id=cfg.corpus_id,
    )
    tools_factory = ToolsFactory()

    ask_hackernews = vec_factory.create_rag_tool(
        tool_name="ask_hackernews",
        tool_description=(
            "\n"
            "Responds to query based on information and stories in hacker news from the last 6-9 months.\n"
        ),
        tool_args_schema=QueryHackerNews,
        reranker="multilingual_reranker_v1", rerank_k=100,
        n_sentences_before=2, n_sentences_after=2, lambda_val=0.005,
        summary_num_results=10,
        vectara_summarizer='vectara-summary-ext-24-05-med-omni',
        include_citations=True,
    )

    def _fetch_json(path):
        # Shared GET helper for the Hacker News API: applies the timeout and
        # surfaces HTTP errors instead of trying to parse an error page.
        response = requests.get(f"{hn_api_url}{path}", timeout=request_timeout)
        response.raise_for_status()
        return response.json()

    def get_top_stories(
        n_stories: int = Field(default=10, description="The number of top stories to return.")
    ) -> list[str]:
        """
        Get the top stories from hacker news.
        Returns a list of story IDS for the top stories right now. These are the top stories on hacker news.
        """
        return _fetch_json("topstories.json")[:n_stories]

    def get_show_stories(
        n_stories: int = Field(default=10, description="The number of top SHOW HN stories to return.")
    ) -> list[str]:
        """
        Get the top SHOW HN stories from hacker news.
        Returns a list of story IDS for the top SHOW HN stories right now. These are stories where users show their projects.
        """
        return _fetch_json("showstories.json")[:n_stories]

    def get_ask_stories(
        n_stories: int = Field(default=10, description="The number of top ASK HN stories to return.")
    ) -> list[str]:
        """
        Get the top ASK HN stories from hacker news.
        Returns a list of story IDS for the top ASK HN stories right now. These are stories where users ask questions to the community.
        """
        return _fetch_json("askstories.json")[:n_stories]

    def get_story_details(
        story_id: str = Field(..., description="The story ID.")
    ) -> Tuple[str, str, str]:
        """
        Get the title and links of a story from hacker news.
        Returns:
        - The title of the story (str)
        - The main URL of the story (str)
        - The external link pointed to in the story (str), or an empty string if the story has none
        """
        story = _fetch_json(f"item/{story_id}.json")
        if story is None:
            # A JSON null body means there is no item to describe; fail with
            # a clear message instead of a TypeError on subscripting None.
            raise ValueError(f"Hacker News story {story_id} was not found")
        story_url = f'https://news.ycombinator.com/item?id={story_id}'
        # Not every item carries 'url' (text-only posts) or 'title', so read
        # both defensively rather than raising KeyError.
        return story.get('title', ''), story_url, story.get('url', '')

    def get_story_text(
        story_id: str = Field(..., description="The story ID.")
    ) -> str:
        """
        Get the text of the story from hacker news (original text + all comments)
        Returns the extracted text of the story as a string.
        """
        url = f'https://news.ycombinator.com/item?id={story_id}'
        html = requests.get(url, headers=get_headers, timeout=request_timeout).text
        soup = BeautifulSoup(html, 'html5lib')
        # Drop non-content elements before extracting the visible text.
        for element in soup.find_all(['script', 'style']):
            element.decompose()
        text = soup.get_text(" ", strip=True).replace('\n', ' ')
        return text

    def whats_new(
        n_stories: int = Field(default=10, description="The number of new stories to return.")
    ) -> str:
        """
        Provides a succinct summary of what is new in the hackernews community
        by summarizing the content and comments of top stories.
        Returns a string with the summary.
        """
        # get_top_stories already limits the result to n_stories.
        stories = get_top_stories(n_stories)
        texts = [get_story_text(story_id) for story_id in stories]
        all_stories = '---------\n\n'.join(texts)
        return summarize_text(all_stories)

    def update_story(
        story_id: str = Field(..., description="The story ID.")
    ) -> str:
        """
        Update the story with the latest information.
        Returns a string indicating that the story has been updated.
        """
        # NOTE(review): this only prints the story details; nothing is
        # actually updated. It looks like a temporary debugging aid.
        title, story_url, _ = get_story_details(story_id)
        print(f"{title}: ({story_url})")
        return "story updated!"

    hacker_news_tools = [
        tools_factory.create_tool(tool)
        for tool in (
            get_top_stories,
            get_show_stories,
            get_ask_stories,
            get_story_details,
            get_story_text,
            whats_new,
            update_story,  # TEMP DEBUG
        )
    ]

    return (
        hacker_news_tools
        + tools_factory.get_llama_index_tools("tavily_research", "TavilyToolSpec", api_key=cfg.tavily_api_key)
        + tools_factory.standard_tools()
        + tools_factory.guardrail_tools()
        + [ask_hackernews]
    )
|
160 |
-
|
161 |
-
def initialize_agent(_cfg):
    """Return the agent for this Streamlit session, building it if needed.

    When ``st.session_state`` already holds an ``agent`` entry, that object
    is returned unchanged. Otherwise a new ``Agent`` is constructed with the
    tools from ``create_assistant_tools(_cfg)``, the topic "hacker news", the
    custom instructions below and a status callback; ``report()`` is called
    on it and it is returned. Storing the new agent in the session state is
    left to the caller.

    Args:
        _cfg: Configuration object forwarded to ``create_assistant_tools``.

    Returns:
        The cached or newly created agent.
    """
    session = st.session_state
    if 'agent' in session:
        return session.agent

    # Behavioural rules passed to the agent as its custom instructions.
    instructions = (
        "\n"
        "- You are a helpful assistant, with expertise in answering user questions based on Hacker News stories and comments.\n"
        "- IMPORTANT: Use the ask_hackernews tool to find relevant Hacker News stories and respond to user queries based on that information.\n"
        "- Never discuss politics, and always respond politely.\n"
        "- IMPORTANT: when you include links to Hacker News stories, use the actual title of the story as the link's displayed text.\n"
        "Don't use text like \"Source\" which doesn't tell the user what the link is about.\n"
        "- Don't include external links in your responses unless the user asks for them.\n"
        "- Give slight preference to newer stories when answering questions.\n"
    )

    def record_status(status_type: AgentStatusType, msg: str):
        # AGENT_UPDATE notifications are not logged; every other status is
        # appended to the session's log as "<status value> - <message>".
        if status_type == AgentStatusType.AGENT_UPDATE:
            return
        st.session_state.log_messages.append(f"{status_type.value} - {msg}")

    new_agent = Agent(
        tools=create_assistant_tools(_cfg),
        topic="hacker news",
        custom_instructions=instructions,
        update_func=record_status,
    )
    new_agent.report()
    return new_agent
|
188 |
-
|
189 |
def toggle_logs():
    """Flip the ``show_logs`` flag stored in the Streamlit session state."""
    session = st.session_state
    session.show_logs = not session.show_logs
|
191 |
|
192 |
def show_example_questions():
|
193 |
-
if len(st.session_state.example_messages) > 0 and st.session_state.first_turn:
|
194 |
selected_example = pills("Queries to Try:", st.session_state.example_messages, index=None)
|
195 |
if selected_example:
|
196 |
st.session_state.ex_prompt = selected_example
|
@@ -198,6 +22,12 @@ def show_example_questions():
|
|
198 |
return True
|
199 |
return False
|
200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
def launch_bot():
|
202 |
def reset():
|
203 |
st.session_state.messages = [{"role": "assistant", "content": initial_prompt, "avatar": "🦖"}]
|
@@ -207,31 +37,25 @@ def launch_bot():
|
|
207 |
st.session_state.ex_prompt = None
|
208 |
st.session_state.first_turn = True
|
209 |
st.session_state.show_logs = False
|
|
|
|
|
210 |
|
211 |
-
st.set_page_config(page_title="Hacker News Bot", layout="wide")
|
212 |
if 'cfg' not in st.session_state:
|
213 |
-
cfg =
|
214 |
-
|
215 |
-
'corpus_id': str(os.environ['VECTARA_CORPUS_ID']),
|
216 |
-
'api_key': str(os.environ['VECTARA_API_KEY']),
|
217 |
-
'examples': os.environ.get('QUERY_EXAMPLES', None),
|
218 |
-
'tavily_api_key': str(os.environ['TAVILY_API_KEY']),
|
219 |
-
})
|
220 |
st.session_state.cfg = cfg
|
221 |
st.session_state.ex_prompt = None
|
222 |
example_messages = [example.strip() for example in cfg.examples.split(",")] if cfg.examples else []
|
223 |
st.session_state.example_messages = [em for em in example_messages if len(em)>0]
|
224 |
-
reset()
|
225 |
-
|
226 |
cfg = st.session_state.cfg
|
227 |
-
if 'agent' not in st.session_state:
|
228 |
-
st.session_state.agent = initialize_agent(cfg)
|
229 |
|
230 |
# left side content
|
231 |
with st.sidebar:
|
232 |
image = Image.open('Vectara-logo.png')
|
233 |
st.image(image, width=175)
|
234 |
-
st.markdown("##
|
|
|
235 |
|
236 |
st.markdown("\n\n")
|
237 |
bc1, _ = st.columns([1, 1])
|
@@ -246,7 +70,6 @@ def launch_bot():
|
|
246 |
"This app was built with [Vectara](https://vectara.com).\n\n"
|
247 |
"It demonstrates the use of Agentic RAG functionality with Vectara"
|
248 |
)
|
249 |
-
st.markdown("---")
|
250 |
|
251 |
if "messages" not in st.session_state.keys():
|
252 |
reset()
|
@@ -277,7 +100,7 @@ def launch_bot():
|
|
277 |
print(f"Starting new question: {prompt}\n")
|
278 |
st.write(prompt)
|
279 |
st.session_state.ex_prompt = None
|
280 |
-
|
281 |
# Generate a new response if last message is not from assistant
|
282 |
if st.session_state.prompt:
|
283 |
with st.chat_message("assistant", avatar='🤖'):
|
@@ -289,6 +112,7 @@ def launch_bot():
|
|
289 |
st.markdown(res)
|
290 |
st.session_state.ex_prompt = None
|
291 |
st.session_state.prompt = None
|
|
|
292 |
st.rerun()
|
293 |
|
294 |
log_placeholder = st.empty()
|
@@ -301,9 +125,7 @@ def launch_bot():
|
|
301 |
if len(st.session_state.log_messages) > 0:
|
302 |
st.button("Show Logs", on_click=toggle_logs)
|
303 |
|
304 |
-
|
305 |
sys.stdout.flush()
|
306 |
|
307 |
if __name__ == "__main__":
|
308 |
-
launch_bot()
|
309 |
-
|
|
|
|
|
|
|
1 |
from PIL import Image
|
2 |
import sys
|
3 |
|
|
|
|
|
|
|
|
|
|
|
4 |
import streamlit as st
|
5 |
from streamlit_pills import pills
|
6 |
|
7 |
+
from vectara_agent.agent import AgentStatusType
|
|
|
8 |
|
9 |
+
from agent import initialize_agent, get_agent_config
|
|
|
|
|
|
|
10 |
|
11 |
initial_prompt = "How can I help you today?"
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
def toggle_logs():
    """Flip the ``show_logs`` flag stored in the Streamlit session state."""
    session = st.session_state
    session.show_logs = not session.show_logs
|
15 |
|
16 |
def show_example_questions():
|
17 |
+
if len(st.session_state.example_messages) > 0 and st.session_state.first_turn:
|
18 |
selected_example = pills("Queries to Try:", st.session_state.example_messages, index=None)
|
19 |
if selected_example:
|
20 |
st.session_state.ex_prompt = selected_example
|
|
|
22 |
return True
|
23 |
return False
|
24 |
|
25 |
+
def update_func(status_type: AgentStatusType, msg: str):
    """Record an agent status message in the session's log.

    Messages whose type is ``AgentStatusType.AGENT_UPDATE`` are ignored.
    Any other status is appended to ``st.session_state.log_messages`` as
    ``"<status value> - <message>"``.

    Args:
        status_type: The kind of status being reported.
        msg: The text accompanying the status.
    """
    if status_type == AgentStatusType.AGENT_UPDATE:
        return
    st.session_state.log_messages.append(f"{status_type.value} - {msg}")
|
29 |
+
|
30 |
+
|
31 |
def launch_bot():
|
32 |
def reset():
|
33 |
st.session_state.messages = [{"role": "assistant", "content": initial_prompt, "avatar": "🦖"}]
|
|
|
37 |
st.session_state.ex_prompt = None
|
38 |
st.session_state.first_turn = True
|
39 |
st.session_state.show_logs = False
|
40 |
+
if 'agent' not in st.session_state:
|
41 |
+
st.session_state.agent = initialize_agent(cfg, update_func=update_func)
|
42 |
|
|
|
43 |
if 'cfg' not in st.session_state:
|
44 |
+
cfg = get_agent_config()
|
45 |
+
st.set_page_config(page_title=cfg['title'], layout="wide")
|
|
|
|
|
|
|
|
|
|
|
46 |
st.session_state.cfg = cfg
|
47 |
st.session_state.ex_prompt = None
|
48 |
example_messages = [example.strip() for example in cfg.examples.split(",")] if cfg.examples else []
|
49 |
st.session_state.example_messages = [em for em in example_messages if len(em)>0]
|
50 |
+
reset()
|
|
|
51 |
cfg = st.session_state.cfg
|
|
|
|
|
52 |
|
53 |
# left side content
|
54 |
with st.sidebar:
|
55 |
image = Image.open('Vectara-logo.png')
|
56 |
st.image(image, width=175)
|
57 |
+
st.markdown(f"## {cfg['demo_welcome']}")
|
58 |
+
st.markdown(f"{cfg['demo_description']}")
|
59 |
|
60 |
st.markdown("\n\n")
|
61 |
bc1, _ = st.columns([1, 1])
|
|
|
70 |
"This app was built with [Vectara](https://vectara.com).\n\n"
|
71 |
"It demonstrates the use of Agentic RAG functionality with Vectara"
|
72 |
)
|
|
|
73 |
|
74 |
if "messages" not in st.session_state.keys():
|
75 |
reset()
|
|
|
100 |
print(f"Starting new question: {prompt}\n")
|
101 |
st.write(prompt)
|
102 |
st.session_state.ex_prompt = None
|
103 |
+
|
104 |
# Generate a new response if last message is not from assistant
|
105 |
if st.session_state.prompt:
|
106 |
with st.chat_message("assistant", avatar='🤖'):
|
|
|
112 |
st.markdown(res)
|
113 |
st.session_state.ex_prompt = None
|
114 |
st.session_state.prompt = None
|
115 |
+
st.session_state.first_turn = False
|
116 |
st.rerun()
|
117 |
|
118 |
log_placeholder = st.empty()
|
|
|
125 |
if len(st.session_state.log_messages) > 0:
|
126 |
st.button("Show Logs", on_click=toggle_logs)
|
127 |
|
|
|
128 |
sys.stdout.flush()
|
129 |
|
130 |
if __name__ == "__main__":
|
131 |
+
launch_bot()
|
|