Didrik Nathaniel LLoyd Aasland Skjelbred committed · Commit 17973dc · Parent(s): 814460f

update

Files changed:
- retriever.py +183 -0
- tools.py +180 -0
retriever.py
ADDED
@@ -0,0 +1,183 @@
import datasets
from langchain.docstore.document import Document

#-------------------------------------#
# Step 1: Load and Prepare the Dataset
#-------------------------------------#
# Load the dataset
guest_dataset = datasets.load_dataset("agents-course/unit3-invitees", split="train")

# Convert dataset entries into Document objects
docs = [
    Document(
        page_content="\n".join([
            f"Name: {guest['name']}",
            f"Relation: {guest['relation']}",
            f"Description: {guest['description']}",
            f"Email: {guest['email']}"
        ]),
        metadata={"name": guest["name"]}
    )
    for guest in guest_dataset
]

# In the code above, we:
#  ● Load the dataset
#  ● Convert each guest entry into a Document object with formatted content
#  ● Store the Document objects in a list


#-------------------------------------#
# Step 2: Create the Retriever Tool
#-------------------------------------#
# We will use the BM25Retriever from the langchain_community.retrievers module to create a retriever tool.
from smolagents import Tool
from langchain_community.retrievers import BM25Retriever

class GuestInfoRetriever(Tool):
    name = "Guest_Info_Retriever"
    description = "Retrieves detailed information about gala guests based on their name or relation."
    inputs = {
        "query": {
            "type": "string",
            "description": "The name or relation of the guest you want information about."
        }
    }
    output_type = "string"

    def __init__(self, docs):
        self.is_initialized = False
        self.retriever = BM25Retriever.from_documents(docs)

    def forward(self, query: str):
        results = self.retriever.invoke(query)  # `get_relevant_documents` is deprecated; `invoke` is the current API
        if results:
            response_text = []
            for doc in results[:3]:  # Take the first 3 retrieved documents
                lines = doc.page_content.split("\n")
                name = ""
                description = ""
                for line in lines:
                    if line.startswith("Name"):
                        name = line.split("Name: ")[1].strip()
                    if line.startswith("Description"):
                        description = line.split("Description: ")[1].strip()

                conversation_starter = f"Conversation starter: you could ask {name} about {description.lower()}"
                response = doc.page_content + "\n" + conversation_starter
                response_text.append(response)

            return "\n\n".join(response_text)
        else:
            return "No matching guest information found."

guest_info_tool = GuestInfoRetriever(docs)

# Let’s understand this tool step-by-step:
#
# ● The `name` and `description` help the agent understand when and how to use this tool
# ● The `inputs` define what parameters the tool expects (in this case, a search query)
# ● We’re using a `BM25Retriever`, which is a powerful text retrieval algorithm that doesn’t require embeddings
# ● The `forward` method processes the query and returns the most relevant guest information
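
# As a quick check (a small sketch, not part of the course code), you can call the tool directly before
# handing it to an agent; calling the tool invokes its `forward` method, just like `search_tool` and
# `hub_stats_tool` are called directly in tools.py:
print(guest_info_tool("Ada Lovelace"))
# This should print Ada Lovelace's formatted record followed by the suggested conversation starter.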


#---------------------------------------#
# Step 3: Integrate the Tool with Alfred
#---------------------------------------#
# Finally, let’s bring everything together by creating our agent and equipping it with our custom tool:

from smolagents import CodeAgent, InferenceClientModel

# Initialize the Hugging Face model
model = InferenceClientModel()

# Create Alfred, our gala agent, with the guest info tool
alfred = CodeAgent(tools=[guest_info_tool], model=model)

# Example query Alfred might receive during the gala
response = alfred.run("Tell me about our guest named 'Lady Ada Lovelace'.")

print("🎩 Alfred's Response:")
print(response)

#### OUTPUT ####
# ╭─ Tell me about our guest named 'Lady Ada Lovelace'. ─ InferenceClientModel - Qwen/Qwen2.5-Coder-32B-Instruct ─╮
#
# ━━━ Step 1 ━━━
# ─ Executing parsed code: ─
#   lady_ada_info = Guest_Info_Retriever(query="Lady Ada Lovelace")
#   print(lady_ada_info)
# ─
# Execution logs:
# Name: Ada Lovelace
# Relation: best friend
# Description: Lady Ada Lovelace is my best friend. She is an esteemed mathematician and friend. She is renowned for her pioneering work in mathematics and computing, often celebrated as the first computer programmer due to her work on Charles Babbage's Analytical Engine.
# Email: [email protected]
#
# Name: Marie Curie
# Relation: no relation
# Description: Marie Curie was a groundbreaking physicist and chemist, famous for her research on radioactivity.
# Email: [email protected]
#
# Name: Dr. Nikola Tesla
# Relation: old friend from university days
# Description: Dr. Nikola Tesla is an old friend from your university days. He's recently patented a new wireless energy transmission system and would be delighted to discuss it with you. Just remember he's passionate about pigeons, so that might make for good small talk.
# Email: [email protected]
#
# Out: None
# [Step 1: Duration 3.59 seconds | Input tokens: 2,058 | Output tokens: 68]
#
# ━━━ Step 2 ━━━
# ─ Executing parsed code: ─
#   # Splitting the observation into lines
#   lines = lady_ada_info.split('\n')
#
#   # Extracting the relevant information
#   name = lines[0].split(': ')[1]
#   relation = lines[1].split(': ')[1]
#   description = lines[2].split(': ')[1]
#
#   # Formatting the final answer
#   final_answer_string = f"Name: {name}\nRelation: {relation}\nDescription: {description}"
#   final_answer(final_answer_string)
# ─
# Out - Final answer: Name: Ada Lovelace
# Relation: best friend
# Description: Lady Ada Lovelace is my best friend. She is an esteemed mathematician and friend. She is renowned for her pioneering work in mathematics and computing, often celebrated as the first computer programmer due to her work on Charles Babbage's Analytical Engine.
# [Step 2: Duration 4.17 seconds | Input tokens: 4,463 | Output tokens: 198]
#
# 🎩 Alfred's Response:
# Name: Ada Lovelace
# Relation: best friend
# Description: Lady Ada Lovelace is my best friend. She is an esteemed mathematician and friend. She is renowned for her pioneering work in mathematics and computing, often celebrated as the first computer programmer due to her work on Charles Babbage's Analytical Engine.


#---------------------------------------#
# Example Interaction
#---------------------------------------#
# During the gala, a conversation might flow like this:
#
# You: “Alfred, who is that gentleman talking to the ambassador?”
#
# Alfred: *quickly searches the guest database* “That’s Dr. Nikola Tesla, sir. He’s an old friend from your university days.
# He’s recently patented a new wireless energy transmission system and would be delighted to discuss it with you. Just remember he’s passionate about pigeons, so that might make for good small talk.”


#---------------------------------------#
# Taking It Further
#---------------------------------------#
# Now that Alfred can retrieve guest information, consider how you might enhance this system:
#
# 1. Improve the retriever to use a more sophisticated algorithm like sentence-transformers (see the sketch below)
# 2. Implement a conversation memory so Alfred remembers previous interactions
# 3. Combine with web search to get the latest information on unfamiliar guests
# 4. Integrate multiple indexes to get more complete information from verified sources
#
# Now Alfred is fully equipped to handle guest inquiries effortlessly, ensuring your gala is remembered as the most sophisticated and delightful event of the century!
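
#---------------------------------------#
# Sketch: a semantic retriever (suggestion 1)
#---------------------------------------#
# A minimal sketch of suggestion 1 above, not part of the course code. It assumes the
# `sentence-transformers` package is installed and reuses the `docs` list built in Step 1.
# Instead of BM25's keyword matching, it embeds each guest entry and ranks entries by cosine
# similarity; the model name "all-MiniLM-L6-v2" is just one common choice, not something prescribed here.
class SemanticGuestInfoRetriever(Tool):
    name = "Semantic_Guest_Info_Retriever"
    description = "Retrieves gala guest information using semantic (embedding-based) search."
    inputs = {
        "query": {
            "type": "string",
            "description": "The name or relation of the guest you want information about."
        }
    }
    output_type = "string"

    def __init__(self, docs):
        # Imported lazily so this file still runs without sentence-transformers installed,
        # as long as this class is never instantiated.
        from sentence_transformers import SentenceTransformer, util
        self.is_initialized = False
        self.util = util
        self.docs = docs
        self.model = SentenceTransformer("all-MiniLM-L6-v2")
        # Pre-compute one embedding per guest entry
        self.embeddings = self.model.encode([d.page_content for d in docs], convert_to_tensor=True)

    def forward(self, query: str):
        query_embedding = self.model.encode(query, convert_to_tensor=True)
        # Keep the 3 most similar guest entries by cosine similarity
        hits = self.util.semantic_search(query_embedding, self.embeddings, top_k=3)[0]
        if hits:
            return "\n\n".join(self.docs[hit["corpus_id"]].page_content for hit in hits)
        return "No matching guest information found."

# For suggestion 2 (conversation memory), smolagents can reuse the agent's memory between calls by
# passing reset=False to subsequent runs, e.g. alfred.run("Which guest did I just ask about?", reset=False).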
tools.py
ADDED
@@ -0,0 +1,180 @@
# Building and Integrating Tools for Your Agent


#---------------------------------#
# Give Your Agent Access to the Web
#---------------------------------#
from smolagents import DuckDuckGoSearchTool

# Initialize the DuckDuckGo search tool
search_tool = DuckDuckGoSearchTool()

# Example usage:
results = search_tool("Who's the current president of France?")
print(results)
# Expected output: The current President of France is Emmanuel Macron.


#-----------------------------------------------------------------------------------------#
# Creating a Custom Tool that can be used to get the latest news about a specific topic.
#-----------------------------------------------------------------------------------------#
from smolagents import Tool
from newsapi import NewsApiClient

class GetLatestNewsTool(Tool):
    name = "latest_news"
    description = """Fetch the latest breaking headline news worldwide.
    Supports filtering by keyword, country, category, or specific sources.
    **Note:** you cannot use 'sources' together with 'country' or 'category';
    choose either sources OR country/category filters.
    """
    inputs = {
        "query": {
            "type": "string",
            "description": "Keywords or phrase to search for in headlines."
        },
        "country": {
            "type": "string",
            "description": "2-letter country code (e.g., 'us', 'gb'). Optional.",
            "nullable": True
        },
        "category": {
            "type": "string",
            "description": "News category (e.g., 'business', 'sports'). Optional.",
            "nullable": True
        },
        "sources": {
            "type": "string",
            "description": "Comma-separated list of news source IDs to get headlines from. Optional.",
            "nullable": True
        }
    }
    # Each returned article contains: `source` (id and name), `author`, `title`, `description`,
    # `url`, `urlToImage`, `publishedAt`, and `content`.
    output_type = "object"

    def __init__(self, api_key):
        super().__init__()
        self.newsapi = NewsApiClient(api_key=api_key)

    def forward(self, query, country=None, category=None, sources=None):
        """
        Run the tool: call NewsAPI with the provided filters.
        """
        if sources and (country or category):
            return "You cannot use `sources` together with `country` or `category`."

        response = self.newsapi.get_top_headlines(
            q=query,
            country=country,
            category=category,
            sources=sources
        )

        return response
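
# Example usage (a sketch, not from the course; it assumes your NewsAPI key is exported as the
# NEWSAPI_KEY environment variable, which is an arbitrary name chosen here):
import os

if os.getenv("NEWSAPI_KEY"):
    latest_news_tool = GetLatestNewsTool(api_key=os.getenv("NEWSAPI_KEY"))
    headlines = latest_news_tool(query="technology", country="us")
    # get_top_headlines returns a dict with "status", "totalResults", and "articles"
    print(headlines)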


#------------------------------------------------------------------------------#
# Creating a Custom Tool for Weather Information to Schedule the Fireworks
#------------------------------------------------------------------------------#
from smolagents import Tool
import random

class WeatherInfoTool(Tool):
    name = "weather_info"
    description = "Fetches dummy weather information for a given location."
    inputs = {
        "location": {
            "type": "string",
            "description": "The location to get weather information for."
        }
    }
    output_type = "string"

    def forward(self, location: str):
        # Dummy weather data
        weather_conditions = [
            {"condition": "Rainy", "temp_c": 15},
            {"condition": "Clear", "temp_c": 25},
            {"condition": "Windy", "temp_c": 20}
        ]
        # Randomly select a weather condition
        data = random.choice(weather_conditions)
        return f"Weather in {location}: {data['condition']}, {data['temp_c']}°C"

# Initialize the tool
weather_info_tool = WeatherInfoTool()
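
# Example usage (the returned condition is random dummy data):
print(weather_info_tool("Paris"))
# Possible output: Weather in Paris: Clear, 25°C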


#--------------------------------------------------------#
# Creating a Hub Stats Tool for Influential AI Builders
#--------------------------------------------------------#
from smolagents import Tool
from huggingface_hub import list_models

class HubStatsTool(Tool):
    name = "hub_stats"
    description = "Fetches the most downloaded model from a specific author on the Hugging Face Hub."
    inputs = {
        "author": {
            "type": "string",
            "description": "The username of the model author/organization to find models from."
        }
    }
    output_type = "string"

    def forward(self, author: str):
        try:
            models = list(list_models(author=author, sort="downloads", direction=-1, limit=1))
            if models:
                model = models[0]
                return f"The most downloaded model by {author} is {model.id} with {model.downloads:,} downloads."
            else:
                return f"No models found for author: {author}."
        except Exception as e:
            return f"Error fetching models for {author}: {str(e)}"

# Initialize the tool
hub_stats_tool = HubStatsTool()

# Example usage
print(hub_stats_tool("facebook"))
# Expected output: The most downloaded model by facebook is facebook/esmfold_v1 with 12,544,550 downloads.


#--------------------------------------------------------#
# Integrating Tools with Alfred
#--------------------------------------------------------#
import os

from smolagents import CodeAgent, InferenceClientModel

model = InferenceClientModel()

alfred = CodeAgent(
    # GetLatestNewsTool needs a NewsAPI key; here it is read from the NEWSAPI_KEY environment variable (an assumed name)
    tools=[search_tool, weather_info_tool, hub_stats_tool, GetLatestNewsTool(api_key=os.getenv("NEWSAPI_KEY", ""))],
    model=model
)

# Example query Alfred might receive during the gala
response = alfred.run("What is Facebook and what's their most popular model?")
print("🎩 Alfred's Response:")
print(response)
# Expected output: 🎩 Alfred's Response: Facebook is a social networking website where users can connect, share information, and interact with others. The most downloaded model by Facebook on the Hugging Face Hub is ESMFold_v1.


#--------------------------------------------------------#
# Conclusion
#--------------------------------------------------------#
# By integrating these tools, Alfred is now equipped to handle a variety of tasks, from web searches to weather updates and model statistics. This ensures he remains the most informed and engaging host at the gala.