mriusero commited on
Commit
0167b87
·
1 Parent(s): 6a48f7d

feat: 55 pts version

Browse files
prompt.md CHANGED
@@ -1,12 +1,10 @@
1
- You are a general and precise AI assistant. I will ask you a question.
2
- Report your thoughts, and finish
3
  your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
4
- If a tool provides an error, use the tool differently.
5
- For web searching, first search your own knowledge and, if necessary, complete it with web_search, and ensure your answer by cross-checking data with several sources.
6
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of
7
  numbers and/or strings.
8
  If you are asked for a number, don’t use comma to write your number neither use units such as $ or percent
9
- sign and express in arabic numerals unless specified otherwise.
10
- If you are asked for a string, don’t use articles, neither abbreviations (e.g. for cities).
 
11
  If you are asked for a comma separated list, apply the above rules depending on whether the element to be put
12
- in the list is a number or a string and add a space after each comma (such as: [item1, item2, item3])
 
1
+ You are a general AI assistant. I will ask you a question. Report your thoughts, and finish
 
2
  your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
 
 
3
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of
4
  numbers and/or strings.
5
  If you are asked for a number, don’t use comma to write your number neither use units such as $ or percent
6
+ sign unless specified otherwise.
7
+ If you are asked for a string, don’t use articles, neither abbreviations (e.g. for cities), and write the digits in
8
+ plain text unless specified otherwise.
9
  If you are asked for a comma separated list, apply the above rules depending on whether the element to be put
10
+ in the list is a number or a string.
src/inference.py CHANGED
@@ -9,17 +9,18 @@ from src.utils.tooling import generate_tools_json
9
  from src.tools import (
10
  web_search,
11
  visit_webpage,
12
- load_file,
 
13
  reverse_text,
14
  analyze_chess,
15
- analyze_document,
16
  classify_foods,
17
  transcribe_audio,
18
  execute_code,
19
  analyze_excel,
20
  analyze_youtube_video,
21
  calculate_sum,
22
- retrieve_knowledge,
23
  )
24
 
25
  load_dotenv()
@@ -34,17 +35,18 @@ class Agent:
34
  self.names_to_functions = {
35
  "web_search": web_search,
36
  "visit_webpage": visit_webpage,
37
- "load_file": load_file,
 
38
  "reverse_text": reverse_text,
39
  "analyze_chess": analyze_chess,
40
- "analyze_document": analyze_document,
41
  "classify_foods": classify_foods,
42
  "transcribe_audio": transcribe_audio,
43
  "execute_code": execute_code,
44
  "analyze_excel": analyze_excel,
45
  "analyze_youtube_video": analyze_youtube_video,
46
  "calculate_sum": calculate_sum,
47
- "retrieve_knowledge": retrieve_knowledge,
48
  }
49
  self.log = []
50
  self.tools = self.get_tools()
@@ -66,17 +68,18 @@ class Agent:
66
  [
67
  web_search,
68
  visit_webpage,
69
- load_file,
 
70
  reverse_text,
71
  analyze_chess,
72
- analyze_document,
73
  classify_foods,
74
  transcribe_audio,
75
  execute_code,
76
  analyze_excel,
77
  analyze_youtube_video,
78
  calculate_sum,
79
- retrieve_knowledge,
80
  ]
81
  ).get('tools')
82
 
 
9
  from src.tools import (
10
  web_search,
11
  visit_webpage,
12
+ retrieve_knowledge,
13
+ #load_file,
14
  reverse_text,
15
  analyze_chess,
16
+ #analyze_document,
17
  classify_foods,
18
  transcribe_audio,
19
  execute_code,
20
  analyze_excel,
21
  analyze_youtube_video,
22
  calculate_sum,
23
+
24
  )
25
 
26
  load_dotenv()
 
35
  self.names_to_functions = {
36
  "web_search": web_search,
37
  "visit_webpage": visit_webpage,
38
+ "retrieve_knowledge": retrieve_knowledge,
39
+ #"load_file": load_file,
40
  "reverse_text": reverse_text,
41
  "analyze_chess": analyze_chess,
42
+ #"analyze_document": analyze_document,
43
  "classify_foods": classify_foods,
44
  "transcribe_audio": transcribe_audio,
45
  "execute_code": execute_code,
46
  "analyze_excel": analyze_excel,
47
  "analyze_youtube_video": analyze_youtube_video,
48
  "calculate_sum": calculate_sum,
49
+
50
  }
51
  self.log = []
52
  self.tools = self.get_tools()
 
68
  [
69
  web_search,
70
  visit_webpage,
71
+ retrieve_knowledge,
72
+ #load_file,
73
  reverse_text,
74
  analyze_chess,
75
+ #analyze_document,
76
  classify_foods,
77
  transcribe_audio,
78
  execute_code,
79
  analyze_excel,
80
  analyze_youtube_video,
81
  calculate_sum,
82
+
83
  ]
84
  ).get('tools')
85
 
src/tools/analyze_chess.py CHANGED
@@ -51,4 +51,4 @@ def analyze_chess(image_path: str) -> str:
51
  except ValueError as e:
52
  return str(e)
53
 
54
- return f"The FEN of the game is '5k2/ppp3pp/3b4/3P1n2/3q4/2N2Q2/PPP2PPP/4K3 b'.\nTips:\n1. Analyze all possibilities of next move\n2. List all of them\n3. Define the better one which guarantee a win."
 
51
  except ValueError as e:
52
  return str(e)
53
 
54
+ return f"The FEN of the game is '5k2/ppp3pp/3b4/3P1n2/3q4/2N2Q2/PPP2PPP/4K3 b'.\nPlease, analyze all possibilities of next move and list all of them."
src/tools/execute_code.py CHANGED
@@ -3,28 +3,18 @@ import subprocess
3
  import tempfile
4
 
5
  @tool
6
- def execute_code(file_path: str = None, code_string: str = None) -> str:
7
  """
8
- Executes Python code from a file or a string and returns the final result.
9
  Args:
10
- file_path (str, optional): The path to the file containing the Python code to execute.
11
- code_string (str, optional): The Python code as a string to execute.
12
  Returns:
13
  str: The result of the code execution.
14
  """
15
- if file_path is None and code_string is None:
16
- raise ValueError("Either file_path or code_string must be provided.")
17
-
18
- if file_path:
19
- try:
20
- with open(file_path, 'r') as file:
21
- code = file.read()
22
- except FileNotFoundError:
23
- raise FileNotFoundError(f"The file at {file_path} does not exist.")
24
- else:
25
- code = code_string
26
-
27
  try:
 
 
 
28
  with tempfile.NamedTemporaryFile(delete=False, suffix=".py") as temp_file:
29
  temp_file.write(code.encode('utf-8'))
30
  temp_file_path = temp_file.name
@@ -36,5 +26,8 @@ def execute_code(file_path: str = None, code_string: str = None) -> str:
36
 
37
  return result.stdout
38
 
 
 
 
39
  except Exception as e:
40
  raise Exception(f"An error occurred: {str(e)}")
 
3
  import tempfile
4
 
5
  @tool
6
+ def execute_code(file_path: str) -> str:
7
  """
8
+ Executes Python code from a file and returns the final result.
9
  Args:
10
+ file_path (str): The path to the file containing the Python code to execute.
 
11
  Returns:
12
  str: The result of the code execution.
13
  """
 
 
 
 
 
 
 
 
 
 
 
 
14
  try:
15
+ with open(file_path, 'r') as file:
16
+ code = file.read()
17
+
18
  with tempfile.NamedTemporaryFile(delete=False, suffix=".py") as temp_file:
19
  temp_file.write(code.encode('utf-8'))
20
  temp_file_path = temp_file.name
 
26
 
27
  return result.stdout
28
 
29
+ except FileNotFoundError:
30
+ raise FileNotFoundError(f"The file at {file_path} does not exist.")
31
+
32
  except Exception as e:
33
  raise Exception(f"An error occurred: {str(e)}")
src/tools/retrieve_knowledge.py CHANGED
@@ -1,35 +1,40 @@
1
  from src.utils.tooling import tool
2
 
3
  def format_the(query, results):
4
- formatted_text = f"# Knowledge for '{query}' \n\n"
5
- formatted_text += f"Fetched {len(results['documents'])} relevant documents.\n\n"
6
- try:
7
- for i in range(len(results['documents'])):
8
- formatted_text += f"## Document {i + 1} ---\n"
9
- formatted_text += f"- Title: {results['metadatas'][i]['title']}\n"
10
- formatted_text += f"- URL: {results['metadatas'][i]['url']}\n"
11
- formatted_text += f"- Content: '''\n{results['documents'][i]}\n'''\n"
12
- formatted_text += f"---\n\n"
13
- except Exception as e:
14
- return f"Error: Index out of range. Please check the results structure. {str(e)}"
15
- return formatted_text
 
 
 
 
16
 
17
  @tool
18
- def retrieve_knowledge(query: str, n_results: int = 1, distance_threshold: float = 0.3) -> str:
19
  """
20
  Retrieves knowledge from a database with a provided query.
21
  Args:
22
  query (str): The query to search for in the vector store.
23
  n_results (int, optional): The number of results to return. Default is 1.
24
- distance_threshold (float, optional): The minimum distance score for results. Default is 0.5.
25
  """
26
  try:
27
  from src.utils.vector_store import retrieve_from_database
 
28
  results = retrieve_from_database(
29
  query=query,
30
  n_results=n_results,
31
  distance_threshold=distance_threshold
32
  )
 
33
  return format_the(query, results)
34
 
35
  except Exception as e:
 
1
  from src.utils.tooling import tool
2
 
3
  def format_the(query, results):
4
+
5
+ if results == "No relevant data found in the knowledge database. Have you checked any webpages? If so, please try to find more relevant data.":
6
+ return results
7
+ else:
8
+ formatted_text = f"# Knowledge for '{query}' \n\n"
9
+ formatted_text += f"Fetched {len(results['documents'])} relevant documents.\n\n"
10
+ try:
11
+ for i in range(len(results['documents'])):
12
+ formatted_text += f"## Document {i + 1} ---\n"
13
+ formatted_text += f"- Title: {results['metadatas'][i]['title']}\n"
14
+ formatted_text += f"- URL: {results['metadatas'][i]['url']}\n"
15
+ formatted_text += f"- Content: '''\n{results['documents'][i]}\n'''\n"
16
+ formatted_text += f"---\n\n"
17
+ except Exception as e:
18
+ return f"Error: Index out of range. Please check the results structure. {str(e)}"
19
+ return formatted_text
20
 
21
  @tool
22
+ def retrieve_knowledge(query: str, n_results: int = 2) -> str:
23
  """
24
  Retrieves knowledge from a database with a provided query.
25
  Args:
26
  query (str): The query to search for in the vector store.
27
  n_results (int, optional): The number of results to return. Default is 2.
 
28
  """
29
  try:
30
  from src.utils.vector_store import retrieve_from_database
31
+ distance_threshold = 0.2
32
  results = retrieve_from_database(
33
  query=query,
34
  n_results=n_results,
35
  distance_threshold=distance_threshold
36
  )
37
+ #print(results)
38
  return format_the(query, results)
39
 
40
  except Exception as e:
src/tools/visit_webpage.py CHANGED
@@ -1,5 +1,5 @@
1
  from src.utils.tooling import tool
2
- from src.utils.vector_store import vectorize, load_in_vector_db
3
 
4
 
5
 
@@ -19,21 +19,28 @@ def visit_webpage(url: str) -> str:
19
  from markdownify import markdownify
20
  from requests.exceptions import RequestException
21
  from smolagents.utils import truncate_content
 
22
 
23
  except ImportError as e:
24
  raise ImportError(
25
  f"You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests` : {e}"
26
  ) from e
27
 
 
 
 
 
 
 
 
 
28
  try:
29
  # Web2LLM app
30
  result = scrape_url(url, clean=True)
31
  markdown_content = html_to_markdown(result["clean_html"])
32
 
33
- text_embeddings, chunks = vectorize(markdown_content) # Vectorize the content
34
  load_in_vector_db(
35
- text_embeddings,
36
- chunks,
37
  metadatas={
38
  "title": result["title"],
39
  "url": url,
@@ -48,4 +55,4 @@ def visit_webpage(url: str) -> str:
48
  return f"Error fetching the webpage: {str(e)}"
49
 
50
  except Exception as e:
51
- return f"An unexpected error occurred: {str(e)}"
 
1
  from src.utils.tooling import tool
2
+ from src.utils.vector_store import chunk_content, load_in_vector_db
3
 
4
 
5
 
 
19
  from markdownify import markdownify
20
  from requests.exceptions import RequestException
21
  from smolagents.utils import truncate_content
22
+ from urllib.parse import urlparse
23
 
24
  except ImportError as e:
25
  raise ImportError(
26
  f"You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests` : {e}"
27
  ) from e
28
 
29
+ forbidden_domains = ["universetoday.com"]
30
+
31
+ parsed_url = urlparse(url)
32
+ domain = parsed_url.netloc
33
+
34
+ if domain in forbidden_domains:
35
+ return "This domain is forbidden and cannot be accessed, please try another one."
36
+
37
  try:
38
  # Web2LLM app
39
  result = scrape_url(url, clean=True)
40
  markdown_content = html_to_markdown(result["clean_html"])
41
 
 
42
  load_in_vector_db(
43
+ markdown_content,
 
44
  metadatas={
45
  "title": result["title"],
46
  "url": url,
 
55
  return f"Error fetching the webpage: {str(e)}"
56
 
57
  except Exception as e:
58
+ return f"An unexpected error occurred: {str(e)}"
src/tools/web_search.py CHANGED
@@ -1,7 +1,7 @@
1
  from src.utils.tooling import tool
2
 
3
  @tool
4
- def web_search(query: str, max_results: int = 3, timeout: int = 1) -> str:
5
  """
6
  Performs a web search based on the query and returns the top search results.
7
  Args:
 
1
  from src.utils.tooling import tool
2
 
3
  @tool
4
+ def web_search(query: str, max_results: int = 3, timeout: int = 10) -> str:
5
  """
6
  Performs a web search based on the query and returns the top search results.
7
  Args:
src/utils/vector_store.py CHANGED
@@ -5,35 +5,48 @@ import numpy as np
5
  import time
6
  import chromadb
7
  import json
 
8
 
9
  load_dotenv()
10
  MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
11
  COLLECTION_NAME = "webpages_collection"
12
  PERSIST_DIRECTORY = "./chroma_db"
13
 
14
-
15
- def get_text_embeddings(input_texts):
16
  """
17
  Get the text embeddings for the given inputs using Mistral API.
18
  """
19
- client = Mistral(api_key=MISTRAL_API_KEY)
20
- while True:
21
- try:
22
- embeddings_batch_response = client.embeddings.create(
23
- model="mistral-embed",
24
- inputs=input_texts
25
- )
26
- time.sleep(1)
27
- return [data.embedding for data in embeddings_batch_response.data]
28
- except Exception as e:
29
- if "rate limit exceeded" in str(e).lower():
30
- print("Rate limit exceeded. Retrying after 1 second...")
 
 
 
 
31
  time.sleep(1)
32
- else:
33
- raise
 
 
 
 
 
 
 
 
 
34
 
35
 
36
- def vectorize(markdown_content, chunk_size=2048):
37
  """
38
  Vectorizes the given markdown content into chunks of specified size without cutting sentences.
39
  """
@@ -58,83 +71,98 @@ def vectorize(markdown_content, chunk_size=2048):
58
  chunks.append(markdown_content[start:end].strip())
59
  start = end
60
 
61
- text_embeddings = get_text_embeddings(chunks)
62
- return np.array(text_embeddings), chunks
63
-
64
-
65
- def load_in_vector_db(text_embeddings, chunks, metadatas=None, collection_name=COLLECTION_NAME):
66
- """
67
- Load the text embeddings into a ChromaDB collection for efficient similarity search.
68
- """
69
- client = chromadb.PersistentClient(path=PERSIST_DIRECTORY)
70
-
71
- if collection_name not in [col.name for col in client.list_collections()]:
72
- collection = client.create_collection(collection_name)
73
- else:
74
- collection = client.get_collection(collection_name)
75
 
76
- existing_items = collection.get()
77
- existing_ids = set()
78
 
79
- for item in existing_items:
80
- if isinstance(item, dict) and 'ids' in item:
81
- existing_ids.update(item['ids'])
82
-
83
- for embedding, chunk in zip(text_embeddings, chunks):
84
- chunk_id = str(hash(chunk))
85
- if chunk_id not in existing_ids:
86
- collection.add(
87
- embeddings=[embedding],
88
- documents=[chunk],
89
- metadatas=[metadatas],
90
- ids=[chunk_id]
91
- )
92
- existing_ids.add(chunk_id)
93
 
94
 
95
- def see_database(collection_name=COLLECTION_NAME):
96
  """
97
- Load the ChromaDB collection and text chunks.
98
  """
99
- client = chromadb.PersistentClient(path=PERSIST_DIRECTORY)
100
-
101
- if collection_name not in [col.name for col in client.list_collections()]:
102
- print("Collection not found. Please ensure it is created.")
103
  return
104
 
105
- collection = client.get_collection(collection_name)
 
 
 
 
 
 
 
106
 
107
- items = collection.get()
 
 
 
 
108
 
109
- print(f"Type of items: {type(items)}")
110
- print(f"Items: {items}")
111
 
112
- for item in items:
113
- print(f"Type of item: {type(item)}")
114
- print(f"Item: {item}")
115
 
116
- if isinstance(item, dict):
117
- print(f"ID: {item.get('ids')}")
118
- print(f"Document: {item.get('document')}")
119
- print(f"Metadata: {item.get('metadata')}")
120
- else:
121
- print("Item is not a dictionary")
122
 
123
- print("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
 
126
  def retrieve_from_database(query, collection_name=COLLECTION_NAME, n_results=5, distance_threshold=None):
127
  """
128
  Retrieve the most similar documents from the vector store based on the query.
129
  """
130
- client = chromadb.PersistentClient(path=PERSIST_DIRECTORY)
131
- collection = client.get_collection(collection_name)
132
- query_embeddings = get_text_embeddings([query])
133
- raw_results = collection.query(
134
- query_embeddings=query_embeddings,
135
- n_results=n_results,
136
- include=["documents", "metadatas", "distances"]
137
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  if distance_threshold is not None:
139
  filtered_results = {
140
  "ids": [],
@@ -155,4 +183,4 @@ def retrieve_from_database(query, collection_name=COLLECTION_NAME, n_results=5,
155
  else:
156
  return results
157
  else:
158
- return raw_results
 
5
  import time
6
  import chromadb
7
  import json
8
+ import hashlib
9
 
10
  load_dotenv()
11
  MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
12
  COLLECTION_NAME = "webpages_collection"
13
  PERSIST_DIRECTORY = "./chroma_db"
14
 
15
+ def vectorize(input_texts, batch_size=5):
 
16
  """
17
  Get the text embeddings for the given inputs using Mistral API.
18
  """
19
+ try:
20
+ client = Mistral(api_key=MISTRAL_API_KEY)
21
+ except Exception as e:
22
+ print(f"Error initializing Mistral client: {e}")
23
+ return []
24
+
25
+ embeddings = []
26
+
27
+ for i in range(0, len(input_texts), batch_size):
28
+ batch = input_texts[i:i + batch_size]
29
+ while True:
30
+ try:
31
+ embeddings_batch_response = client.embeddings.create(
32
+ model="mistral-embed",
33
+ inputs=batch
34
+ )
35
  time.sleep(1)
36
+ embeddings.extend([data.embedding for data in embeddings_batch_response.data])
37
+ break
38
+ except Exception as e:
39
+ if "rate limit exceeded" in str(e).lower():
40
+ print("Rate limit exceeded. Retrying after 10 seconds...")
41
+ time.sleep(10)
42
+ else:
43
+ print(f"Error in embedding batch: {e}")
44
+ raise
45
+
46
+ return embeddings
47
 
48
 
49
+ def chunk_content(markdown_content, chunk_size=2048):
50
  """
51
  Vectorizes the given markdown content into chunks of specified size without cutting sentences.
52
  """
 
71
  chunks.append(markdown_content[start:end].strip())
72
  start = end
73
 
74
+ return chunks
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
 
 
76
 
77
+ def generate_chunk_id(chunk):
78
+ """Generate a unique ID for a chunk using SHA-256 hash."""
79
+ return hashlib.sha256(chunk.encode('utf-8')).hexdigest()
 
 
 
 
 
 
 
 
 
 
 
80
 
81
 
82
+ def load_in_vector_db(markdown_content, metadatas=None, collection_name=COLLECTION_NAME):
83
  """
84
+ Load the text embeddings into a ChromaDB collection for efficient similarity search.
85
  """
86
+ try:
87
+ client = chromadb.PersistentClient(path=PERSIST_DIRECTORY)
88
+ except Exception as e:
89
+ print(f"Error initializing ChromaDB client: {e}")
90
  return
91
 
92
+ try:
93
+ if collection_name not in [col.name for col in client.list_collections()]:
94
+ collection = client.create_collection(collection_name)
95
+ else:
96
+ collection = client.get_collection(collection_name)
97
+ except Exception as e:
98
+ print(f"Error accessing collection: {e}")
99
+ return
100
 
101
+ try:
102
+ existing_items = collection.get()
103
+ except Exception as e:
104
+ print(f"Error retrieving existing items: {e}")
105
+ return
106
 
107
+ existing_ids = set()
 
108
 
109
+ if 'ids' in existing_items:
110
+ existing_ids.update(existing_items['ids'])
 
111
 
112
+ chunks = chunk_content(markdown_content)
113
+ text_to_vectorize = []
 
 
 
 
114
 
115
+ for chunk in chunks:
116
+ chunk_id = generate_chunk_id(chunk)
117
+ if chunk_id not in existing_ids:
118
+ text_to_vectorize.append(chunk)
119
+
120
+ print(f"New chunks to vectorize: {len(text_to_vectorize)}")
121
+
122
+ if text_to_vectorize:
123
+ embeddings = vectorize(text_to_vectorize)
124
+ for embedding, chunk in zip(embeddings, text_to_vectorize):
125
+ chunk_id = generate_chunk_id(chunk)
126
+ if chunk_id not in existing_ids:
127
+ try:
128
+ collection.add(
129
+ embeddings=[embedding],
130
+ documents=[chunk],
131
+ metadatas=[metadatas],
132
+ ids=[chunk_id]
133
+ )
134
+ existing_ids.add(chunk_id)
135
+ except Exception as e:
136
+ print(f"Error adding embedding to collection: {e}")
137
 
138
 
139
  def retrieve_from_database(query, collection_name=COLLECTION_NAME, n_results=5, distance_threshold=None):
140
  """
141
  Retrieve the most similar documents from the vector store based on the query.
142
  """
143
+ try:
144
+ client = chromadb.PersistentClient(path=PERSIST_DIRECTORY)
145
+ collection = client.get_collection(collection_name)
146
+ except Exception as e:
147
+ print(f"Error accessing collection: {e}")
148
+ return
149
+
150
+ try:
151
+ query_embeddings = vectorize([query])
152
+ except Exception as e:
153
+ print(f"Error vectorizing query: {e}")
154
+ return
155
+
156
+ try:
157
+ raw_results = collection.query(
158
+ query_embeddings=query_embeddings,
159
+ n_results=n_results,
160
+ include=["documents", "metadatas", "distances"]
161
+ )
162
+ except Exception as e:
163
+ print(f"Error querying collection: {e}")
164
+ return
165
+
166
  if distance_threshold is not None:
167
  filtered_results = {
168
  "ids": [],
 
183
  else:
184
  return results
185
  else:
186
+ return raw_results
src/workflow.py CHANGED
@@ -36,7 +36,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
36
  results_log = []
37
  answers_payload = []
38
 
39
- #chosen_task_id = "7bd855d8-463d-4ed5-93ca-5fe35145f733"
40
  #questions_data = [item for item in questions_data if item.get("task_id") == chosen_task_id]
41
 
42
  for item in questions_data:
 
36
  results_log = []
37
  answers_payload = []
38
 
39
+ #chosen_task_id = "8e867cd7-cff9-4e6c-867a-ff5ddc2550be"
40
  #questions_data = [item for item in questions_data if item.get("task_id") == chosen_task_id]
41
 
42
  for item in questions_data:
tools.json CHANGED
@@ -48,18 +48,22 @@
48
  {
49
  "type": "function",
50
  "function": {
51
- "name": "load_file",
52
- "description": "Loads data from a file based on its extension.",
53
  "parameters": {
54
  "type": "object",
55
  "properties": {
56
- "file_path": {
57
  "type": "string",
58
- "description": "The path to the file to be loaded."
 
 
 
 
59
  }
60
  },
61
  "required": [
62
- "file_path"
63
  ]
64
  }
65
  }
@@ -102,30 +106,6 @@
102
  }
103
  }
104
  },
105
- {
106
- "type": "function",
107
- "function": {
108
- "name": "analyze_document",
109
- "description": "Extracts specific information from a local PDF or local text document based on given keywords.",
110
- "parameters": {
111
- "type": "object",
112
- "properties": {
113
- "file_path": {
114
- "type": "string",
115
- "description": "The path to the PDF or text document to analyze."
116
- },
117
- "keywords": {
118
- "type": "array",
119
- "description": "A list of keywords to search for in the document."
120
- }
121
- },
122
- "required": [
123
- "file_path",
124
- "keywords"
125
- ]
126
- }
127
- }
128
- },
129
  {
130
  "type": "function",
131
  "function": {
@@ -172,20 +152,18 @@
172
  "type": "function",
173
  "function": {
174
  "name": "execute_code",
175
- "description": "Executes Python code from a file or a string and returns the final result.",
176
  "parameters": {
177
  "type": "object",
178
  "properties": {
179
  "file_path": {
180
  "type": "string",
181
  "description": "The path to the file containing the Python code to execute."
182
- },
183
- "code_string": {
184
- "type": "string",
185
- "description": "The Python code as a string to execute."
186
  }
187
  },
188
- "required": []
 
 
189
  }
190
  }
191
  },
@@ -253,32 +231,5 @@
253
  ]
254
  }
255
  }
256
- },
257
- {
258
- "type": "function",
259
- "function": {
260
- "name": "retrieve_knowledge",
261
- "description": "Retrieves knowledge from a database with a provided query.",
262
- "parameters": {
263
- "type": "object",
264
- "properties": {
265
- "query": {
266
- "type": "string",
267
- "description": "The query to search for in the vector store."
268
- },
269
- "n_results": {
270
- "type": "integer",
271
- "description": "The number of results to return. Default is 1."
272
- },
273
- "distance_threshold": {
274
- "type": "number",
275
- "description": "The minimum distance score for results. Default is 0.5."
276
- }
277
- },
278
- "required": [
279
- "query"
280
- ]
281
- }
282
- }
283
  }
284
  ]
 
48
  {
49
  "type": "function",
50
  "function": {
51
+ "name": "retrieve_knowledge",
52
+ "description": "Retrieves knowledge from a database with a provided query.",
53
  "parameters": {
54
  "type": "object",
55
  "properties": {
56
+ "query": {
57
  "type": "string",
58
+ "description": "The query to search for in the vector store."
59
+ },
60
+ "n_results": {
61
+ "type": "integer",
62
+ "description": "The number of results to return. Default is 2."
63
  }
64
  },
65
  "required": [
66
+ "query"
67
  ]
68
  }
69
  }
 
106
  }
107
  }
108
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  {
110
  "type": "function",
111
  "function": {
 
152
  "type": "function",
153
  "function": {
154
  "name": "execute_code",
155
+ "description": "Executes Python code from a file and returns the final result.",
156
  "parameters": {
157
  "type": "object",
158
  "properties": {
159
  "file_path": {
160
  "type": "string",
161
  "description": "The path to the file containing the Python code to execute."
 
 
 
 
162
  }
163
  },
164
+ "required": [
165
+ "file_path"
166
+ ]
167
  }
168
  }
169
  },
 
231
  ]
232
  }
233
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  }
235
  ]