pedutronix commited on
Commit
9b916a5
·
verified ·
1 Parent(s): c86b5e6

Upload 3 files

Browse files
Files changed (3) hide show
  1. graph.py +190 -0
  2. system_prompts.py +49 -0
  3. tools.py +350 -0
graph.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from system_prompts import SYSTEM_PROMPT_ATTACH_FILENAME, SYSTEM_PROMPT_AGGREGATOR, SYSTEM_PROMPT_ORQ
2
+
3
+ from pydantic import BaseModel, Field
4
+ from pydantic import ValidationError
5
+
6
+ from langgraph.types import Command
7
+ from langgraph.graph import StateGraph, MessagesState, START, END
8
+ from langchain_core.messages import ToolMessage, AIMessage, HumanMessage
9
+ from langchain_google_vertexai import ChatVertexAI
10
+ from langchain_anthropic import ChatAnthropic
11
+ from langgraph.prebuilt import ToolNode
12
+
13
+ from typing import Literal, Optional
14
+ import time
15
+
16
+ from tools import download_youtube_video, get_tools
17
+
18
# Chat models shared by the nodes of this graph.
llm_pro = ChatVertexAI(model="gemini-2.5-pro")
llm_claude = ChatAnthropic(
    model="claude-3-5-sonnet-latest",
    max_retries=6,
)
# Claude with the agent's tool set bound; invoked by the manager node.
llm_tools = llm_claude.bind_tools(get_tools())
21
+
22
class TaskState(MessagesState):
    """Graph state: the inherited ``messages`` history plus per-task fields."""

    # Read by next_node_router to jump straight to the aggregator.
    check_final_answer: bool | None
    # Local path of the attached file; read by attach_data for code/data tasks.
    path_filename: str | None
    # Storage URI of attached media; read by attach_data for audio/image tasks.
    gcp_path: str | None
    # Written by the aggregator node.
    final_answer: str | None
    explanation: str | None
28
+
29
# Structured output requested from the model in attach_data: does the task
# come with attached data and, if so, of which kind.
# (Kept as comments on purpose: no class docstring is added here.)
class RouterFilename(BaseModel):
    is_filename_attached: bool = Field(
        ...,
        description="Whether or not there is a file or link associated with data to be analysed at the user's request.",
    )
    data_type: Literal["code", "data", "youtube", "audio", "image", "none"] = Field(
        ...,
        description="Type of file attached to the task",
    )
    youtube_url: Optional[str] = Field(
        default=None,
        description="Youtube URL attached to the user's order, if any.",
    )
36
+
37
# Structured output requested from the model in the aggregator node.
# (Kept as comments on purpose: no class docstring is added here.)
class Answer(BaseModel):
    final_answer: Optional[str] = Field(
        default=None,
        description="Final response for the user",
    )
    explanation: Optional[str] = Field(
        default=None,
        description="Explanation of the final response",
    )
47
+
48
def attach_data(state: TaskState) -> dict:
    """Detect data attached to the task and add a message describing it.

    Asks ``llm_pro`` for a ``RouterFilename`` verdict on the conversation and,
    when an attachment is reported, adds one message to the history:

    * ``code``    -> contents of ``state["path_filename"]`` in a fenced block
    * ``data``    -> the local path ``state["path_filename"]``
    * ``youtube`` -> the video is downloaded via ``download_youtube_video``
      and the returned URI is reported
    * ``audio`` / ``image`` -> the URI stored in ``state["gcp_path"]``

    Args:
        state: Current graph state.

    Returns:
        ``{"messages": [new_message]}`` when an attachment was resolved,
        otherwise ``{}`` (nothing attached, type ``"none"``, or a YouTube
        task without a URL).

    Raises:
        RuntimeError: If no structured output is obtained after 3 attempts.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT_ATTACH_FILENAME}
    ] + state["messages"]

    generator = llm_pro.with_structured_output(RouterFilename)

    router_decision = None
    for _ in range(3):  # up to 3 logical retries
        try:
            router_decision = generator.invoke(messages)
        except ValidationError:
            # Tell the model its previous output was rejected, then back off.
            messages.append({"role": "system", "content":
                             "This JSON is not valid! Please, try again."})
            time.sleep(2.0)
            continue
        if router_decision is not None:
            break
    else:
        raise RuntimeError("Gemini didn't get the structured output.")

    print(f"Router filename decision: {router_decision}")

    data_type = router_decision.data_type
    # "none" must never fall through to the image branch, even if the model
    # inconsistently sets is_filename_attached=True.
    if not router_decision.is_filename_attached or data_type == "none":
        return {}

    if data_type == "code":
        with open(state["path_filename"], "r", encoding="utf-8") as f:
            code = f.read()
        response = f"Code:\n```python\n{code}\n```"
    elif data_type == "data":
        path_filename = state["path_filename"]
        response = f"Path of the attached file: {path_filename}"
    elif data_type == "youtube":
        if not router_decision.youtube_url:
            # Nothing to download; let the manager work from the text alone.
            print("Router reported a YouTube attachment without a URL; skipping download.")
            return {}
        _, gcp_path = download_youtube_video(router_decision.youtube_url, "video")
        response = f"video GCP uri: {gcp_path}"
    elif data_type == "audio":
        gcp_path = state["gcp_path"]
        response = f"audio GCP uri: {gcp_path}"
    else:  # "image" is the only remaining literal
        gcp_path = state["gcp_path"]
        response = f"image GCP uri: {gcp_path}"

    # Only the new message is returned: the ``messages`` channel inherited
    # from MessagesState appends it to the existing history.
    return {"messages": [HumanMessage(content=response)]}
98
+
99
def manager(state: TaskState) -> dict:
    """Run the orchestrator model for one step.

    Sends ``SYSTEM_PROMPT_ORQ`` plus the conversation to ``llm_tools`` and
    records its reply. When the reply carries no tool calls and contains the
    ``FINAL ANSWER`` marker requested by the prompt, ``check_final_answer`` is
    set so the router can hand over to the aggregator.

    Args:
        state: Current graph state.

    Returns:
        A state update with the new message and, when applicable,
        ``check_final_answer=True``.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT_ORQ}
    ] + state["messages"]

    response = llm_tools.invoke(messages)
    print(f"LLM ORQ response: {response}")

    # Message content may be a plain string or a list of content blocks;
    # flatten it to text before looking for the marker.
    content = response.content
    if isinstance(content, list):
        content = "".join(
            part.get("text", "") if isinstance(part, dict) else str(part)
            for part in content
        )

    # Only the new message is returned: the ``messages`` channel inherited
    # from MessagesState appends it to the existing history.
    update = {"messages": [response]}

    # We assume a tool-free reply containing the marker is the final answer.
    if not response.tool_calls and "FINAL ANSWER" in content:
        update["check_final_answer"] = True

    return update
113
+
114
def next_node_router(state: TaskState) -> Literal[
    "tool_node", "aggregator"
]:
    """Choose the node that follows the manager.

    Args:
        state: Current graph state.

    Returns:
        ``"aggregator"`` when ``check_final_answer`` is set, ``"tool_node"``
        when the last message is an ``AIMessage`` with pending tool calls,
        and ``"aggregator"`` otherwise.
    """
    # .get(): the flag may be absent if no node has written it yet, and a
    # plain subscript would then raise KeyError.
    if state.get("check_final_answer"):
        return "aggregator"

    # Inspect the most recent message in the history.
    last_message = state["messages"][-1]
    if isinstance(last_message, AIMessage) and last_message.tool_calls:
        return "tool_node"

    return "aggregator"
126
+
127
def aggregator(state: TaskState) -> dict:
    """Extract the final answer and its explanation from the conversation.

    Combines the first message (the task) with the last message (the model's
    latest reply) into one prompt and asks ``llm_pro`` for an ``Answer``.

    Args:
        state: Current graph state.

    Returns:
        A state update with ``final_answer`` and ``explanation``.

    Raises:
        RuntimeError: If no structured output is obtained after 3 attempts.
    """
    history = state["messages"]
    task = history[0].content
    last_model_answer = history[-1].content

    prompt = [
        {"role": "system", "content": SYSTEM_PROMPT_AGGREGATOR},
        HumanMessage(content=f"\nTask: {task}\n{last_model_answer}\n"),
    ]

    structured_llm = llm_pro.with_structured_output(Answer)

    for _attempt in range(3):  # up to 3 logical retries
        try:
            answer = structured_llm.invoke(prompt)
        except ValidationError:
            # Tell the model its previous output was rejected, then back off.
            prompt.append({"role": "system", "content":
                           "This JSON is not valid! Please, try again."})
            time.sleep(2.0)
        else:
            if answer is not None:
                break
    else:
        raise RuntimeError("Gemini didn't get the structured output.")

    return {
        "final_answer": answer.final_answer,
        "explanation": answer.explanation,
    }
157
+
158
+
159
def generate_graph():
    """Assemble and compile the agent's LangGraph workflow.

    Topology:
        START -> attach_data -> manager
        manager -> tool_node | aggregator   (chosen by ``next_node_router``)
        tool_node -> manager
        aggregator -> END

    Returns:
        The compiled graph.
    """
    tool_node = ToolNode(get_tools())

    builder = StateGraph(TaskState)

    # Register every node, including the tool executor.
    builder.add_node("attach_data", attach_data)
    builder.add_node("manager", manager)
    builder.add_node("tool_node", tool_node)
    builder.add_node("aggregator", aggregator)

    # attach_data is the entry point; it always hands over to the manager.
    builder.add_edge(START, "attach_data")
    builder.add_edge("attach_data", "manager")

    # After a tool runs, its result goes back to the manager.
    builder.add_edge("tool_node", "manager")

    # The manager's successor is decided by next_node_router.
    builder.add_conditional_edges(
        "manager",
        next_node_router,
        {
            "tool_node": "tool_node",
            "aggregator": "aggregator",
        },
    )

    # Make the terminal transition explicit instead of leaving the
    # aggregator without an outgoing edge.
    builder.add_edge("aggregator", END)

    return builder.compile()
system_prompts.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt for the attachment router: decides whether the task ships with data.
SYSTEM_PROMPT_ATTACH_FILENAME = """
You are an expert problem-solving agent of all kinds.
You are going to receive a task from a user and you have to decide whether he has asked you to analyse the attached data.
The data can be a file name to download or a link to a web page to download the data.

RESPONSE FORMAT

Return a JSON format.
If no data is attached, the field data_type=“none”. If data_type is not “none”, the field “is_filename_attached”=True.
"""

# Prompt for the video question-answering tool.
SYSTEM_PROMPT_VIDEO = """
You are an expert analyser of videos that you will be asked specific questions about.
You have to always answer the questions with an answer as we have to solve a quiz and ambiguous answers are not accepted.
"""

# Prompt for the audio question-answering tool.
SYSTEM_PROMPT_AUDIO = """
You are an expert analyser of audios that you will be asked specific questions about.
You have to always answer the questions with an answer as we have to solve a quiz and ambiguous answers are not accepted.
"""

# Prompt for the image question-answering tool.
SYSTEM_PROMPT_IMAGE = """
You are an expert analyser of images that you will be asked specific questions about.
You have to always answer the questions with an answer as we have to solve a quiz and ambiguous answers are not accepted.
"""

# Prompt for the orchestrator: lists the tools and the final-answer format.
SYSTEM_PROMPT_ORQ = """
Decide step by step how to solve the user's question using the following tools if necessary:

• PythonREPL – Run Python code.
• web_search – Search the web using google search.
• visit_webpage – Visits a webpage at the given url and reads its content as a markdown string.
• wikipedia_search – Query Wikipedia.
• query_video – Analyse the video and answer your query.
• query_audio – Analyse the audio and answer your query.
• query_image – Analyse the image and answer your query.

If you are trying to analyse a very complicated picture such as the solution to a board game, it is best to try to transfer that position into code using an engine to confirm your thoughts by making those moves that you think are winning.

When you have reached the final answer, respond with:
FINAL ANSWER: {final answer}
EXPLANATION: {explanation}
"""

# Prompt for the aggregator: extracts a terse final answer plus explanation.
SYSTEM_PROMPT_AGGREGATOR = """
You are an assistant who takes the final answer to a user's question and has to extract:
- Final answer: should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
- Explanation: that is understandable and coherent.
"""
tools.py ADDED
@@ -0,0 +1,350 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+
4
+ import mimetypes
5
+ from google.cloud import storage
6
+ from typing import Literal
7
+ import requests
8
+ import re
9
+ from markdownify import markdownify
10
+ from requests.exceptions import RequestException
11
+ from langchain_core.tools import convert_runnable_to_tool
12
+ from smolagents.utils import truncate_content
13
+ from langchain_core.runnables import RunnableLambda
14
+
15
+ from pytubefix import YouTube
16
+ from pytubefix.cli import on_progress
17
+
18
+ from langchain_core.tools import tool
19
+ from langchain_core.prompts import ChatPromptTemplate
20
+ from langchain_core.output_parsers import StrOutputParser
21
+ from langchain_google_vertexai import ChatVertexAI
22
+ from langchain.agents import Tool
23
+ from langchain_experimental.tools import PythonREPLTool
24
+ from langchain_community.tools import WikipediaQueryRun
25
+ from langchain_community.utilities import WikipediaAPIWrapper
26
+ from langchain_community.utilities import GoogleSerperAPIWrapper
27
+
28
+ from system_prompts import SYSTEM_PROMPT_VIDEO, SYSTEM_PROMPT_AUDIO, SYSTEM_PROMPT_IMAGE
29
+
30
# Gemini Flash model used by the media-query tools in this module.
llm_flash = ChatVertexAI(
    model="gemini-2.5-flash",
)
31
+
32
# Extensions we want to "normalise" in case the system's MIME database does
# not provide them out of the box.
_EXTRA_MIME = {
    ".mp3": "audio/mpeg",
    ".mp4": "video/mp4",
}
# Register each mapping with the global mimetypes database.
for _ext, _mime in _EXTRA_MIME.items():
    mimetypes.add_type(_mime, _ext)
39
+
40
+ def upload_file_to_bucket(
41
+ local_path: str,
42
+ bucket_name: str = os.getenv("GCP_BUCKET_NAME"),
43
+ ) -> str:
44
+ """
45
+ Sube cualquier fichero a Cloud Storage y devuelve su URI gs://.
46
+ • Detecta automáticamente el MIME según la extensión.
47
+ • Admite sobrescribir `object_name` para cambiar la ruta en el bucket.
48
+ • Aplica precondición `if_generation_match=0` (subida segura: falla si ya existe).
49
+ """
50
+ if not os.path.isfile(local_path):
51
+ raise FileNotFoundError(f"No existe: {local_path}")
52
+
53
+ # ---------- (1) Resolver nombre y extensión ----------
54
+ _, ext = os.path.splitext(local_path) # :contentReference[oaicite:4]{index=4}
55
+ ext = ext.lower()
56
+ object_name = f"data{ext}"
57
+
58
+ # ---------- (2) Resolver MIME ----------
59
+ file_type, _ = mimetypes.guess_type(local_path) # intenta inferir MIME
60
+ if not file_type and ext in _EXTRA_MIME: # fallback manual
61
+ file_type = _EXTRA_MIME[ext]
62
+ if not file_type:
63
+ raise ValueError(f"No se pudo inferir MIME para «{ext}»")
64
+
65
+ # ---------- (3) Subir a GCS ----------
66
+ client = storage.Client()
67
+ bucket = client.bucket(bucket_name)
68
+ blob = bucket.blob(object_name)
69
+
70
+ blob.upload_from_filename(
71
+ local_path,
72
+ content_type=file_type,
73
+ )
74
+
75
+ gs_uri = f"gs://{bucket_name}/{object_name}"
76
+ print(f"✅ Subido → {gs_uri} ({file_type})")
77
+ return gs_uri
78
+
79
+
80
def download_youtube_video(url: str, mode: Literal["video", "audio"]) -> tuple[str, str]:
    """
    Downloads a YouTube video or audio file based on the specified mode and
    uploads the result with ``upload_file_to_bucket``.

    Args:
        url (str): The URL of the YouTube video to download.
        mode (Literal["audio", "video"]): The download mode. Use "audio" to download the audio track as an .mp3 file,
            or "video" to download the full video as an .mp4 file.

    Returns:
        Tuple[str, str]:
            A two-element tuple *(local_path, gcp_path)* where

            * **local_path** is the absolute path of the file saved on disk.
            * **gcp_path** is the URI returned by ``upload_file_to_bucket``.

    Raises:
        ValueError: If the mode is not "audio" or "video".
        RuntimeError: If the video exposes no audio-only stream (audio mode).
        Exception: If an error occurs during the download process.
    """
    if mode not in ["audio", "video"]:
        raise ValueError("'Mode' argument is not valid! It should be audio or video.")

    data_folder = "data/"
    yt = YouTube(url, on_progress_callback=on_progress)

    if mode == "video":
        stream = yt.streams.get_highest_resolution()
        tmp_path = stream.download(output_path=data_folder)
        base, _ = os.path.splitext(tmp_path)
        # NOTE(review): assumes the downloaded stream already is an .mp4;
        # no conversion or rename is performed here.
        path_filename = os.path.abspath(f"{base}.mp4")
        uri_path = upload_file_to_bucket(path_filename)

    else:  # mode == "audio"
        audio = yt.streams.filter(only_audio=True).first()
        if audio is None:
            raise RuntimeError(f"No audio-only stream available for: {url}")
        tmp_path = audio.download(output_path=data_folder)
        base, _ = os.path.splitext(tmp_path)
        mp3_path = f"{base}.mp3"

        # Convert to MP3 with FFmpeg.
        subprocess.run(
            [
                "ffmpeg", "-y",          # overwrite if exists
                "-i", tmp_path,          # input
                "-vn",                   # no video
                "-ar", "44100",          # sample rate
                "-ab", "192k",           # audio bitrate
                "-loglevel", "error",    # silence ffmpeg output
                mp3_path,
            ],
            check=True,
        )

        os.remove(tmp_path)  # drop the intermediate download
        path_filename = os.path.abspath(mp3_path)
        uri_path = upload_file_to_bucket(path_filename)

    return path_filename, uri_path
146
+
147
@tool
def query_video(gcp_uri: str, query: str) -> str:
    """Analyzes a video file from a Google Cloud Storage (GCS) URI to answer a specific question about its visual content.

    This tool is the correct choice for any task that requires understanding or describing
    events, objects, or actions within a video. The video must be accessible via a GCS URI.

    Args:
        gcp_uri (str): The full Google Cloud Storage URI for the video file.
                       It MUST be a .mp4 file and the URI MUST start with 'gs://'.
        query (str): A clear, specific question about the video's content.
                     For example: 'What is the maximum number of birds on screen at the same time?'
                     or 'What color is the car that appears at the 15-second mark?'.

    Returns:
        str: A string containing the answer to the query based on the video analysis.
    """
    # Only .mp4 URIs are accepted; anything else yields an error string.
    extension = os.path.splitext(gcp_uri)[1]
    if extension.lower() != '.mp4':
        return "Error: The video cannot be processed because it is not a .mp4 file. The gcp_uri must point to a .mp4 file."

    # The placeholder names used here ("query", "video_uri") must match the
    # keys passed to invoke() below.
    media_part = {
        "type": "media",
        "file_uri": "{video_uri}",
        "mime_type": "video/mp4",
    }
    prompt = ChatPromptTemplate.from_messages([
        ("system", SYSTEM_PROMPT_VIDEO),
        ("human", ["{query}", media_part]),
    ])

    pipeline = prompt | llm_flash | StrOutputParser()
    return pipeline.invoke({"query": query, "video_uri": gcp_uri})
193
+
194
@tool
def query_audio(gcp_uri: str, query: str) -> str:
    """Analyzes an audio file from a Google Cloud Storage (GCS) URI to answer a specific question about its content.

    This tool is ideal for tasks like transcription, speaker identification, sound analysis,
    or answering questions about speech or music within an audio file.

    Args:
        gcp_uri (str): The full Google Cloud Storage URI for the audio file.
                       It MUST be a .mp3 file and the URI MUST start with 'gs://'.
        query (str): A clear, specific question about the audio's content.
                     For example: 'Transcribe the speech in this audio,' 'Is the speaker male or female?'
                     or 'What song is playing in the background?'.

    Returns:
        str: A string containing the answer to the query based on the audio analysis.
    """
    # Only .mp3 URIs are accepted; anything else yields an error string.
    extension = os.path.splitext(gcp_uri)[1]
    if extension.lower() != '.mp3':
        return "Error: The audio cannot be processed because it is not a .mp3 file. The gcp_uri must point to a .mp3 file."

    # The placeholder names used here ("query", "audio_uri") must match the
    # keys passed to invoke() below.
    media_part = {
        "type": "media",
        "file_uri": "{audio_uri}",
        "mime_type": "audio/mpeg",
    }
    prompt = ChatPromptTemplate.from_messages([
        ("system", SYSTEM_PROMPT_AUDIO),
        ("human", ["{query}", media_part]),
    ])

    pipeline = prompt | llm_flash | StrOutputParser()
    return pipeline.invoke({"query": query, "audio_uri": gcp_uri})
237
+
238
@tool
def query_image(gcp_uri: str, query: str) -> str:
    """Analyzes an image file from a Google Cloud Storage (GCS) URI to answer a question about its visual content.

    This tool is ideal for tasks like reading text from an image (OCR), identifying objects,
    describing a scene, or answering any question based on the visual information in a static image.

    Args:
        gcp_uri (str): The full Google Cloud Storage URI for the image file.
                       It MUST be a .png file and the URI MUST start with 'gs://'.
        query (str): A clear, specific question about the image's content.
                     For example: 'What text is written on the street sign?',
                     'How many people are in this picture?', or 'Describe the main activity in this image.'

    Returns:
        str: A string containing the answer to the query based on the image's content.
    """
    # Only .png URIs are accepted; anything else yields an error string.
    extension = os.path.splitext(gcp_uri)[1]
    if extension.lower() != '.png':
        return "Error: The image cannot be processed because it is not a .png file. The gcp_uri must point to a .png file."

    # The placeholder names used here ("query", "gcp_uri") must match the
    # keys passed to invoke() below.
    image_part = {
        "type": "image_url",
        "image_url": {"url": "{gcp_uri}"},
    }
    prompt = ChatPromptTemplate.from_messages([
        ("system", SYSTEM_PROMPT_IMAGE),
        ("human", ["{query}", image_part]),
    ])

    pipeline = prompt | llm_flash | StrOutputParser()
    return pipeline.invoke({"query": query, "gcp_uri": gcp_uri})
281
+
282
def visit_webpage(url: str) -> str:
    """Fetch ``url`` and return its content as truncated Markdown.

    Failures are not raised: every error is turned into a descriptive
    string that is returned to the caller.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page converted to Markdown (runs of blank lines collapsed and
        the text passed through ``truncate_content`` with a limit of 10000),
        or an error message.
    """
    try:
        # GET with a 20-second timeout; non-success status codes raise.
        page = requests.get(url, timeout=20)
        page.raise_for_status()

        # HTML -> Markdown, then squeeze 3+ consecutive newlines down to 2.
        collapsed = re.sub(r"\n{3,}", "\n\n", markdownify(page.text).strip())

        return truncate_content(collapsed, 10000)

    except RequestException as exc:
        # Timeout is a RequestException subclass with its own message.
        if isinstance(exc, requests.exceptions.Timeout):
            return "The request timed out. Please try again later or check the URL."
        return f"Error fetching the webpage: {exc!s}"
    except Exception as exc:
        return f"An unexpected error occurred: {exc!s}"
302
+
303
# Module-level tool instances.
# NOTE: get_tools() in this module builds its own copies of the same tools
# instead of returning these objects.

# Web page reader wrapped with up to 3 attempts and jittered backoff.
visit_webpage_with_retry = RunnableLambda(visit_webpage).with_retry(
    stop_after_attempt=3,
    wait_exponential_jitter=True,
)
visit_webpage_tool = convert_runnable_to_tool(
    visit_webpage_with_retry,
    name="visit_webpage",
    description="Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages.",
    arg_types={"url": "str"},
)

# Python REPL.
python_tool = PythonREPLTool()

# Web search through the Serper wrapper.
search = GoogleSerperAPIWrapper()
search_tool = Tool(
    name="web_search",
    func=search.run,
    description="useful for when you need to ask with search on the internet",
)

# Wikipedia lookup.
wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
wikipedia_tool = Tool(
    name="wikipedia_search",
    func=wikipedia.run,
    description="useful for when you need to ask with search on Wikipedia",
)
324
+
325
def get_tools():
    """Build and return the list of tools available to the agent.

    New tool objects are constructed on every call.

    Returns:
        A list ordered as: Python REPL, web search, Wikipedia search,
        web page reader, ``query_video``, ``query_image``, ``query_audio``.
    """
    # Web page reader with up to 3 attempts and jittered backoff.
    page_reader = convert_runnable_to_tool(
        RunnableLambda(visit_webpage).with_retry(
            wait_exponential_jitter=True,
            stop_after_attempt=3,
        ),
        name="visit_webpage",
        description="Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages.",
        arg_types={"url": "str"},
    )

    repl = PythonREPLTool()

    serper = GoogleSerperAPIWrapper()
    web_search = Tool(
        name="web_search",
        func=serper.run,
        description="useful for when you need to ask with search on the internet",
    )

    wiki_runner = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
    wiki_search = Tool(
        name="wikipedia_search",
        func=wiki_runner.run,
        description="useful for when you need to ask with search on Wikipedia",
    )

    return [
        repl,
        web_search,
        wiki_search,
        page_reader,
        query_video,
        query_image,
        query_audio,
    ]