josondev committed on
Commit 58a708e · verified · 1 Parent(s): 14a0e2b

Update veryfinal.py

Files changed (1)
  1. veryfinal.py +407 -368
veryfinal.py CHANGED
@@ -1,6 +1,6 @@
  """
- Open-Source Multi-LLM Agent System
- Uses only free and open-source models - no paid APIs required
  """

  import os
@@ -9,27 +9,35 @@ import random
  import operator
  from typing import List, Dict, Any, TypedDict, Annotated, Optional
  from dotenv import load_dotenv

- # Core LangChain imports
  from langchain_core.tools import tool
- from langchain_community.tools.tavily_search import TavilySearchResults
- from langchain_community.document_loaders import WikipediaLoader
  from langgraph.graph import StateGraph, END
  from langgraph.checkpoint.memory import MemorySaver
- from langchain_core.messages import SystemMessage, HumanMessage, AIMessage

- # Open-source model integrations
- from langchain_groq import ChatGroq  # Free tier available
- from langchain_community.llms import Ollama
- from langchain_community.chat_models import ChatOllama
-
- # Hugging Face integration for open-source models
  try:
-     from langchain_huggingface import HuggingFacePipeline
-     from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-     HF_AVAILABLE = True
  except ImportError:
-     HF_AVAILABLE = False

  # Vector database imports
  import faiss
@@ -39,171 +47,109 @@ import json

  load_dotenv()

- # Enhanced system prompt
- ENHANCED_SYSTEM_PROMPT = (
-     "You are a helpful assistant tasked with answering questions using available tools. "
-     "You must provide accurate, comprehensive answers based on available information. "
-     "When answering questions, follow these guidelines:\n"
-     "1. Use available tools to gather information when needed\n"
-     "2. Provide precise, factual answers\n"
-     "3. For numbers: don't use commas or units unless specified\n"
-     "4. For strings: don't use articles or abbreviations, write digits in plain text\n"
-     "5. For lists: apply above rules based on element type\n"
-     "6. Always end with 'FINAL ANSWER: [YOUR ANSWER]'\n"
-     "7. Be concise but thorough in your reasoning\n"
-     "8. If you cannot find the answer, state that clearly"
- )
-
- # ---- Tool Definitions ----
- @tool
- def multiply(a: int, b: int) -> int:
-     """Multiply two integers and return the product."""
-     return a * b

- @tool
- def add(a: int, b: int) -> int:
-     """Add two integers and return the sum."""
-     return a + b

- @tool
- def subtract(a: int, b: int) -> int:
-     """Subtract the second integer from the first and return the difference."""
-     return a - b

- @tool
- def divide(a: int, b: int) -> float:
-     """Divide the first integer by the second and return the quotient."""
-     if b == 0:
-         raise ValueError("Cannot divide by zero.")
-     return a / b
-
- @tool
- def modulus(a: int, b: int) -> int:
-     """Return the remainder when dividing the first integer by the second."""
-     return a % b
-
- @tool
- def optimized_web_search(query: str) -> str:
-     """Perform web search using free DuckDuckGo (fallback if Tavily not available)."""
-     try:
-         # Try Tavily first (free tier)
-         if os.getenv("TAVILY_API_KEY"):
-             time.sleep(random.uniform(0.7, 1.5))
-             search_tool = TavilySearchResults(max_results=3)
-             docs = search_tool.invoke({"query": query})
-             return "\n\n---\n\n".join(
-                 f"<Doc url='{d.get('url','')}'>{d.get('content','')[:800]}</Doc>"
-                 for d in docs
-             )
-         else:
-             # Fallback to DuckDuckGo (completely free)
-             try:
-                 from duckduckgo_search import DDGS
-                 with DDGS() as ddgs:
-                     results = list(ddgs.text(query, max_results=3))
-                 return "\n\n---\n\n".join(
-                     f"<Doc url='{r.get('href','')}'>{r.get('body','')[:800]}</Doc>"
-                     for r in results
-                 )
-             except ImportError:
-                 return "Web search not available - install duckduckgo-search for free web search"
-     except Exception as e:
-         return f"Web search failed: {e}"
-
- @tool
- def optimized_wiki_search(query: str) -> str:
-     """Perform Wikipedia search - completely free."""
-     try:
-         time.sleep(random.uniform(0.3, 1))
-         docs = WikipediaLoader(query=query, load_max_docs=2).load()
-         return "\n\n---\n\n".join(
-             f"<Doc src='{d.metadata.get('source','Wikipedia')}'>{d.page_content[:1000]}</Doc>"
-             for d in docs
-         )
-     except Exception as e:
-         return f"Wikipedia search failed: {e}"
-
- # ---- Open-Source Model Manager ----
- class OpenSourceModelManager:
-     """Manages only open-source and free models"""

      def __init__(self):
          self.available_models = {}
-         self._initialize_models()

-     def _initialize_models(self):
-         """Initialize only open-source models"""

-         # 1. Groq (Free tier with open-source models)
          if os.getenv("GROQ_API_KEY"):
              try:
-                 self.available_models['groq_llama3_70b'] = ChatGroq(
-                     model="llama3-70b-8192",
-                     temperature=0,
-                     api_key=os.getenv("GROQ_API_KEY")
-                 )
-                 self.available_models['groq_llama3_8b'] = ChatGroq(
-                     model="llama3-8b-8192",
-                     temperature=0,
-                     api_key=os.getenv("GROQ_API_KEY")
-                 )
-                 self.available_models['groq_mixtral'] = ChatGroq(
-                     model="mixtral-8x7b-32768",
-                     temperature=0,
-                     api_key=os.getenv("GROQ_API_KEY")
-                 )
-                 self.available_models['groq_gemma'] = ChatGroq(
-                     model="gemma-7b-it",
-                     temperature=0,
-                     api_key=os.getenv("GROQ_API_KEY")
-                 )
-                 print("Groq models initialized (free tier)")
              except Exception as e:
                  print(f"Groq models not available: {e}")

-         # 2. Ollama (Completely free local models)
          try:
-             # Test if Ollama is running
-             test_model = ChatOllama(model="llama3", base_url="http://localhost:11434")
-             # If no error, add Ollama models
-             self.available_models['ollama_llama3'] = ChatOllama(model="llama3")
-             self.available_models['ollama_llama3_70b'] = ChatOllama(model="llama3:70b")
-             self.available_models['ollama_mistral'] = ChatOllama(model="mistral")
-             self.available_models['ollama_phi3'] = ChatOllama(model="phi3")
-             self.available_models['ollama_codellama'] = ChatOllama(model="codellama")
-             self.available_models['ollama_gemma'] = ChatOllama(model="gemma")
-             self.available_models['ollama_qwen'] = ChatOllama(model="qwen")
-             print("Ollama models initialized (local)")
          except Exception as e:
-             print(f"Ollama not available: {e}")

-         # 3. Hugging Face Transformers (Completely free)
-         if HF_AVAILABLE:
              try:
-                 # Small models that can run on CPU
-                 self.available_models['hf_gpt2'] = self._create_hf_model("gpt2")
-                 self.available_models['hf_distilgpt2'] = self._create_hf_model("distilgpt2")
-                 print("Hugging Face models initialized (local)")
              except Exception as e:
-                 print(f"Hugging Face models not available: {e}")

-         print(f"Total available open-source models: {len(self.available_models)}")
-
-     def _create_hf_model(self, model_name: str):
-         """Create Hugging Face pipeline model"""
          try:
-             pipe = pipeline(
-                 "text-generation",
-                 model=model_name,
-                 max_length=512,
-                 do_sample=True,
-                 temperature=0.7,
-                 pad_token_id=50256
-             )
-             return HuggingFacePipeline(pipeline=pipe)
          except Exception as e:
-             print(f"Failed to create HF model {model_name}: {e}")
-             return None

      def get_model(self, model_name: str):
          """Get a specific model by name"""
@@ -216,20 +162,26 @@ class OpenSourceModelManager:
      def get_best_model_for_task(self, task_type: str):
          """Get the best available model for a specific task type"""
          if task_type == "reasoning":
-             # Prefer larger models for reasoning
-             for model_name in ['groq_llama3_70b', 'ollama_llama3_70b', 'groq_mixtral', 'ollama_llama3']:
                  if model_name in self.available_models:
                      return self.available_models[model_name]

          elif task_type == "coding":
              # Prefer code-specialized models
-             for model_name in ['ollama_codellama', 'groq_llama3_70b', 'ollama_llama3']:
                  if model_name in self.available_models:
                      return self.available_models[model_name]

          elif task_type == "fast":
              # Prefer fast, smaller models
-             for model_name in ['groq_llama3_8b', 'groq_gemma', 'ollama_phi3', 'hf_distilgpt2']:
                  if model_name in self.available_models:
                      return self.available_models[model_name]

@@ -238,246 +190,333 @@ class OpenSourceModelManager:
              return list(self.available_models.values())[0]
          return None

- # ---- Enhanced Agent State ----
- class EnhancedAgentState(TypedDict):
-     """State structure for the enhanced multi-LLM agent system."""
-     messages: Annotated[List[HumanMessage | AIMessage], operator.add]
-     query: str
-     agent_type: str
-     final_answer: str
-     perf: Dict[str, Any]
-     tools_used: List[str]
-     reasoning: str
-     model_used: str
-
- # ---- Open-Source Multi-LLM System ----
- class OpenSourceMultiLLMSystem:
-     """
-     Multi-LLM system using only open-source and free models
-     """

      def __init__(self):
-         self.model_manager = OpenSourceModelManager()
-         self.tools = [
-             multiply, add, subtract, divide, modulus,
-             optimized_web_search, optimized_wiki_search
-         ]
-         self.graph = self._build_graph()

-     def _build_graph(self) -> StateGraph:
-         """Build the LangGraph state machine with open-source models."""

-         def router(st: EnhancedAgentState) -> EnhancedAgentState:
-             """Route queries to appropriate model based on complexity and content analysis."""
-             q = st["query"].lower()
-
-             # Enhanced routing logic
-             if any(keyword in q for keyword in ["calculate", "compute", "math", "multiply", "add", "subtract", "divide"]):
-                 model_type = "reasoning"
-                 agent_type = "math"
-             elif any(keyword in q for keyword in ["search", "find", "lookup", "wikipedia", "information about"]):
-                 model_type = "fast"
-                 agent_type = "search_enhanced"
-             elif any(keyword in q for keyword in ["code", "programming", "function", "algorithm"]):
-                 model_type = "coding"
-                 agent_type = "coding"
-             elif len(q.split()) > 20:  # Complex queries
-                 model_type = "reasoning"
-                 agent_type = "complex"
-             else:
-                 model_type = "fast"
-                 agent_type = "simple"
-
-             # Get the best model for this task
-             selected_model = self.model_manager.get_best_model_for_task(model_type)
-             model_name = "unknown"
-             for name, model in self.model_manager.available_models.items():
-                 if model == selected_model:
-                     model_name = name
-                     break
-
-             return {**st, "agent_type": agent_type, "tools_used": [], "reasoning": "", "model_used": model_name}
-
-         def math_node(st: EnhancedAgentState) -> EnhancedAgentState:
-             """Process mathematical queries."""
-             return self._process_with_model(st, "reasoning", "Mathematical calculation using open-source model")
-
-         def search_enhanced_node(st: EnhancedAgentState) -> EnhancedAgentState:
-             """Process query with search enhancement."""
-             t0 = time.time()
-             tools_used = []
-
-             try:
-                 # Determine search strategy
-                 query = st["query"]
-                 search_results = ""

-                 if any(keyword in query.lower() for keyword in ["wikipedia", "wiki"]):
-                     search_results = optimized_wiki_search.invoke({"query": query})
-                     tools_used.append("wikipedia_search")
-                 else:
-                     search_results = optimized_web_search.invoke({"query": query})
-                     tools_used.append("web_search")
-
-                 enhanced_query = f"""
-                 Original Question: {query}

-                 Search Results:
-                 {search_results}

-                 Based on the search results above, provide a direct answer to the original question.
-                 """

-                 # Use fast model for search-enhanced queries
-                 model = self.model_manager.get_best_model_for_task("fast")
-                 if model:
-                     sys = SystemMessage(content=ENHANCED_SYSTEM_PROMPT)
-                     res = model.invoke([sys, HumanMessage(content=enhanced_query)])
-
-                     answer = res.content.strip() if hasattr(res, 'content') else str(res).strip()
-                     if "FINAL ANSWER:" in answer:
-                         answer = answer.split("FINAL ANSWER:")[-1].strip()
-
-                     return {**st,
-                             "final_answer": answer,
-                             "tools_used": tools_used,
-                             "reasoning": "Used search enhancement with open-source model",
-                             "perf": {"time": time.time() - t0, "prov": "Search-Enhanced"}}
-                 else:
-                     return {**st, "final_answer": "No models available", "perf": {"error": "No models"}}
-             except Exception as e:
-                 return {**st, "final_answer": f"Error: {e}", "perf": {"error": str(e)}}
-
-         def coding_node(st: EnhancedAgentState) -> EnhancedAgentState:
-             """Process coding-related queries."""
-             return self._process_with_model(st, "coding", "Code generation using open-source model")
-
-         def complex_node(st: EnhancedAgentState) -> EnhancedAgentState:
-             """Process complex queries."""
-             return self._process_with_model(st, "reasoning", "Complex reasoning using open-source model")
-
-         def simple_node(st: EnhancedAgentState) -> EnhancedAgentState:
-             """Process simple queries."""
-             return self._process_with_model(st, "fast", "Simple query using fast open-source model")
-
-         # Build graph
-         g = StateGraph(EnhancedAgentState)
-         g.add_node("router", router)
-         g.add_node("math", math_node)
-         g.add_node("search_enhanced", search_enhanced_node)
-         g.add_node("coding", coding_node)
-         g.add_node("complex", complex_node)
-         g.add_node("simple", simple_node)

-         g.set_entry_point("router")
-         g.add_conditional_edges("router", lambda s: s["agent_type"], {
-             "math": "math",
-             "search_enhanced": "search_enhanced",
-             "coding": "coding",
-             "complex": "complex",
-             "simple": "simple"
-         })

-         for node in ["math", "search_enhanced", "coding", "complex", "simple"]:
-             g.add_edge(node, END)
-
-         return g.compile(checkpointer=MemorySaver())

-     def _process_with_model(self, st: EnhancedAgentState, model_type: str, reasoning: str) -> EnhancedAgentState:
-         """Process query with specified model type"""
-         t0 = time.time()
-         try:
-             model = self.model_manager.get_best_model_for_task(model_type)
-             if not model:
-                 return {**st, "final_answer": "No suitable model available", "perf": {"error": "No model"}}
-
-             enhanced_query = f"""
-             Question: {st["query"]}
-
-             Please provide a direct, accurate answer to this question.
-             """
-
-             sys = SystemMessage(content=ENHANCED_SYSTEM_PROMPT)
-             res = model.invoke([sys, HumanMessage(content=enhanced_query)])
-
-             answer = res.content.strip() if hasattr(res, 'content') else str(res).strip()
-             if "FINAL ANSWER:" in answer:
-                 answer = answer.split("FINAL ANSWER:")[-1].strip()
-
-             return {**st,
-                     "final_answer": answer,
-                     "reasoning": reasoning,
-                     "perf": {"time": time.time() - t0, "prov": f"OpenSource-{model_type}"}}
-         except Exception as e:
-             return {**st, "final_answer": f"Error: {e}", "perf": {"error": str(e)}}
-
-     def process_query(self, q: str) -> str:
-         """Process a query through the open-source multi-LLM system."""
-         state = {
-             "messages": [HumanMessage(content=q)],
-             "query": q,
-             "agent_type": "",
-             "final_answer": "",
-             "perf": {},
-             "tools_used": [],
-             "reasoning": "",
-             "model_used": ""
-         }
-         cfg = {"configurable": {"thread_id": f"opensource_qa_{hash(q)}"}}

          try:
-             out = self.graph.invoke(state, cfg)
-             answer = out.get("final_answer", "").strip()

-             # Ensure we don't return the question as the answer
-             if answer == q or answer.startswith(q):
-                 return "Information not available"

-             return answer if answer else "No answer generated"
          except Exception as e:
-             return f"Error processing query: {e}"

      def get_system_info(self) -> Dict[str, Any]:
-         """Get information about available open-source models"""
          return {
              "available_models": self.model_manager.list_available_models(),
              "total_models": len(self.model_manager.available_models),
-             "model_types": {
-                 "groq_free_tier": [m for m in self.model_manager.list_available_models() if m.startswith("groq_")],
-                 "ollama_local": [m for m in self.model_manager.list_available_models() if m.startswith("ollama_")],
-                 "huggingface_local": [m for m in self.model_manager.list_available_models() if m.startswith("hf_")]
-             }
          }

  # ---- Build Graph Function (for compatibility) ----
- def build_graph(provider: str = "opensource"):
-     """Build graph using only open-source models"""
-     return OpenSourceMultiLLMSystem().graph

  # ---- Main execution ----
  if __name__ == "__main__":
-     # Initialize the open-source system
-     system = OpenSourceMultiLLMSystem()

      # Print system information
      info = system.get_system_info()
-     print("Open-Source System Information:")
-     print(f"Total Models Available: {info['total_models']}")
-     for category, models in info['model_types'].items():
-         if models:
-             print(f" {category}: {models}")

      # Test queries
      test_questions = [
-         "What is 25 multiplied by 17?",
-         "Find information about Mercedes Sosa albums between 2000-2009",
-         "Write a simple Python function to calculate factorial",
-         "Explain quantum computing in simple terms",
-         "What is the capital of France?"
      ]

      print("\n" + "="*60)
-     print("Testing Open-Source Multi-LLM System")
      print("="*60)

      for i, question in enumerate(test_questions, 1):
 
veryfinal.py (after update):

  """
+ Enhanced Agno Multi-LLM Agent System with NVIDIA Integration
+ Uses open-source models + NVIDIA NIM models available through Agno framework
  """

  import os

  import operator
  from typing import List, Dict, Any, TypedDict, Annotated, Optional
  from dotenv import load_dotenv
+ from datetime import datetime
+ from textwrap import dedent

+ # Core LangChain imports for compatibility
  from langchain_core.tools import tool
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
  from langgraph.graph import StateGraph, END
  from langgraph.checkpoint.memory import MemorySaver

+ # Agno imports for open-source models + NVIDIA
  try:
+     from agno.agent import Agent
+     from agno.models.groq import Groq
+     from agno.models.ollama import Ollama
+     from agno.models.together import Together
+     from agno.models.anyscale import Anyscale
+     from agno.models.huggingface import HuggingFaceChat
+     from agno.models.nvidia import Nvidia  # NVIDIA NIM integration
+     from agno.tools.duckduckgo import DuckDuckGoTools
+     from agno.tools.wikipedia import WikipediaTools
+     from agno.tools.calculator import Calculator
+     from agno.tools.reasoning import ReasoningTools
+     from agno.memory import AgentMemory
+     from agno.storage import AgentStorage
+     from agno.knowledge import AgentKnowledge
+     AGNO_AVAILABLE = True
  except ImportError:
+     AGNO_AVAILABLE = False
+     print("Agno not available. Install with: pip install agno")

  # Vector database imports
  import faiss

  load_dotenv()

+ # Enhanced system prompt for Agno agents
+ AGNO_SYSTEM_PROMPT = dedent("""\
+ You are a helpful assistant tasked with answering questions using available tools.
+ You must provide accurate, comprehensive answers based on available information.
+
+ Your capabilities include:
+ - Using search tools to find current information
+ - Performing mathematical calculations
+ - Reasoning through complex problems step by step
+ - Accessing Wikipedia for encyclopedic knowledge
+
+ Guidelines:
+ 1. Use available tools to gather information when needed
+ 2. Provide precise, factual answers
+ 3. For numbers: don't use commas or units unless specified
+ 4. For strings: don't use articles or abbreviations, write digits in plain text
+ 5. For lists: apply above rules based on element type
+ 6. Always end with 'FINAL ANSWER: [YOUR ANSWER]'
+ 7. Be concise but thorough in your reasoning
+ 8. If you cannot find the answer, state that clearly
+ """)

+ # ---- Enhanced Model Manager with NVIDIA Support ----
+ class AgnoEnhancedModelManager:
+     """Manages open-source models + NVIDIA NIM models available through Agno"""

      def __init__(self):
          self.available_models = {}
+         self._initialize_all_models()

+     def _initialize_all_models(self):
+         """Initialize open-source models + NVIDIA NIM models through Agno"""
+         if not AGNO_AVAILABLE:
+             return
+
+         # 1. NVIDIA NIM Models (Enterprise-grade open-source models)
+         if os.getenv("NVIDIA_API_KEY"):
+             try:
+                 # NVIDIA NIM provides access to optimized open-source models
+                 self.available_models['nvidia_llama3_70b'] = Nvidia(id="meta/llama3-70b-instruct")
+                 self.available_models['nvidia_llama3_8b'] = Nvidia(id="meta/llama3-8b-instruct")
+                 self.available_models['nvidia_mixtral'] = Nvidia(id="mistralai/mixtral-8x7b-instruct-v0.1")
+                 self.available_models['nvidia_codellama'] = Nvidia(id="meta/codellama-70b-instruct")
+                 self.available_models['nvidia_gemma'] = Nvidia(id="google/gemma-7b-it")
+                 self.available_models['nvidia_yi'] = Nvidia(id="01-ai/yi-34b-chat")
+                 print("NVIDIA NIM models initialized")
+             except Exception as e:
+                 print(f"NVIDIA models not available: {e}")

+         # 2. Groq (Free tier with open-source models)
          if os.getenv("GROQ_API_KEY"):
              try:
+                 self.available_models['groq_llama3_70b'] = Groq(id="llama3-70b-8192")
+                 self.available_models['groq_llama3_8b'] = Groq(id="llama3-8b-8192")
+                 self.available_models['groq_mixtral'] = Groq(id="mixtral-8x7b-32768")
+                 self.available_models['groq_gemma'] = Groq(id="gemma-7b-it")
+                 print("Groq open-source models initialized")
              except Exception as e:
                  print(f"Groq models not available: {e}")

+         # 3. Ollama (Completely free local models)
          try:
+             self.available_models['ollama_llama3'] = Ollama(id="llama3")
+             self.available_models['ollama_llama3_70b'] = Ollama(id="llama3:70b")
+             self.available_models['ollama_mistral'] = Ollama(id="mistral")
+             self.available_models['ollama_phi3'] = Ollama(id="phi3")
+             self.available_models['ollama_codellama'] = Ollama(id="codellama")
+             self.available_models['ollama_gemma'] = Ollama(id="gemma")
+             self.available_models['ollama_qwen'] = Ollama(id="qwen")
+             print("Ollama local models initialized")
          except Exception as e:
+             print(f"Ollama models not available: {e}")

+         # 4. Together AI (Open-source models)
+         if os.getenv("TOGETHER_API_KEY"):
              try:
+                 self.available_models['together_llama3_70b'] = Together(id="meta-llama/Llama-3-70b-chat-hf")
+                 self.available_models['together_llama3_8b'] = Together(id="meta-llama/Llama-3-8b-chat-hf")
+                 self.available_models['together_mistral'] = Together(id="mistralai/Mistral-7B-Instruct-v0.1")
+                 self.available_models['together_qwen'] = Together(id="Qwen/Qwen2-72B-Instruct")
+                 print("Together AI open-source models initialized")
              except Exception as e:
+                 print(f"Together AI models not available: {e}")

+         # 5. Anyscale (Open-source models)
+         if os.getenv("ANYSCALE_API_KEY"):
+             try:
+                 self.available_models['anyscale_llama3_70b'] = Anyscale(id="meta-llama/Llama-3-70b-chat-hf")
+                 self.available_models['anyscale_mistral'] = Anyscale(id="mistralai/Mistral-7B-Instruct-v0.1")
+                 print("Anyscale open-source models initialized")
+             except Exception as e:
+                 print(f"Anyscale models not available: {e}")
+
+         # 6. Hugging Face (Open-source models)
          try:
+             if os.getenv("HUGGINGFACE_API_KEY"):
+                 self.available_models['hf_llama3_8b'] = HuggingFaceChat(id="meta-llama/Meta-Llama-3-8B-Instruct")
+                 self.available_models['hf_mistral'] = HuggingFaceChat(id="mistralai/Mistral-7B-Instruct-v0.1")
+                 print("Hugging Face open-source models initialized")
          except Exception as e:
+             print(f"Hugging Face models not available: {e}")
+
+         print(f"Total available models: {len(self.available_models)}")

      def get_model(self, model_name: str):
          """Get a specific model by name"""

      def get_best_model_for_task(self, task_type: str):
          """Get the best available model for a specific task type"""
          if task_type == "reasoning":
+             # Prefer larger, more capable models for reasoning
+             for model_name in ['nvidia_llama3_70b', 'groq_llama3_70b', 'together_llama3_70b', 'anyscale_llama3_70b', 'ollama_llama3_70b']:
                  if model_name in self.available_models:
                      return self.available_models[model_name]

          elif task_type == "coding":
              # Prefer code-specialized models
+             for model_name in ['nvidia_codellama', 'ollama_codellama', 'nvidia_llama3_70b', 'groq_llama3_70b']:
                  if model_name in self.available_models:
                      return self.available_models[model_name]

          elif task_type == "fast":
              # Prefer fast, smaller models
+             for model_name in ['groq_llama3_8b', 'nvidia_llama3_8b', 'groq_gemma', 'ollama_phi3', 'hf_llama3_8b']:
+                 if model_name in self.available_models:
+                     return self.available_models[model_name]
+
+         elif task_type == "enterprise":
+             # Prefer NVIDIA NIM for enterprise-grade tasks
+             for model_name in ['nvidia_llama3_70b', 'nvidia_mixtral', 'nvidia_codellama']:
                  if model_name in self.available_models:
                      return self.available_models[model_name]

              return list(self.available_models.values())[0]
          return None

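A minimal usage sketch of the manager's task-based fallback (assuming the file is importable as veryfinal and at least one provider key or a local Ollama server is configured; both the module path and the printout are illustrative):

    from veryfinal import AgnoEnhancedModelManager

    manager = AgnoEnhancedModelManager()
    # Walks the preference list (nvidia_llama3_70b -> groq_llama3_70b -> ...)
    # and returns the first model that initialized successfully, else None.
    model = manager.get_best_model_for_task("reasoning")
    print(type(model).__name__ if model else "no model available")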
+ # ---- Enhanced Specialized Agno Agents with NVIDIA ----
+ class AgnoEnhancedAgentSystem:
+     """System of specialized Agno agents using open-source + NVIDIA models"""

      def __init__(self):
+         self.model_manager = AgnoEnhancedModelManager()
+         self.agents = {}
+         self._create_specialized_agents()

+     def _create_specialized_agents(self):
+         """Create specialized agents for different tasks using best available models"""
+         if not AGNO_AVAILABLE:
+             print("Agno not available, agents cannot be created")
+             return

+         # Enterprise Research Agent (NVIDIA preferred)
+         enterprise_model = self.model_manager.get_best_model_for_task("enterprise")
+         if enterprise_model:
+             self.agents['enterprise_research'] = Agent(
+                 model=enterprise_model,
+                 tools=[DuckDuckGoTools(), WikipediaTools(), ReasoningTools()],
+                 description=dedent("""\
+                     You are an enterprise-grade research specialist with access to optimized models.
+                     Your expertise lies in comprehensive analysis, fact-checking, and providing
+                     detailed, accurate responses for complex research tasks.
+
+                     Your approach is:
+                     - Enterprise-level accuracy and reliability
+                     - Comprehensive and thorough analysis
+                     - Multi-source verification
+                     - Professional-grade output quality
+                     """),
+                 instructions=dedent("""\
+                     1. Use advanced reasoning capabilities for complex analysis
+                     2. Cross-reference multiple sources for maximum accuracy
+                     3. Provide comprehensive, well-structured responses
+                     4. Include confidence levels and source reliability assessment
+                     5. Always end with 'FINAL ANSWER: [your comprehensive answer]'
+                     6. Prioritize accuracy and completeness over speed
+                     """),
+                 memory=AgentMemory(),
+                 markdown=True,
+                 show_tool_calls=True,
+                 add_datetime_to_instructions=True
+             )
+
+         # Advanced Math Agent (Best reasoning model)
+         math_model = self.model_manager.get_best_model_for_task("reasoning")
+         if math_model:
+             self.agents['advanced_math'] = Agent(
+                 model=math_model,
+                 tools=[Calculator(), ReasoningTools()],
+                 description=dedent("""\
+                     You are an advanced mathematics expert with access to powerful reasoning models.
+                     You excel at complex mathematical problem solving, statistical analysis,
+                     and providing step-by-step solutions with high accuracy.
+
+                     Your approach is:
+                     - Rigorous mathematical methodology
+                     - Step-by-step problem decomposition
+                     - High-precision calculations
+                     - Clear mathematical communication
+                     """),
+                 instructions=dedent("""\
+                     1. Break down complex mathematical problems systematically
+                     2. Use advanced reasoning for multi-step problems
+                     3. Show detailed work and methodology
+                     4. Verify calculations using multiple approaches when possible
+                     5. Provide exact numerical answers without commas or units unless specified
+                     6. Always end with 'FINAL ANSWER: [precise numerical result]'
+                     """),
+                 memory=AgentMemory(),
+                 markdown=True,
+                 show_tool_calls=True
+             )
+
+         # Fast Response Agent (Optimized for speed)
+         fast_model = self.model_manager.get_best_model_for_task("fast")
+         if fast_model:
+             self.agents['fast_response'] = Agent(
+                 model=fast_model,
+                 tools=[DuckDuckGoTools(), WikipediaTools()],
+                 description=dedent("""\
+                     You are a rapid response specialist optimized for quick, accurate answers.
+                     You provide concise, direct responses while maintaining high quality standards.
+
+                     Your approach is:
+                     - Speed-optimized processing
+                     - Direct and concise communication
+                     - Efficient tool usage
+                     - Quality maintained at high speed
+                     """),
+                 instructions=dedent("""\
+                     1. Provide quick, accurate answers
+                     2. Use tools efficiently - only when necessary
+                     3. Be direct and avoid unnecessary elaboration
+                     4. Maintain accuracy despite speed focus
+                     5. Always end with 'FINAL ANSWER: [your concise answer]'
+                     6. Prioritize clarity and correctness
+                     """),
+                 markdown=True,
+                 show_tool_calls=False
+             )
+
+         # Advanced Coding Agent (Code-specialized model)
+         coding_model = self.model_manager.get_best_model_for_task("coding")
+         if coding_model:
+             self.agents['advanced_coding'] = Agent(
+                 model=coding_model,
+                 tools=[ReasoningTools()],
+                 description=dedent("""\
+                     You are an advanced programming expert with access to code-specialized models.
+                     You excel at complex code generation, algorithm design, debugging, and
+                     software architecture recommendations.
+
+                     Your approach is:
+                     - Advanced programming methodologies
+                     - Clean, efficient code generation
+                     - Comprehensive error handling
+                     - Best practices implementation
+                     """),
+                 instructions=dedent("""\
+                     1. Write production-quality, well-documented code
+                     2. Follow industry best practices and design patterns
+                     3. Include comprehensive error handling and edge cases
+                     4. Provide clear explanations of code logic
+                     5. Consider performance, security, and maintainability
+                     6. Always end with 'FINAL ANSWER: [your code solution]'
+                     """),
+                 memory=AgentMemory(),
+                 markdown=True,
+                 show_tool_calls=True
+             )

+         # Standard Research Agent (Fallback)
+         research_model = self.model_manager.get_best_model_for_task("reasoning")
+         if research_model and 'enterprise_research' not in self.agents:
+             self.agents['research'] = Agent(
+                 model=research_model,
+                 tools=[DuckDuckGoTools(), WikipediaTools(), ReasoningTools()],
+                 description=dedent("""\
+                     You are a research specialist with expertise in finding and analyzing information.
+                     Your specialty lies in gathering comprehensive data from multiple sources.
+                     """),
+                 instructions=dedent("""\
+                     1. Use search tools to find current and relevant information
+                     2. Apply systematic reasoning to analyze findings
+                     3. Provide comprehensive answers with sources
+                     4. Always end with 'FINAL ANSWER: [your answer]'
+                     """),
+                 memory=AgentMemory(),
+                 markdown=True,
+                 show_tool_calls=True
+             )

+         print(f"Created {len(self.agents)} specialized Agno agents with enhanced models")

+     def route_query(self, query: str) -> str:
+         """Route query to the most appropriate agent"""
+         q_lower = query.lower()
+
+         # Route to specialized agents
+         if any(keyword in q_lower for keyword in ["calculate", "math", "multiply", "add", "subtract", "divide", "compute", "statistical"]):
+             if 'advanced_math' in self.agents:
+                 return self._query_agent('advanced_math', query)
+             elif 'math' in self.agents:
+                 return self._query_agent('math', query)
+
+         elif any(keyword in q_lower for keyword in ["code", "programming", "function", "algorithm", "python", "javascript", "debug"]):
+             if 'advanced_coding' in self.agents:
+                 return self._query_agent('advanced_coding', query)
+             elif 'coding' in self.agents:
+                 return self._query_agent('coding', query)
+
+         elif any(keyword in q_lower for keyword in ["enterprise", "analysis", "comprehensive", "detailed", "professional"]):
+             if 'enterprise_research' in self.agents:
+                 return self._query_agent('enterprise_research', query)
+
+         elif any(keyword in q_lower for keyword in ["research", "find", "search", "information", "study", "analyze"]):
+             if 'enterprise_research' in self.agents:
+                 return self._query_agent('enterprise_research', query)
+             elif 'research' in self.agents:
+                 return self._query_agent('research', query)
+
+         elif len(query.split()) < 10:  # Simple queries
+             if 'fast_response' in self.agents:
+                 return self._query_agent('fast_response', query)
+             elif 'fast' in self.agents:
+                 return self._query_agent('fast', query)

+         # Default to best available agent
+         if 'enterprise_research' in self.agents:
+             return self._query_agent('enterprise_research', query)
+         elif 'research' in self.agents:
+             return self._query_agent('research', query)
+         elif self.agents:
+             agent_name = list(self.agents.keys())[0]
+             return self._query_agent(agent_name, query)
+
+         return "No agents available"
+
+     def _query_agent(self, agent_name: str, query: str) -> str:
+         """Query a specific agent"""
          try:
+             agent = self.agents[agent_name]
+             response = agent.run(query)

+             # Extract final answer if present
+             if "FINAL ANSWER:" in response:
+                 return response.split("FINAL ANSWER:")[-1].strip()

+             return response.strip()
          except Exception as e:
+             return f"Error with {agent_name} agent: {e}"

      def get_system_info(self) -> Dict[str, Any]:
+         """Get information about available agents and models"""
+         model_breakdown = {
+             "nvidia_models": [m for m in self.model_manager.list_available_models() if m.startswith("nvidia_")],
+             "groq_models": [m for m in self.model_manager.list_available_models() if m.startswith("groq_")],
+             "ollama_models": [m for m in self.model_manager.list_available_models() if m.startswith("ollama_")],
+             "together_models": [m for m in self.model_manager.list_available_models() if m.startswith("together_")],
+             "anyscale_models": [m for m in self.model_manager.list_available_models() if m.startswith("anyscale_")],
+             "hf_models": [m for m in self.model_manager.list_available_models() if m.startswith("hf_")]
+         }
+
          return {
              "available_models": self.model_manager.list_available_models(),
+             "model_breakdown": model_breakdown,
+             "active_agents": list(self.agents.keys()),
+             "agno_available": AGNO_AVAILABLE,
              "total_models": len(self.model_manager.available_models),
+             "nvidia_available": len(model_breakdown["nvidia_models"]) > 0
          }

+ # ---- Enhanced Agent State for LangGraph compatibility ----
+ class EnhancedAgentState(TypedDict):
+     """State structure for compatibility with existing system."""
+     messages: Annotated[List[HumanMessage | AIMessage], operator.add]
+     query: str
+     agent_type: str
+     final_answer: str
+     perf: Dict[str, Any]
+     tools_used: List[str]
+     reasoning: str
+     model_used: str
+
+ # ---- Unified System with Enhanced NVIDIA Integration ----
+ class UnifiedAgnoEnhancedSystem:
+     """Unified system that integrates Agno agents with NVIDIA + open-source models"""
+
+     def __init__(self):
+         if AGNO_AVAILABLE:
+             print("Using enhanced Agno-based system with NVIDIA + open-source models")
+             self.agno_system = AgnoEnhancedAgentSystem()
+             self.graph = self._build_compatibility_graph()
+         else:
+             print("Agno not available")
+             self.agno_system = None
+             self.graph = None
+
+     def _build_compatibility_graph(self):
+         """Build LangGraph for compatibility with existing app.py"""
+         def process_node(state: EnhancedAgentState) -> EnhancedAgentState:
+             """Process query through enhanced Agno system"""
+             query = state.get("query", "")
+
+             if self.agno_system:
+                 answer = self.agno_system.route_query(query)
+                 return {**state, "final_answer": answer}
+             else:
+                 return {**state, "final_answer": "Enhanced Agno system not available"}
+
+         g = StateGraph(EnhancedAgentState)
+         g.add_node("process", process_node)
+         g.set_entry_point("process")
+         g.add_edge("process", END)
+
+         return g.compile(checkpointer=MemorySaver())
+
+     def process_query(self, query: str) -> str:
+         """Process query through the unified enhanced system"""
+         if self.agno_system:
+             return self.agno_system.route_query(query)
+         else:
+             return "Enhanced Agno system not available"
+
+     def get_system_info(self) -> Dict[str, Any]:
+         """Get information about the current enhanced system"""
+         if self.agno_system:
+             return self.agno_system.get_system_info()
+         else:
+             return {"system": "agno_unavailable", "agno_available": False}
+
  # ---- Build Graph Function (for compatibility) ----
+ def build_graph(provider: str = "agno_enhanced"):
+     """Build graph using enhanced Agno models including NVIDIA"""
+     system = UnifiedAgnoEnhancedSystem()
+     return system.graph if system.graph else None

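A hedged sketch of how a caller such as app.py might drive the compatibility graph (the state dict mirrors EnhancedAgentState, and the thread_id value is illustrative; a MemorySaver-compiled graph needs one, as the removed process_query also did):

    from langchain_core.messages import HumanMessage
    from veryfinal import build_graph

    graph = build_graph()
    if graph is not None:
        q = "What is the capital of France?"
        state = {"messages": [HumanMessage(content=q)], "query": q,
                 "agent_type": "", "final_answer": "", "perf": {},
                 "tools_used": [], "reasoning": "", "model_used": ""}
        out = graph.invoke(state, {"configurable": {"thread_id": "demo"}})
        print(out.get("final_answer", ""))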
  # ---- Main execution ----
  if __name__ == "__main__":
+     # Initialize the enhanced unified system
+     system = UnifiedAgnoEnhancedSystem()

      # Print system information
      info = system.get_system_info()
+     print("Enhanced Agno System Information:")
+     for key, value in info.items():
+         if isinstance(value, dict):
+             print(f" {key}:")
+             for subkey, subvalue in value.items():
+                 print(f" {subkey}: {subvalue}")
+         else:
+             print(f" {key}: {value}")

      # Test queries
      test_questions = [
+         "Enterprise analysis: What is 25 multiplied by 17?",
+         "Research the latest developments in quantum computing",
+         "Write an advanced Python function to calculate factorial with error handling",
+         "Find comprehensive information about Mercedes Sosa albums between 2000-2009",
+         "Quick answer: What is the capital of France?"
      ]

      print("\n" + "="*60)
+     print("Testing Enhanced Agno Multi-LLM System with NVIDIA")
      print("="*60)

      for i, question in enumerate(test_questions, 1):