"""
Open-Source Multi-LLM Agent System

Uses only free and open-source models - no paid APIs required
"""

import os
import time
import random
import operator
from typing import List, Dict, Any, TypedDict, Annotated

from dotenv import load_dotenv

from langchain_core.tools import tool
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import WikipediaLoader
from langgraph.graph import StateGraph, END
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage

from langchain_groq import ChatGroq
from langchain_community.chat_models import ChatOllama

# Hugging Face support is optional; the system degrades gracefully without it.
try:
    from langchain_huggingface import HuggingFacePipeline
    from transformers import pipeline
    HF_AVAILABLE = True
except ImportError:
    HF_AVAILABLE = False

load_dotenv()


ENHANCED_SYSTEM_PROMPT = (
    "You are a helpful assistant tasked with answering questions using available tools. "
    "You must provide accurate, comprehensive answers based on available information. "
    "When answering questions, follow these guidelines:\n"
    "1. Use available tools to gather information when needed\n"
    "2. Provide precise, factual answers\n"
    "3. For numbers: don't use commas or units unless specified\n"
    "4. For strings: don't use articles or abbreviations, write digits in plain text\n"
    "5. For lists: apply the above rules based on element type\n"
    "6. Always end with 'FINAL ANSWER: [YOUR ANSWER]'\n"
    "7. Be concise but thorough in your reasoning\n"
    "8. If you cannot find the answer, state that clearly"
)
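
# The prompt enforces a parseable output contract. A compliant reply might look
# like this (illustrative only, not real model output):
#
#   The product of 25 and 17 is 425.
#   FINAL ANSWER: 425
#
# The nodes below split on "FINAL ANSWER:" to extract just the answer text.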


@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers and return the product."""
    return a * b


@tool
def add(a: int, b: int) -> int:
    """Add two integers and return the sum."""
    return a + b


@tool
def subtract(a: int, b: int) -> int:
    """Subtract the second integer from the first and return the difference."""
    return a - b


@tool
def divide(a: int, b: int) -> float:
    """Divide the first integer by the second and return the quotient."""
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    return a / b


@tool
def modulus(a: int, b: int) -> int:
    """Return the remainder when dividing the first integer by the second."""
    return a % b
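
# Example usage (sketch): functions decorated with @tool become LangChain
# structured tools and are invoked with a dict payload rather than positional
# arguments:
#
#   multiply.invoke({"a": 25, "b": 17})   # -> 425
#   divide.invoke({"a": 10, "b": 4})      # -> 2.5
#   divide.invoke({"a": 1, "b": 0})       # raises ValueError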


@tool
def optimized_web_search(query: str) -> str:
    """Search the web, using Tavily when an API key is set and falling back to free DuckDuckGo."""
    try:
        if os.getenv("TAVILY_API_KEY"):
            time.sleep(random.uniform(0.7, 1.5))  # light rate limiting
            search_tool = TavilySearchResults(max_results=3)
            docs = search_tool.invoke({"query": query})
            return "\n\n---\n\n".join(
                f"<Doc url='{d.get('url', '')}'>{d.get('content', '')[:800]}</Doc>"
                for d in docs
            )
        else:
            try:
                from duckduckgo_search import DDGS
                with DDGS() as ddgs:
                    results = list(ddgs.text(query, max_results=3))
                return "\n\n---\n\n".join(
                    f"<Doc url='{r.get('href', '')}'>{r.get('body', '')[:800]}</Doc>"
                    for r in results
                )
            except ImportError:
                return "Web search not available - install duckduckgo-search for free web search"
    except Exception as e:
        return f"Web search failed: {e}"


@tool
def optimized_wiki_search(query: str) -> str:
    """Perform a Wikipedia search - completely free."""
    try:
        time.sleep(random.uniform(0.3, 1))  # light rate limiting
        docs = WikipediaLoader(query=query, load_max_docs=2).load()
        return "\n\n---\n\n".join(
            f"<Doc src='{d.metadata.get('source', 'Wikipedia')}'>{d.page_content[:1000]}</Doc>"
            for d in docs
        )
    except Exception as e:
        return f"Wikipedia search failed: {e}"


class OpenSourceModelManager:
    """Manages only open-source and free models."""

    def __init__(self):
        self.available_models = {}
        self._initialize_models()

    def _initialize_models(self):
        """Initialize only open-source models."""
        # Groq offers a free tier for several open-weight models.
        if os.getenv("GROQ_API_KEY"):
            try:
                self.available_models['groq_llama3_70b'] = ChatGroq(
                    model="llama3-70b-8192",
                    temperature=0,
                    api_key=os.getenv("GROQ_API_KEY")
                )
                self.available_models['groq_llama3_8b'] = ChatGroq(
                    model="llama3-8b-8192",
                    temperature=0,
                    api_key=os.getenv("GROQ_API_KEY")
                )
                self.available_models['groq_mixtral'] = ChatGroq(
                    model="mixtral-8x7b-32768",
                    temperature=0,
                    api_key=os.getenv("GROQ_API_KEY")
                )
                self.available_models['groq_gemma'] = ChatGroq(
                    model="gemma-7b-it",
                    temperature=0,
                    api_key=os.getenv("GROQ_API_KEY")
                )
                print("Groq models initialized (free tier)")
            except Exception as e:
                print(f"Groq models not available: {e}")

        # Ollama models run fully locally when an Ollama server is reachable.
        try:
            # Constructing ChatOllama does not contact the server, so issue a
            # tiny real request to verify that Ollama is actually running.
            test_model = ChatOllama(model="llama3", base_url="http://localhost:11434")
            test_model.invoke("ping")

            self.available_models['ollama_llama3'] = ChatOllama(model="llama3")
            self.available_models['ollama_llama3_70b'] = ChatOllama(model="llama3:70b")
            self.available_models['ollama_mistral'] = ChatOllama(model="mistral")
            self.available_models['ollama_phi3'] = ChatOllama(model="phi3")
            self.available_models['ollama_codellama'] = ChatOllama(model="codellama")
            self.available_models['ollama_gemma'] = ChatOllama(model="gemma")
            self.available_models['ollama_qwen'] = ChatOllama(model="qwen")
            print("Ollama models initialized (local)")
        except Exception as e:
            print(f"Ollama not available: {e}")

        # Small Hugging Face models as a last-resort local fallback.
        if HF_AVAILABLE:
            try:
                for key, model_name in [('hf_gpt2', 'gpt2'), ('hf_distilgpt2', 'distilgpt2')]:
                    model = self._create_hf_model(model_name)
                    if model is not None:  # skip entries whose pipeline failed to build
                        self.available_models[key] = model
                print("Hugging Face models initialized (local)")
            except Exception as e:
                print(f"Hugging Face models not available: {e}")

        print(f"Total available open-source models: {len(self.available_models)}")

    def _create_hf_model(self, model_name: str):
        """Create a Hugging Face pipeline model, or return None on failure."""
        try:
            pipe = pipeline(
                "text-generation",
                model=model_name,
                max_length=512,
                do_sample=True,
                temperature=0.7,
                pad_token_id=50256  # GPT-2 family EOS token
            )
            return HuggingFacePipeline(pipeline=pipe)
        except Exception as e:
            print(f"Failed to create HF model {model_name}: {e}")
            return None

    def get_model(self, model_name: str):
        """Get a specific model by name."""
        return self.available_models.get(model_name)

    def list_available_models(self) -> List[str]:
        """List all available model names."""
        return list(self.available_models.keys())

    def get_best_model_for_task(self, task_type: str):
        """Get the best available model for a specific task type."""
        if task_type == "reasoning":
            for model_name in ['groq_llama3_70b', 'ollama_llama3_70b', 'groq_mixtral', 'ollama_llama3']:
                if model_name in self.available_models:
                    return self.available_models[model_name]
        elif task_type == "coding":
            for model_name in ['ollama_codellama', 'groq_llama3_70b', 'ollama_llama3']:
                if model_name in self.available_models:
                    return self.available_models[model_name]
        elif task_type == "fast":
            for model_name in ['groq_llama3_8b', 'groq_gemma', 'ollama_phi3', 'hf_distilgpt2']:
                if model_name in self.available_models:
                    return self.available_models[model_name]

        if self.available_models:
            return list(self.available_models.values())[0]
        return None
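
# Selection walks a fixed priority list per task type, e.g. (assuming only the
# Groq free tier is configured; `manager` is a hypothetical
# OpenSourceModelManager() instance):
#
#   manager.get_best_model_for_task("reasoning")  # -> the groq_llama3_70b model
#   manager.get_best_model_for_task("coding")     # -> groq_llama3_70b (codellama absent)
#   manager.get_best_model_for_task("unknown")    # -> first available model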


class EnhancedAgentState(TypedDict):
    """State structure for the enhanced multi-LLM agent system."""
    messages: Annotated[List[HumanMessage | AIMessage], operator.add]
    query: str
    agent_type: str
    final_answer: str
    perf: Dict[str, Any]
    tools_used: List[str]
    reasoning: str
    model_used: str
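
# The Annotated[..., operator.add] reducer tells LangGraph to merge message
# lists across node updates by concatenation instead of overwriting, e.g.:
#
#   operator.add([HumanMessage(content="hi")], [AIMessage(content="hello")])
#   # -> [HumanMessage(content="hi"), AIMessage(content="hello")]
#
# All other keys use the default last-write-wins behavior.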


class OpenSourceMultiLLMSystem:
    """Multi-LLM system using only open-source and free models."""

    def __init__(self):
        self.model_manager = OpenSourceModelManager()
        self.tools = [
            multiply, add, subtract, divide, modulus,
            optimized_web_search, optimized_wiki_search
        ]
        self.graph = self._build_graph()

    def _build_graph(self) -> StateGraph:
        """Build the LangGraph state machine with open-source models."""

        def router(st: EnhancedAgentState) -> EnhancedAgentState:
            """Route queries to an appropriate model based on complexity and content analysis."""
            q = st["query"].lower()

            if any(keyword in q for keyword in ["calculate", "compute", "math", "multiply", "add", "subtract", "divide"]):
                model_type = "reasoning"
                agent_type = "math"
            elif any(keyword in q for keyword in ["search", "find", "lookup", "wikipedia", "information about"]):
                model_type = "fast"
                agent_type = "search_enhanced"
            elif any(keyword in q for keyword in ["code", "programming", "function", "algorithm"]):
                model_type = "coding"
                agent_type = "coding"
            elif len(q.split()) > 20:
                model_type = "reasoning"
                agent_type = "complex"
            else:
                model_type = "fast"
                agent_type = "simple"

            selected_model = self.model_manager.get_best_model_for_task(model_type)
            model_name = "unknown"
            for name, model in self.model_manager.available_models.items():
                # Compare by identity: chat models are pydantic objects, and
                # `==` could match two distinct models with identical settings.
                if model is selected_model:
                    model_name = name
                    break

            return {**st, "agent_type": agent_type, "tools_used": [], "reasoning": "", "model_used": model_name}

        def math_node(st: EnhancedAgentState) -> EnhancedAgentState:
            """Process mathematical queries."""
            return self._process_with_model(st, "reasoning", "Mathematical calculation using open-source model")

        def search_enhanced_node(st: EnhancedAgentState) -> EnhancedAgentState:
            """Process query with search enhancement."""
            t0 = time.time()
            tools_used = []

            try:
                query = st["query"]
                search_results = ""

                if any(keyword in query.lower() for keyword in ["wikipedia", "wiki"]):
                    search_results = optimized_wiki_search.invoke({"query": query})
                    tools_used.append("wikipedia_search")
                else:
                    search_results = optimized_web_search.invoke({"query": query})
                    tools_used.append("web_search")

                enhanced_query = f"""
Original Question: {query}

Search Results:
{search_results}

Based on the search results above, provide a direct answer to the original question.
"""

                model = self.model_manager.get_best_model_for_task("fast")
                if model:
                    sys = SystemMessage(content=ENHANCED_SYSTEM_PROMPT)
                    res = model.invoke([sys, HumanMessage(content=enhanced_query)])

                    answer = res.content.strip() if hasattr(res, 'content') else str(res).strip()
                    if "FINAL ANSWER:" in answer:
                        answer = answer.split("FINAL ANSWER:")[-1].strip()

                    return {**st,
                            "final_answer": answer,
                            "tools_used": tools_used,
                            "reasoning": "Used search enhancement with open-source model",
                            "perf": {"time": time.time() - t0, "prov": "Search-Enhanced"}}
                else:
                    return {**st, "final_answer": "No models available", "perf": {"error": "No models"}}
            except Exception as e:
                return {**st, "final_answer": f"Error: {e}", "perf": {"error": str(e)}}

        def coding_node(st: EnhancedAgentState) -> EnhancedAgentState:
            """Process coding-related queries."""
            return self._process_with_model(st, "coding", "Code generation using open-source model")

        def complex_node(st: EnhancedAgentState) -> EnhancedAgentState:
            """Process complex queries."""
            return self._process_with_model(st, "reasoning", "Complex reasoning using open-source model")

        def simple_node(st: EnhancedAgentState) -> EnhancedAgentState:
            """Process simple queries."""
            return self._process_with_model(st, "fast", "Simple query using fast open-source model")

        g = StateGraph(EnhancedAgentState)
        g.add_node("router", router)
        g.add_node("math", math_node)
        g.add_node("search_enhanced", search_enhanced_node)
        g.add_node("coding", coding_node)
        g.add_node("complex", complex_node)
        g.add_node("simple", simple_node)

        g.set_entry_point("router")
        g.add_conditional_edges("router", lambda s: s["agent_type"], {
            "math": "math",
            "search_enhanced": "search_enhanced",
            "coding": "coding",
            "complex": "complex",
            "simple": "simple"
        })

        for node in ["math", "search_enhanced", "coding", "complex", "simple"]:
            g.add_edge(node, END)

        return g.compile(checkpointer=MemorySaver())
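
    # Resulting topology (one hop from the router to a specialist node, then END):
    #
    #   router --(agent_type)--> math | search_enhanced | coding | complex | simple --> END
    #
    # MemorySaver checkpoints each state transition per thread_id (see
    # process_query below).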

    def _process_with_model(self, st: EnhancedAgentState, model_type: str, reasoning: str) -> EnhancedAgentState:
        """Process the query with the specified model type."""
        t0 = time.time()
        try:
            model = self.model_manager.get_best_model_for_task(model_type)
            if not model:
                return {**st, "final_answer": "No suitable model available", "perf": {"error": "No model"}}

            enhanced_query = f"""
Question: {st["query"]}

Please provide a direct, accurate answer to this question.
"""

            sys = SystemMessage(content=ENHANCED_SYSTEM_PROMPT)
            res = model.invoke([sys, HumanMessage(content=enhanced_query)])

            answer = res.content.strip() if hasattr(res, 'content') else str(res).strip()
            if "FINAL ANSWER:" in answer:
                answer = answer.split("FINAL ANSWER:")[-1].strip()

            return {**st,
                    "final_answer": answer,
                    "reasoning": reasoning,
                    "perf": {"time": time.time() - t0, "prov": f"OpenSource-{model_type}"}}
        except Exception as e:
            return {**st, "final_answer": f"Error: {e}", "perf": {"error": str(e)}}

    def process_query(self, q: str) -> str:
        """Process a query through the open-source multi-LLM system."""
        state = {
            "messages": [HumanMessage(content=q)],
            "query": q,
            "agent_type": "",
            "final_answer": "",
            "perf": {},
            "tools_used": [],
            "reasoning": "",
            "model_used": ""
        }
        cfg = {"configurable": {"thread_id": f"opensource_qa_{hash(q)}"}}

        try:
            out = self.graph.invoke(state, cfg)
            answer = out.get("final_answer", "").strip()

            # Guard against models that merely echo the question back.
            if answer == q or answer.startswith(q):
                return "Information not available"

            return answer if answer else "No answer generated"
        except Exception as e:
            return f"Error processing query: {e}"

    def get_system_info(self) -> Dict[str, Any]:
        """Get information about the available open-source models."""
        return {
            "available_models": self.model_manager.list_available_models(),
            "total_models": len(self.model_manager.available_models),
            "model_types": {
                "groq_free_tier": [m for m in self.model_manager.list_available_models() if m.startswith("groq_")],
                "ollama_local": [m for m in self.model_manager.list_available_models() if m.startswith("ollama_")],
                "huggingface_local": [m for m in self.model_manager.list_available_models() if m.startswith("hf_")]
            }
        }


def build_graph(provider: str = "opensource"):
    """Build a graph using only open-source models; `provider` is accepted for API compatibility but ignored."""
    return OpenSourceMultiLLMSystem().graph
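
# Example usage (sketch):
#
#   system = OpenSourceMultiLLMSystem()
#   print(system.process_query("What is 25 multiplied by 17?"))
#   # A healthy run prints something like "425"; exact output depends on the
#   # models installed locally or reachable via the free Groq tier.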


if __name__ == "__main__":
    system = OpenSourceMultiLLMSystem()

    info = system.get_system_info()
    print("Open-Source System Information:")
    print(f"Total Models Available: {info['total_models']}")
    for category, models in info['model_types'].items():
        if models:
            print(f"  {category}: {models}")

    test_questions = [
        "What is 25 multiplied by 17?",
        "Find information about Mercedes Sosa albums between 2000-2009",
        "Write a simple Python function to calculate factorial",
        "Explain quantum computing in simple terms",
        "What is the capital of France?"
    ]

    print("\n" + "=" * 60)
    print("Testing Open-Source Multi-LLM System")
    print("=" * 60)

    for i, question in enumerate(test_questions, 1):
        print(f"\nQuestion {i}: {question}")
        print("-" * 50)
        answer = system.process_query(question)
        print(f"Answer: {answer}")