"""
Open-Source Multi-LLM Agent System
Uses only free and open-source models - no paid APIs required
"""
import os
import time
import random
import operator
from typing import List, Dict, Any, TypedDict, Annotated, Optional
from dotenv import load_dotenv
# Core LangChain imports
from langchain_core.tools import tool
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import WikipediaLoader
from langgraph.graph import StateGraph, END
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
# Open-source model integrations
from langchain_groq import ChatGroq  # Free tier available
from langchain_community.chat_models import ChatOllama
# Hugging Face integration for open-source models
try:
from langchain_huggingface import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
HF_AVAILABLE = True
except ImportError:
HF_AVAILABLE = False
load_dotenv()
# Enhanced system prompt
ENHANCED_SYSTEM_PROMPT = (
"You are a helpful assistant tasked with answering questions using available tools. "
"You must provide accurate, comprehensive answers based on available information. "
"When answering questions, follow these guidelines:\n"
"1. Use available tools to gather information when needed\n"
"2. Provide precise, factual answers\n"
"3. For numbers: don't use commas or units unless specified\n"
"4. For strings: don't use articles or abbreviations, write digits in plain text\n"
"5. For lists: apply above rules based on element type\n"
"6. Always end with 'FINAL ANSWER: [YOUR ANSWER]'\n"
"7. Be concise but thorough in your reasoning\n"
"8. If you cannot find the answer, state that clearly"
)
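# Note: the nodes below strip everything before the last "FINAL ANSWER:" marker
# when post-processing model output, which is why the prompt insists on it.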
# ---- Tool Definitions ----
@tool
def multiply(a: int, b: int) -> int:
"""Multiply two integers and return the product."""
return a * b
@tool
def add(a: int, b: int) -> int:
"""Add two integers and return the sum."""
return a + b
@tool
def subtract(a: int, b: int) -> int:
"""Subtract the second integer from the first and return the difference."""
return a - b
@tool
def divide(a: int, b: int) -> float:
"""Divide the first integer by the second and return the quotient."""
if b == 0:
raise ValueError("Cannot divide by zero.")
return a / b
@tool
def modulus(a: int, b: int) -> int:
"""Return the remainder when dividing the first integer by the second."""
return a % b
@tool
def optimized_web_search(query: str) -> str:
"""Perform web search using free DuckDuckGo (fallback if Tavily not available)."""
try:
# Try Tavily first (free tier)
if os.getenv("TAVILY_API_KEY"):
            time.sleep(random.uniform(0.7, 1.5))  # brief jitter to stay under rate limits
search_tool = TavilySearchResults(max_results=3)
docs = search_tool.invoke({"query": query})
return "\n\n---\n\n".join(
f"<Doc url='{d.get('url','')}'>{d.get('content','')[:800]}</Doc>"
for d in docs
)
else:
# Fallback to DuckDuckGo (completely free)
try:
from duckduckgo_search import DDGS
with DDGS() as ddgs:
results = list(ddgs.text(query, max_results=3))
return "\n\n---\n\n".join(
f"<Doc url='{r.get('href','')}'>{r.get('body','')[:800]}</Doc>"
for r in results
)
except ImportError:
return "Web search not available - install duckduckgo-search for free web search"
except Exception as e:
return f"Web search failed: {e}"
@tool
def optimized_wiki_search(query: str) -> str:
"""Perform Wikipedia search - completely free."""
try:
        time.sleep(random.uniform(0.3, 1))  # brief jitter to avoid hammering Wikipedia
docs = WikipediaLoader(query=query, load_max_docs=2).load()
return "\n\n---\n\n".join(
f"<Doc src='{d.metadata.get('source','Wikipedia')}'>{d.page_content[:1000]}</Doc>"
for d in docs
)
except Exception as e:
return f"Wikipedia search failed: {e}"
# ---- Open-Source Model Manager ----
class OpenSourceModelManager:
"""Manages only open-source and free models"""
def __init__(self):
self.available_models = {}
self._initialize_models()
def _initialize_models(self):
"""Initialize only open-source models"""
# 1. Groq (Free tier with open-source models)
if os.getenv("GROQ_API_KEY"):
try:
self.available_models['groq_llama3_70b'] = ChatGroq(
model="llama3-70b-8192",
temperature=0,
api_key=os.getenv("GROQ_API_KEY")
)
self.available_models['groq_llama3_8b'] = ChatGroq(
model="llama3-8b-8192",
temperature=0,
api_key=os.getenv("GROQ_API_KEY")
)
self.available_models['groq_mixtral'] = ChatGroq(
model="mixtral-8x7b-32768",
temperature=0,
api_key=os.getenv("GROQ_API_KEY")
)
self.available_models['groq_gemma'] = ChatGroq(
model="gemma-7b-it",
temperature=0,
api_key=os.getenv("GROQ_API_KEY")
)
print("Groq models initialized (free tier)")
except Exception as e:
print(f"Groq models not available: {e}")
# 2. Ollama (Completely free local models)
try:
            # Probe the local server: constructing ChatOllama doesn't contact it,
            # so send a tiny request to confirm Ollama is actually reachable
            ChatOllama(model="llama3", base_url="http://localhost:11434").invoke("ping")
            # Server responded, so register the local models
self.available_models['ollama_llama3'] = ChatOllama(model="llama3")
self.available_models['ollama_llama3_70b'] = ChatOllama(model="llama3:70b")
self.available_models['ollama_mistral'] = ChatOllama(model="mistral")
self.available_models['ollama_phi3'] = ChatOllama(model="phi3")
self.available_models['ollama_codellama'] = ChatOllama(model="codellama")
self.available_models['ollama_gemma'] = ChatOllama(model="gemma")
self.available_models['ollama_qwen'] = ChatOllama(model="qwen")
print("Ollama models initialized (local)")
except Exception as e:
print(f"Ollama not available: {e}")
# 3. Hugging Face Transformers (Completely free)
if HF_AVAILABLE:
try:
                # Small models that can run on CPU; skip any that fail to load
                for hf_key, hf_model_id in [("hf_gpt2", "gpt2"), ("hf_distilgpt2", "distilgpt2")]:
                    hf_model = self._create_hf_model(hf_model_id)
                    if hf_model is not None:  # _create_hf_model returns None on failure
                        self.available_models[hf_key] = hf_model
print("Hugging Face models initialized (local)")
except Exception as e:
print(f"Hugging Face models not available: {e}")
print(f"Total available open-source models: {len(self.available_models)}")
def _create_hf_model(self, model_name: str):
"""Create Hugging Face pipeline model"""
try:
pipe = pipeline(
"text-generation",
model=model_name,
max_length=512,
do_sample=True,
temperature=0.7,
                pad_token_id=50256  # GPT-2's end-of-text token id, reused for padding
)
return HuggingFacePipeline(pipeline=pipe)
except Exception as e:
print(f"Failed to create HF model {model_name}: {e}")
return None
def get_model(self, model_name: str):
"""Get a specific model by name"""
return self.available_models.get(model_name)
def list_available_models(self) -> List[str]:
"""List all available model names"""
return list(self.available_models.keys())
def get_best_model_for_task(self, task_type: str):
"""Get the best available model for a specific task type"""
if task_type == "reasoning":
# Prefer larger models for reasoning
for model_name in ['groq_llama3_70b', 'ollama_llama3_70b', 'groq_mixtral', 'ollama_llama3']:
if model_name in self.available_models:
return self.available_models[model_name]
elif task_type == "coding":
# Prefer code-specialized models
for model_name in ['ollama_codellama', 'groq_llama3_70b', 'ollama_llama3']:
if model_name in self.available_models:
return self.available_models[model_name]
elif task_type == "fast":
# Prefer fast, smaller models
for model_name in ['groq_llama3_8b', 'groq_gemma', 'ollama_phi3', 'hf_distilgpt2']:
if model_name in self.available_models:
return self.available_models[model_name]
# Default fallback to first available
if self.available_models:
return list(self.available_models.values())[0]
return None
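# Example usage (assuming at least one backend above is configured):
#   manager = OpenSourceModelManager()
#   fast_model = manager.get_best_model_for_task("fast")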
# ---- Enhanced Agent State ----
class EnhancedAgentState(TypedDict):
"""State structure for the enhanced multi-LLM agent system."""
messages: Annotated[List[HumanMessage | AIMessage], operator.add]
query: str
agent_type: str
final_answer: str
perf: Dict[str, Any]
tools_used: List[str]
reasoning: str
model_used: str
# ---- Open-Source Multi-LLM System ----
class OpenSourceMultiLLMSystem:
"""
Multi-LLM system using only open-source and free models
"""
def __init__(self):
self.model_manager = OpenSourceModelManager()
self.tools = [
multiply, add, subtract, divide, modulus,
optimized_web_search, optimized_wiki_search
]
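        # The search tools are invoked directly inside the graph nodes below;
        # they are not bound to the models via tool-calling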
self.graph = self._build_graph()
    def _build_graph(self):
        """Build and compile the LangGraph state machine with open-source models."""
def router(st: EnhancedAgentState) -> EnhancedAgentState:
"""Route queries to appropriate model based on complexity and content analysis."""
q = st["query"].lower()
# Enhanced routing logic
if any(keyword in q for keyword in ["calculate", "compute", "math", "multiply", "add", "subtract", "divide"]):
model_type = "reasoning"
agent_type = "math"
elif any(keyword in q for keyword in ["search", "find", "lookup", "wikipedia", "information about"]):
model_type = "fast"
agent_type = "search_enhanced"
elif any(keyword in q for keyword in ["code", "programming", "function", "algorithm"]):
model_type = "coding"
agent_type = "coding"
elif len(q.split()) > 20: # Complex queries
model_type = "reasoning"
agent_type = "complex"
else:
model_type = "fast"
agent_type = "simple"
# Get the best model for this task
selected_model = self.model_manager.get_best_model_for_task(model_type)
            # Look up the registry key of the chosen model. Use an identity check:
            # pydantic-based chat models compare by field values under ==
            model_name = next(
                (name for name, model in self.model_manager.available_models.items()
                 if model is selected_model),
                "unknown",
            )
return {**st, "agent_type": agent_type, "tools_used": [], "reasoning": "", "model_used": model_name}
def math_node(st: EnhancedAgentState) -> EnhancedAgentState:
"""Process mathematical queries."""
return self._process_with_model(st, "reasoning", "Mathematical calculation using open-source model")
def search_enhanced_node(st: EnhancedAgentState) -> EnhancedAgentState:
"""Process query with search enhancement."""
t0 = time.time()
tools_used = []
try:
# Determine search strategy
query = st["query"]
search_results = ""
if any(keyword in query.lower() for keyword in ["wikipedia", "wiki"]):
search_results = optimized_wiki_search.invoke({"query": query})
tools_used.append("wikipedia_search")
else:
search_results = optimized_web_search.invoke({"query": query})
tools_used.append("web_search")
enhanced_query = f"""
Original Question: {query}
Search Results:
{search_results}
Based on the search results above, provide a direct answer to the original question.
"""
# Use fast model for search-enhanced queries
model = self.model_manager.get_best_model_for_task("fast")
if model:
sys = SystemMessage(content=ENHANCED_SYSTEM_PROMPT)
res = model.invoke([sys, HumanMessage(content=enhanced_query)])
answer = res.content.strip() if hasattr(res, 'content') else str(res).strip()
if "FINAL ANSWER:" in answer:
answer = answer.split("FINAL ANSWER:")[-1].strip()
return {**st,
"final_answer": answer,
"tools_used": tools_used,
"reasoning": "Used search enhancement with open-source model",
"perf": {"time": time.time() - t0, "prov": "Search-Enhanced"}}
else:
return {**st, "final_answer": "No models available", "perf": {"error": "No models"}}
except Exception as e:
return {**st, "final_answer": f"Error: {e}", "perf": {"error": str(e)}}
def coding_node(st: EnhancedAgentState) -> EnhancedAgentState:
"""Process coding-related queries."""
return self._process_with_model(st, "coding", "Code generation using open-source model")
def complex_node(st: EnhancedAgentState) -> EnhancedAgentState:
"""Process complex queries."""
return self._process_with_model(st, "reasoning", "Complex reasoning using open-source model")
def simple_node(st: EnhancedAgentState) -> EnhancedAgentState:
"""Process simple queries."""
return self._process_with_model(st, "fast", "Simple query using fast open-source model")
# Build graph
g = StateGraph(EnhancedAgentState)
g.add_node("router", router)
g.add_node("math", math_node)
g.add_node("search_enhanced", search_enhanced_node)
g.add_node("coding", coding_node)
g.add_node("complex", complex_node)
g.add_node("simple", simple_node)
g.set_entry_point("router")
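        # Dispatch to the specialist node named by the router's agent_type field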
g.add_conditional_edges("router", lambda s: s["agent_type"], {
"math": "math",
"search_enhanced": "search_enhanced",
"coding": "coding",
"complex": "complex",
"simple": "simple"
})
for node in ["math", "search_enhanced", "coding", "complex", "simple"]:
g.add_edge(node, END)
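        # MemorySaver checkpoints graph state per thread_id (supplied in process_query)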
return g.compile(checkpointer=MemorySaver())
def _process_with_model(self, st: EnhancedAgentState, model_type: str, reasoning: str) -> EnhancedAgentState:
"""Process query with specified model type"""
t0 = time.time()
try:
model = self.model_manager.get_best_model_for_task(model_type)
if not model:
return {**st, "final_answer": "No suitable model available", "perf": {"error": "No model"}}
enhanced_query = f"""
Question: {st["query"]}
Please provide a direct, accurate answer to this question.
"""
sys = SystemMessage(content=ENHANCED_SYSTEM_PROMPT)
res = model.invoke([sys, HumanMessage(content=enhanced_query)])
answer = res.content.strip() if hasattr(res, 'content') else str(res).strip()
if "FINAL ANSWER:" in answer:
answer = answer.split("FINAL ANSWER:")[-1].strip()
return {**st,
"final_answer": answer,
"reasoning": reasoning,
"perf": {"time": time.time() - t0, "prov": f"OpenSource-{model_type}"}}
except Exception as e:
return {**st, "final_answer": f"Error: {e}", "perf": {"error": str(e)}}
def process_query(self, q: str) -> str:
"""Process a query through the open-source multi-LLM system."""
state = {
"messages": [HumanMessage(content=q)],
"query": q,
"agent_type": "",
"final_answer": "",
"perf": {},
"tools_used": [],
"reasoning": "",
"model_used": ""
}
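        # The MemorySaver checkpointer requires a unique thread_id per run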
cfg = {"configurable": {"thread_id": f"opensource_qa_{hash(q)}"}}
try:
out = self.graph.invoke(state, cfg)
answer = out.get("final_answer", "").strip()
# Ensure we don't return the question as the answer
if answer == q or answer.startswith(q):
return "Information not available"
return answer if answer else "No answer generated"
except Exception as e:
return f"Error processing query: {e}"
def get_system_info(self) -> Dict[str, Any]:
"""Get information about available open-source models"""
return {
"available_models": self.model_manager.list_available_models(),
"total_models": len(self.model_manager.available_models),
"model_types": {
"groq_free_tier": [m for m in self.model_manager.list_available_models() if m.startswith("groq_")],
"ollama_local": [m for m in self.model_manager.list_available_models() if m.startswith("ollama_")],
"huggingface_local": [m for m in self.model_manager.list_available_models() if m.startswith("hf_")]
}
}
# ---- Build Graph Function (for compatibility) ----
def build_graph(provider: str = "opensource"):
    """Build graph using only open-source models (the provider argument is accepted for compatibility and ignored)."""
return OpenSourceMultiLLMSystem().graph
# ---- Main execution ----
if __name__ == "__main__":
# Initialize the open-source system
system = OpenSourceMultiLLMSystem()
# Print system information
info = system.get_system_info()
print("Open-Source System Information:")
print(f"Total Models Available: {info['total_models']}")
for category, models in info['model_types'].items():
if models:
print(f" {category}: {models}")
# Test queries
test_questions = [
"What is 25 multiplied by 17?",
"Find information about Mercedes Sosa albums between 2000-2009",
"Write a simple Python function to calculate factorial",
"Explain quantum computing in simple terms",
"What is the capital of France?"
]
print("\n" + "="*60)
print("Testing Open-Source Multi-LLM System")
print("="*60)
for i, question in enumerate(test_questions, 1):
print(f"\nQuestion {i}: {question}")
print("-" * 50)
answer = system.process_query(question)
print(f"Answer: {answer}")