# AgentLlama007B / RBotReloaded.py
# Uploaded by srossitto79 — "added airllm experiment" (commit 48a378e)
import os
import re
from datetime import datetime, timedelta
from threading import Thread
import asyncio
import requests
import streamlit as st
import json
import time
from bs4 import BeautifulSoup
from PIL import Image
import base64
import io
import google_free_search
from langchain.vectorstores import FAISS # For storing embeddings
from langchain.chains import RetrievalQA, ConversationalRetrievalChain # Chains for QA
from langchain.utilities import TextRequestsWrapper, WikipediaAPIWrapper # Tools
from langchain.document_loaders import DirectoryLoader, PyMuPDFLoader, PyPDFLoader, TextLoader, WebBaseLoader # Loaders
from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader # Load URLs
from langchain.schema import AIMessage, HumanMessage, get_buffer_string # Chat history
from langchain.text_splitter import RecursiveCharacterTextSplitter # Split text
from langchain.llms import TextGen, LlamaCpp, CTransformers # Language models
from langchain.memory import ConversationBufferMemory # Chat memory
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler # Logging
from langchain.agents import Tool, load_tools # Tools
from langchain.input import get_colored_text # Console colors
from langchain.embeddings import (
HuggingFaceEmbeddings,
LlamaCppEmbeddings,
SentenceTransformerEmbeddings,
)
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline # Image generation
from typing import Any, Dict, List
import torch
from diffusers.pipelines.stable_diffusion.convert_from_ckpt import download_from_original_stable_diffusion_ckpt
import inspect
from AirLLM import AirLLM
# Config
EMBD_CHUNK_SIZE = 512  # character chunk size used when splitting documents for embedding
AI_NAME = "Agent Llama"  # assistant display name injected into prompts
USER_NAME = "Buddy"  # default user display name
MODELS_DIR = "./models"  # local directory holding GGUF / safetensors model files
# Characters stripped from raw tool parameters before use.
_PARAM_CLEANUP = str.maketrans("", "", "()\"#")


def validate_and_fix_params(tool_name, params_list):
    """Sanitize a parsed tool-call parameter list.

    Collects the first non-trivial (length > 2 after cleanup) string
    parameter, and — for the ImageGenerator tool only — appends any
    remaining parameters that parse as positive integers (width/height).

    Parameters:
        tool_name: name of the tool being invoked (e.g. "ImageGenerator").
        params_list: raw comma-split parameter strings from the LLM output.

    Returns:
        A list containing at most one cleaned string followed by any valid
        positive-int parameters; [] when nothing usable was found.
    """
    validated_params = []
    idx = 0
    # Scan for the first string parameter that is non-trivial after cleanup.
    while idx < len(params_list) and not validated_params:
        val = str(params_list[idx]).translate(_PARAM_CLEANUP).strip()
        idx += 1
        if len(val) > 2:
            validated_params.append(val)
    # ImageGenerator also takes numeric size parameters.
    if idx < len(params_list) and tool_name == "ImageGenerator" and len(validated_params) < 3:
        for raw in params_list[idx:]:
            # BUG FIX: a single malformed number used to abort the whole
            # function and discard the already-validated params; now each
            # numeric param is converted independently and bad ones skipped.
            try:
                num = int(str(raw).translate(_PARAM_CLEANUP).strip())
            except (ValueError, TypeError) as e:
                print(f"Error parsing params: {str(e)}")
                continue
            if num > 0:
                validated_params.append(num)
    return validated_params
# Helper to load LM
def create_llm(model_id=f"{MODELS_DIR}/deepseek-coder-6.7b-instruct.Q5_K_M.gguf", load_4bit=False, load_8bit=False, ctx_len = 8192, temperature=0.5, top_p=0.95):
    """Instantiate a language-model backend for the given model id.

    Dispatch order:
      * "http..."            -> TextGen (text-generation-webui endpoint)
      * existing local file  -> LlamaCpp, falling back to CTransformers
      * anything else        -> AirLLM (e.g. a hub model id)

    Returns the constructed LLM object, or None when every local loader fails.
    """
    # Remote endpoint -> text-generation-webui API wrapper.
    if model_id.startswith("http"):
        print(f"Creating TextGen LLM base_url:{model_id}")
        return TextGen(model_url=model_id, seed=79, callbacks=[StreamingStdOutCallbackHandler()])

    # Not a local file -> hand off to AirLLM.
    if not os.path.exists(model_id):
        print(f"Trying AirLLM to load model_id:{model_id}")
        compression = "4bit" if load_4bit else "8bit" if load_8bit else ""
        return AirLLM(llama2_model_id=model_id, max_len=ctx_len, compression=compression)

    # Local file: prefer llama.cpp; on failure try CTransformers.
    try:
        print(f"Creating LlamaCpp LLM model_id:{model_id}")
        return LlamaCpp(
            model_path=model_id,
            verbose=True,
            n_batch=521,
            seed=79,
            alpha_value=1,
            rope_freq_base=10000,
            compress_pos_emb=ctx_len / 4096,
            n_ctx=ctx_len,
            load_in_4bit=load_4bit,
            load_in_8bit=load_8bit,
            temperature=temperature,
            top_p=top_p,
        )
    except Exception:
        pass
    try:
        print(f"Creating CTransformers LLM model_id:{model_id}")
        settings = {
            "context_length": ctx_len,
            "batch_size": 521,
            "seed": 79,
            "top_p": top_p,
            "temperature": temperature,
        }
        return CTransformers(model=model_id, model_type='llama', seed=79, config=settings)
    except Exception as ex:
        print(f"Load Error {str(ex)}")
        return None
# Class to store pages and run queries
class StorageRetrievalLLM:
    """Long-term knowledge base backed by a FAISS vector store.

    Loads (or builds) a FAISS index from PDFs found under
    ``stored_pages_folder`` and exposes helpers to ingest URLs, documents,
    and raw text, plus a conversational retrieval query interface.
    """

    def __init__(self, stored_pages_folder : str, llm, embeddings):
        """Load an existing index from disk, or build one from local PDFs.

        Parameters:
            stored_pages_folder: directory holding the FAISS index and PDFs.
            llm: language model used by the retrieval chain (may be falsy).
            embeddings: embedding model for indexing and querying.
        """
        # Initialize storage
        os.makedirs(stored_pages_folder, exist_ok=True)
        self.stored_pages_folder = stored_pages_folder
        self.llm = llm
        self.embeddings = embeddings
        # Try loading an existing index; on any failure build a fresh one.
        try:
            print(f"Loading StorageRetrievalLLM from disk")
            self.vectorstore = FAISS.load_local(folder_path=stored_pages_folder, embeddings=embeddings)
            self.chain = self.create_chain()
        except Exception:  # BUG FIX: was a bare except (swallowed KeyboardInterrupt/SystemExit)
            print(f"Initializing a new instance of StorageRetrievalLLM")
            print(f"Loading PDF")
            self.vectorstore = None
            self.chain = None
            # Load pages
            loader = DirectoryLoader(stored_pages_folder, glob="**/*.pdf", loader_cls=PyMuPDFLoader)
            documents = loader.load()
            # Split into chunks
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=EMBD_CHUNK_SIZE, chunk_overlap=100)
            documents = text_splitter.split_documents(documents)
            if len(documents) > 0:
                # Create index
                print(f"Creating FAISS index FROM {len(documents)} documents")
                self.vectorstore = FAISS.from_documents(documents, embeddings)
                self.vectorstore.save_local(folder_path=stored_pages_folder)
            else:
                print(f"Initializing with empty FAISS index")
                self.vectorstore = FAISS.from_texts(["Knowledge Base: Use the learning tools (learnOnline, wikipedia, etc...) to increase tour knownledge."], embeddings)
            if llm:
                # Create chain
                self.chain = self.create_chain()

    # Helper to create retrieval chain
    def create_chain(self, vectorstore = None, llm = None, embeddings = None):
        """Build a ConversationalRetrievalChain over the vector store.

        Any argument left as None falls back to the instance's current value.
        """
        if vectorstore is None:
            vectorstore = self.vectorstore
        if llm is None:
            llm = self.llm
        if embeddings is None:
            embeddings = self.embeddings
        print(f"Creating Retriever llm chain")
        retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
        chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, return_source_documents=False)
        return chain

    # Add URL
    def addUrlToMemory(self, url : str, summarize = True):
        """Crawl a URL (depth 2), index its text, and optionally summarize it."""
        loader = RecursiveUrlLoader(url=url, max_depth=2, extractor=lambda x: BeautifulSoup(x, "html.parser").text)
        docs = loader.load()
        # Split
        splitter = RecursiveCharacterTextSplitter()
        documents = splitter.split_documents(docs)
        # Add
        self.vectorstore.add_documents(documents)
        # Rebuild the chain so new documents are retrievable.
        self.chain = self.create_chain()
        # Summarize
        if summarize:
            return self.query(query=f"return a short summary about the website {url}, try to not exceed 3500 tokens")
        else:
            return f"URL {url} Parsed and collected into memory vectorstore..."

    # Add document
    def addDocumentToMemory(self, doc : str, summarize = True):
        """Index a PDF document (by path, or relative to ./data) and optionally summarize it."""
        # Resolve the file: absolute/relative path first, then the data folder.
        file_path = doc if os.path.exists(doc) else os.path.join("data", doc)
        loader = PyPDFLoader(file_path)
        documents = loader.load_and_split()
        self.vectorstore.add_documents(documents)
        # Rebuild the chain so new documents are retrievable.
        self.chain = self.create_chain()
        # Summarize
        if summarize:
            return self.query(query=f"return a short summary about the doc {file_path}, try to not exceed 3500 tokens")
        else:
            return f"File {file_path} Parsed and collected into memory vectorstore..."

    # Add text file
    def addTextFileToMemory(self, file_path : str, summarize = True):
        """Index a file and optionally summarize it.

        NOTE(review): this uses PyPDFLoader despite the name suggesting plain
        text files — plain-text input would likely fail; confirm intent.
        """
        loader = PyPDFLoader(file_path)
        documents = loader.load_and_split()
        self.vectorstore.add_documents(documents)
        # Rebuild the chain so new documents are retrievable.
        self.chain = self.create_chain()
        # Summarize
        if summarize:
            return self.query(query=f"return a short summary about the file {file_path}, try to not exceed 3500 tokens")
        else:
            return f"File {file_path} Parsed and collected into memory vectorstore..."

    # Add text
    def addTextToMemory(self, text : str, summarize = True):
        """Index a raw text snippet and optionally summarize it."""
        self.vectorstore.add_texts([text])
        # Rebuild the chain so new text is retrievable.
        self.chain = self.create_chain()
        # Summarize
        if summarize:
            return self.query(query=f"return a short summary about the text {text[:10]}, try to not exceed 3500 tokens")
        else:
            return "Text Parsed and collected into memory vectorstore..."

    # Run query
    def query(self, query: str, chat_history = []):
        """Answer a question against the knowledge base; returns the answer string."""
        res = self.chain({"question" : query, "chat_history" : chat_history})
        return res['answer']
# Class for agent
class RBotAgent:
    """Minimal ReAct-style agent loop.

    Prompts the LLM with tool signatures, parses tool invocations like
    ``ToolName("arg", 512)`` out of the reply, executes the tool, feeds the
    observation back into the prompt, and repeats up to ``max_iterations``.
    """

    def __init__(self, llm, tools, max_iterations=3, observations_callback=None):
        """Store the LLM callable, tool list, and iteration budget."""
        self.llm = llm
        self.tools = tools
        self.max_iterations = max_iterations
        self.observations_callback = observations_callback

    # Get tools prompt
    def tools_prompt(self):
        """Return one 'ToolName(query_params)' line per available tool."""
        return "\n".join([f"{tool.name}(query_params)" for tool in self.tools])

    # Main handler
    def __call__(self, params):
        """Run the agent loop on params["input"] / params["chat_history"].

        Returns a dict {"output": str} with the final response, prefixed by
        any tool-call observations collected along the way.
        """
        user_input = params["input"]
        chat_history = params["chat_history"]
        formatted_history = get_buffer_string(chat_history, human_prefix="USER")
        prompt = f"""
### EXAMPLE 1:
USER: Find me a recipe for chocolate chip cookies.
AI: SearchAndReply("chocolate chip cookies recipe", 5)
### EXAMPLE 2:
USER: Show me pictures of cute puppies.
AI: ImageGenerator("cute puppies", 512, 512)
### EXAMPLE 3:
USER: Explain the concept of blockchain.
AI: KnowledgeBaseQuery("Explain blockchain")
### EXAMPLE 4:
USER: Find me recent news about cryptocurrency.
AI: SearchAndReply("recent cryptocurrency news")
### EXAMPLE 5:
USER: Can you calculate the factorial of 5?
AI: Calculator("factorial(5)")
### REAL CONVERSATION:
[INST]
SYSTEM: You are {AI_NAME} a smart and helpful AI assistant with access to external tools and knowledge.
Today is {str(datetime.now().date())}, please reply last user message directly or invoking a valid action from the following list:
{self.tools_prompt()}
[/INST]
{formatted_history}
USER: {user_input}
AI:
"""
        observations = []
        tool_names = [tool.name.lower() for tool in self.tools]
        output = ""
        for iteration in range(self.max_iterations):
            print(f"iteration {iteration+1} - sending prompt:\n" + prompt)
            # Retry up to 3 times when the model returns an empty completion.
            # BUG FIX: this retry loop used to reuse the outer loop variable
            # `i`, clobbering the iteration counter checked below.
            for _attempt in range(3):
                output = str(self.llm(prompt, stop=["USER:", "AI:", "SYS:", "[INST]", "[/INST]"])).strip()
                if output:
                    break
            return_role = output.split(":")[0]
            return_message = output[len(return_role)+1:].split("[INST]")[0].split("[/INST]")[0].split("User")[0].split("USER")[0].strip()
            # Try to parse a formatted action request: ToolName(args)
            action_name = None
            action_input = None
            matches = re.findall(r"(\w+)\((.+?)\)", return_message)
            for match in matches:
                if len(match) > 1 and match[0] and match[1]:
                    if match[0].strip().lower() in tool_names:
                        action_name = match[0].strip().lower()
                        action_input = match[1].strip().replace("query_params", "").strip().replace("()", "")
                if action_name and action_input:
                    break
            # Fall back to scanning raw output lines for "tool:" / "tool(" mentions.
            if not action_name or not action_input:
                lines = output.split("\n")
                for line in lines:
                    for tool in tool_names:
                        if f"{tool}:" in line.lower() or f"{tool}(" in line.lower():
                            action_name = tool
                            action_input = line[line.lower().find(tool)+len(tool):].strip().replace("query_params", "").strip().replace("()", "")
                            if len(action_input) < 2:
                                action_input = None
                            else:
                                print(f"Matched unformatted action request. {action_name}:{action_input} from line: {line}")
                        if action_name and action_input:
                            break
                        elif action_name and not action_input:
                            # Tool named but args not captured: take everything after ':'.
                            action_input = line[line.find(":") + 1:].replace("\"", "")
                            if len(action_input) < 2:
                                action_input = None
                            else:
                                print(f"Matched unformatted action request. {action_name}:{action_input} from line: {line}")
                    if action_name and action_input:
                        break
            # Call tool if found
            if action_name and action_input:
                for tool in self.tools:
                    if tool.name.lower() in action_name:
                        params_list = action_input.split(",")
                        try:
                            print(f"Fixing input for tool {tool.name}, Original params list: {str(params_list)}")
                            params_list = validate_and_fix_params(tool.name, params_list)
                            print(f"Fixed params list: {str(params_list)}")
                            print(f"Calling action:{tool.name} with input:{str(params_list)}")
                            observations.append(f"Calling action:{tool.name} with input:{str(params_list)}")
                            res = tool.func(*params_list)
                        except Exception as ex:
                            res = f"{action_name} execution error: {str(ex)}"
                        print(f"Action Output: {res}")
                        observations.append(f"Action Output: {res}")
                        # BUG FIX: "\SYSTEM" was a literal backslash-S; a real
                        # newline is needed to separate the observation.
                        prompt = prompt + f"Action: {tool.name}({action_input})\nSYSTEM:{res}"
                if iteration + 1 == self.max_iterations:
                    # Budget exhausted: force a final answer from the model.
                    print(f"MAX ITERATIONS REACHED. PLEASE PROVIDE A FINAL RESPONSE!")
                    prompt = prompt + "\nMAX ITERATIONS REACHED. PLEASE PROVIDE A FINAL RESPONSE!\nAI:"
                    output = str(self.llm(prompt, stop=["USER:", "AI:", "SYS:", "SYSTEM:", "[INST]", "[/INST]"])).strip()
                    final_response = "\n*Reasoning: ".join(observations) + f"\n{output}" if len(observations) > 0 else f"\n{output}"
                    return {"output": final_response}
                else:
                    prompt = prompt + "\nAI:"
            else:
                # No tool requested: the reply itself is the final answer.
                final_response = "\n*Reasoning: ".join(observations) + f"\n{output}" if len(observations) > 0 else f"\n{output}"
                print(f"Final Answer: {final_response}")
                return {"output": final_response}
        return {"output": "Max Iterations reached. Last Output:\n" + output}
# Main agent class
class SmartAgent:
    """Top-level agent wiring the LLM, embeddings, knowledge base, web
    search, and (lazy-loaded) Stable Diffusion pipelines into an RBotAgent."""

    def __init__(self, model_id: str, conversation_model = "", emb_model="all-MiniLM-L6-v2", load_in_4bit=False, load_in_8bit=True, ctx_len=16384, temp=0.1, top_p=0.95, max_iterations=3, observations_callback = None):
        """Build the LLM, embeddings, memory chain, tool wrappers, and agent."""
        self.chat_history = []
        self.max_iterations = max_iterations
        self.model = model_id
        self.current_message = ""
        # Load LM
        self.llm = create_llm(model_id, load_4bit=load_in_4bit, load_8bit=load_in_8bit, ctx_len=ctx_len, temperature=temp, top_p=top_p)
        # Load embeddings
        self.embeddings = SentenceTransformerEmbeddings(model_name=emb_model)
        # Initialize long-term memory / knowledge base
        self.memory_chain = StorageRetrievalLLM(stored_pages_folder="./knowledge_base", llm=self.llm, embeddings=self.embeddings)
        # Tool wrappers
        self.requests_tool = TextRequestsWrapper()
        self.wikipedia_tool = WikipediaAPIWrapper()
        # Stable Diffusion pipelines are loaded lazily on first use.
        self.image2image_gen_pipe = None
        self.text2image_gen_pipe = None
        # Create agent
        self.smartAgent = self.create_smart_agent()
        print(f"Smart Agent Initialized - CUDA Support:{torch.cuda.is_available()}")

    def reset_context(self):
        """Forget the running conversation history."""
        self.chat_history.clear()

    # Create image
    def createImage(self, prompt : str, width : int=512, height : int=512, denoise_strength : float=0.75, guidance_scale: float=7.5, model_id : str= 'dreamshaper_8.safetensors'):
        """Generate image(s) from a prompt with Stable Diffusion.

        If ./image_gen_guide.jpg exists, it is used as an img2img guide image;
        otherwise a plain text-to-image run is performed. Returns a message
        listing the saved file paths, or an error message on failure.
        """
        try:
            init_image = None
            if os.path.exists("./image_gen_guide.jpg"):
                init_image = Image.open("./image_gen_guide.jpg")
            images = []
            if init_image is None:
                # Text-to-image path; pipeline cached after first load.
                if self.text2image_gen_pipe is None:
                    if torch.cuda.is_available():
                        print(f"Loading Stable model {model_id} into GPU")
                        self.text2image_gen_pipe = StableDiffusionPipeline.from_single_file(f"{MODELS_DIR}/" + model_id, torch_dtype=torch.float16, verbose=True, use_safetensors=True)
                        self.text2image_gen_pipe = self.text2image_gen_pipe.to("cuda")
                    else:
                        print(f"Loading Stable model {model_id} into CPU")
                        self.text2image_gen_pipe = StableDiffusionPipeline.from_single_file(f"{MODELS_DIR}/" + model_id, torch_dtype=torch.float32, verbose=True, use_safetensors=True)
                        self.text2image_gen_pipe = self.text2image_gen_pipe.to("cpu")
                print("generating image from promt...")
                images = self.text2image_gen_pipe(prompt, width=width, height=height).images
            else:
                # Image-to-image path guided by ./image_gen_guide.jpg.
                if self.image2image_gen_pipe is None:
                    if torch.cuda.is_available():
                        print(f"Loading Stable model {model_id} into GPU")
                        self.image2image_gen_pipe = StableDiffusionImg2ImgPipeline.from_single_file(f"{MODELS_DIR}/" + model_id, torch_dtype=torch.float16, verbose=True, use_safetensors=True)
                        self.image2image_gen_pipe = self.image2image_gen_pipe.to("cuda")
                    else:
                        print(f"Loading Stable model {model_id} into CPU")
                        self.image2image_gen_pipe = StableDiffusionImg2ImgPipeline.from_single_file(f"{MODELS_DIR}/" + model_id, torch_dtype=torch.float32, verbose=True, use_safetensors=True)
                        self.image2image_gen_pipe = self.image2image_gen_pipe.to("cpu")
                print("generating image from promt+image...")
                init_image = init_image.convert("RGB")
                images = self.image2image_gen_pipe(prompt, image=init_image, width=width, height=height, strength=denoise_strength, guidance_scale=guidance_scale).images
            paths = []
            os.makedirs("./generated_images", exist_ok=True)  # ensure output dir exists
            for image in (images if images is not None else []):
                # Create a filename based on the current date and time
                filename = f'image_{datetime.now().strftime("%Y%m%d%H%M%S")}{(len(paths)+1)}.jpg'
                # BUG FIX: `filename` was computed but discarded and a garbled
                # literal path was used, so every image overwrote one file.
                file_path = f"./generated_images/{filename}"
                image.save(file_path)
                paths.append(file_path)
            return f"Generated images from prompt \"{prompt}\" saved to files: {', '.join(paths)}"
        except Exception as e:
            print(f"error in createImage: {e}")
            return "Unable to generate file"

    def load_and_split_documents(self, url, max_depth=2):
        """Crawl a URL to max_depth and return its text split into chunks."""
        loader = RecursiveUrlLoader(url, max_depth=max_depth, extractor=lambda x: BeautifulSoup(x, "html.parser").text)
        docs = loader.load()
        splitter = RecursiveCharacterTextSplitter()
        return splitter.split_documents(docs)

    def search_and_reply(self, query, max_results=5):
        """Google-search the query, index the top result pages, and answer
        the current user message against that temporary index."""
        vectorstore = None
        sources = ""
        res_cnt = 0
        results = google_free_search.gsearch(query=query)
        urls = []
        for result in results:
            link = result['link']
            title = result['title']
            if link.startswith("http://") or link.startswith("https://"):
                res_cnt = res_cnt + 1
                if res_cnt > max_results:
                    break
                print(f"- Found Valid Link {title} : {link}")
                sources += f"{title}, "
                urls.append(link)
            else:
                print(f"ERROR! Invalid link: {link} for result: {title}")
        if len(urls) > 0:
            import concurrent.futures
            print(f"Loading {len(urls)} urls into a vectore store")
            # Fetch and split the pages in parallel (network-bound).
            with concurrent.futures.ThreadPoolExecutor() as executor:
                future_results = [executor.submit(self.load_and_split_documents, url) for url in urls]
                documents = []
                for future in concurrent.futures.as_completed(future_results):
                    documents.extend(future.result())
                if len(documents) > 0:
                    vectorstore = FAISS.from_documents(documents, self.embeddings)
        if vectorstore is not None:
            retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
            chain = RetrievalQA.from_chain_type(llm=self.llm, chain_type="stuff", retriever=retriever)
            # Append today's date so time-sensitive questions get fresh context.
            response = chain.run(self.current_message + " " + datetime.now().strftime("%Y/%m/%d"))
            ret_message = response
            return ret_message
        else:
            return f"Unable to acquire results from web search results:{len(results)} - valids:{res_cnt}"

    # Main handler
    def agent_generate_response(self, user_message):
        """Run the agent on a user message, track history, and time the call."""
        start_time = time.time()
        self.current_message = user_message
        # Get response
        message_response = self.smartAgent({"input" : user_message, "chat_history" : self.chat_history})
        end_time = time.time()
        elapsed_time = end_time - start_time
        # Format response with elapsed seconds appended.
        response = message_response['output'] + f" ({round(elapsed_time,2)}s)"
        self.chat_history.append(HumanMessage(content=user_message))
        self.chat_history.append(AIMessage(content=message_response['output']))
        return response

    # Create agent
    def create_smart_agent(self):
        """Assemble the tool list and wrap it in an RBotAgent."""
        tools = [
            Tool(name="SearchAndReply", func=self.search_and_reply, description="Search web and reply"),
            Tool(name="Wikipedia", func=self.wikipedia_tool.run, description="Query Wikipedia"),
            Tool(name="ImageGenerator", func=self.createImage, description="Generate images"),
            Tool(name="KnowledgeBaseQuery", func=self.memory_chain.query, description="Query knowledge base"),
        ]
        tools.extend(load_tools(["llm-math"], llm=self.llm))
        return RBotAgent(llm=self.llm, tools=tools, max_iterations=self.max_iterations)