# NOTE: the following three lines were paste/extraction artifacts at the top of
# this file; preserved here as comments so the module stays valid Python.
# Spaces:
# Runtime error
# Runtime error
import os | |
import re | |
from datetime import datetime, timedelta | |
from threading import Thread | |
import asyncio | |
import requests | |
import streamlit as st | |
import json | |
import time | |
from bs4 import BeautifulSoup | |
from PIL import Image | |
import base64 | |
import io | |
import google_free_search | |
from langchain.vectorstores import FAISS # For storing embeddings | |
from langchain.chains import RetrievalQA, ConversationalRetrievalChain # Chains for QA | |
from langchain.utilities import TextRequestsWrapper, WikipediaAPIWrapper # Tools | |
from langchain.document_loaders import DirectoryLoader, PyMuPDFLoader, PyPDFLoader, TextLoader, WebBaseLoader # Loaders | |
from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader # Load URLs | |
from langchain.schema import AIMessage, HumanMessage, get_buffer_string # Chat history | |
from langchain.text_splitter import RecursiveCharacterTextSplitter # Split text | |
from langchain.llms import TextGen, LlamaCpp, CTransformers # Language models | |
from langchain.memory import ConversationBufferMemory # Chat memory | |
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler # Logging | |
from langchain.agents import Tool, load_tools # Tools | |
from langchain.input import get_colored_text # Console colors | |
from langchain.embeddings import ( | |
HuggingFaceEmbeddings, | |
LlamaCppEmbeddings, | |
SentenceTransformerEmbeddings, | |
) | |
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline # Image generation | |
from typing import Any, Dict, List | |
import torch | |
from diffusers.pipelines.stable_diffusion.convert_from_ckpt import download_from_original_stable_diffusion_ckpt | |
import inspect | |
from AirLLM import AirLLM | |
# Config | |
EMBD_CHUNK_SIZE = 512 | |
AI_NAME = "Agent Llama" | |
USER_NAME = "Buddy" | |
MODELS_DIR = "./models" | |
def validate_and_fix_params(tool_name, params_list):
    """Sanitize the comma-split parameter list parsed from an LLM tool call.

    The first usable entry is kept as the (string) query parameter after
    stripping parentheses, quotes and '#' characters; entries shorter than
    3 characters are skipped. For the "ImageGenerator" tool the remaining
    entries are additionally parsed as positive integers (e.g. width/height).

    Args:
        tool_name: Name of the tool the parameters are destined for.
        params_list: Raw parameter fragments (already split on ',').

    Returns:
        List of cleaned parameters; [] when nothing valid could be extracted.
    """
    def _strip_decoration(raw):
        # Remove the wrapping the model tends to emit around arguments.
        return str(raw).replace("(", "").replace(")", "").replace("\"", "").replace("#", "").strip()

    try:
        validated_params = []
        n_param = 0
        # Capture the first sufficiently long text parameter.
        while n_param < len(params_list) and len(validated_params) < 1:
            val = _strip_decoration(params_list[n_param])
            n_param += 1
            if len(val) > 2:
                validated_params.append(val)
        # ImageGenerator also takes trailing numeric parameters.
        if n_param < len(params_list) and tool_name == "ImageGenerator" and len(validated_params) < 3:
            while n_param < len(params_list):
                raw = _strip_decoration(params_list[n_param])
                n_param += 1
                try:
                    num = int(raw)
                except ValueError:
                    # Fixed: a single malformed number used to raise out of the
                    # loop and discard the whole list (including the already
                    # validated query); now it is simply skipped.
                    print(f"Error parsing params: invalid int value '{raw}'")
                    continue
                if num > 0:
                    validated_params.append(num)
        return validated_params
    except Exception as e:
        # Defensive catch-all retained from the original implementation.
        print(f"Error parsing params: {str(e)}")
        return []
# Helper to load LM | |
# Helper to load LM
def create_llm(model_id=f"{MODELS_DIR}/deepseek-coder-6.7b-instruct.Q5_K_M.gguf", load_4bit=False, load_8bit=False, ctx_len=8192, temperature=0.5, top_p=0.95):
    """Instantiate an LLM backend appropriate for `model_id`.

    Backend selection:
      * "http..." URL   -> remote TextGen (text-generation-webui) endpoint
      * existing path   -> LlamaCpp, falling back to CTransformers on failure
      * anything else   -> AirLLM (e.g. a HuggingFace model id)

    Returns the LLM instance, or None when a local model fails to load.
    """
    if model_id.startswith("http"):
        # Remote endpoint: no local loading required.
        print(f"Creating TextGen LLM base_url:{model_id}")
        return TextGen(model_url=model_id, seed=79, callbacks=[StreamingStdOutCallbackHandler()])
    if not os.path.exists(model_id):
        # Not a local file: let AirLLM resolve the identifier.
        print(f"Trying AirLLM to load model_id:{model_id}")
        return AirLLM(llama2_model_id=model_id, max_len=ctx_len, compression=("4bit" if load_4bit else "8bit" if load_8bit else ""))
    try:
        print(f"Creating LlamaCpp LLM model_id:{model_id}")
        # NOTE(review): n_batch=521 looks like a typo for 512 — confirm before changing.
        return LlamaCpp(model_path=model_id, verbose=True, n_batch=521, seed=79, alpha_value=1, rope_freq_base=10000, compress_pos_emb=ctx_len / 4096, n_ctx=ctx_len, load_in_4bit=load_4bit, load_in_8bit=load_8bit, temperature=temperature, top_p=top_p)
    except Exception:
        pass  # fall through to the CTransformers loader
    try:
        print(f"Creating CTransformers LLM model_id:{model_id}")
        ct_config = {
            "context_length": ctx_len,
            "batch_size": 521,
            "seed": 79,
            "top_p": top_p,
            "temperature": temperature,
        }
        return CTransformers(model=model_id, model_type='llama', seed=79, config=ct_config)
    except Exception as load_err:
        print(f"Load Error {str(load_err)}")
        return None
# Class to store pages and run queries | |
class StorageRetrievalLLM:
    """Disk-persisted FAISS knowledge base queried through a conversational
    retrieval chain.

    On construction the index is loaded from `stored_pages_folder` when
    possible; otherwise a new index is built from any PDFs found there (or
    from a small seed text when the folder is empty). Documents, URLs and
    raw text can be added incrementally.
    """

    def __init__(self, stored_pages_folder : str, llm, embeddings):
        """
        Args:
            stored_pages_folder: Folder used both for source PDFs and for the
                persisted FAISS index files.
            llm: LLM used by the retrieval chain (chain creation is skipped
                when falsy).
            embeddings: Embedding model used to vectorize text.
        """
        # Initialize storage
        os.makedirs(stored_pages_folder, exist_ok=True)
        self.stored_pages_folder = stored_pages_folder
        self.llm = llm
        self.embeddings = embeddings
        # Try loading existing, else create new
        try:
            print(f"Loading StorageRetrievalLLM from disk")
            self.vectorstore = FAISS.load_local(folder_path=stored_pages_folder, embeddings=embeddings)
            self.chain = self.create_chain()
        except Exception:
            # Fixed: was a bare `except:` that also swallowed KeyboardInterrupt
            # and SystemExit; the rebuild fallback is unchanged.
            print(f"Initializing a new instance of StorageRetrievalLLM")
            print(f"Loading PDF")
            self.vectorstore = None
            self.chain = None
            # Load pages
            loader = DirectoryLoader(stored_pages_folder, glob="**/*.pdf", loader_cls=PyMuPDFLoader)
            documents = loader.load()
            # Split into chunks
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=EMBD_CHUNK_SIZE, chunk_overlap=100)
            documents = text_splitter.split_documents(documents)
            if len(documents) > 0:
                # Create index
                print(f"Creating FAISS index FROM {len(documents)} documents")
                self.vectorstore = FAISS.from_documents(documents, embeddings)
                self.vectorstore.save_local(folder_path=stored_pages_folder)
            else:
                print(f"Initializing with empty FAISS index")
                self.vectorstore = FAISS.from_texts(["Knowledge Base: Use the learning tools (learnOnline, wikipedia, etc...) to increase tour knownledge."], embeddings)
            if llm:
                # Create chain
                self.chain = self.create_chain()

    # Helper to create retrieval chain
    def create_chain(self, vectorstore = None, llm = None, embeddings = None):
        """Build a ConversationalRetrievalChain over the vector store.

        Any argument left as None defaults to this instance's attribute.
        Returns the new chain (top-5 retrieval, no source documents).
        """
        if vectorstore is None:
            vectorstore = self.vectorstore
        if llm is None:
            llm = self.llm
        if embeddings is None:
            embeddings = self.embeddings
        print(f"Creating Retriever llm chain")
        retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
        chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, return_source_documents=False)
        return chain

    # Add URL
    def addUrlToMemory(self, url : str, summarize = True):
        """Crawl `url` (depth 2), embed its pages into the store, and
        optionally return an LLM-generated summary of the site."""
        loader = RecursiveUrlLoader(url=url, max_depth=2, extractor=lambda x: BeautifulSoup(x, "html.parser").text)
        docs = loader.load()
        # Split
        splitter = RecursiveCharacterTextSplitter()
        documents = splitter.split_documents(docs)
        # Add
        self.vectorstore.add_documents(documents)
        # Update chain
        self.chain = self.create_chain()
        # Summarize
        if summarize:
            return self.query(query=f"return a short summary about the website {url}, try to not exceed 3500 tokens")
        else:
            return f"URL {url} Parsed and collected into memory vectorstore..."

    # Add document
    def addDocumentToMemory(self, doc : str, summarize = True):
        """Load a PDF into the vector store and optionally return a summary.

        `doc` may be an absolute/relative path; otherwise it is resolved
        under the ./data folder.
        """
        # Resolve the path, falling back to the ./data folder.
        # (A dead no-op statement `self.stored_pages_folder` was removed here.)
        file_path = doc if os.path.exists(doc) else os.path.join("data", doc)
        loader = PyPDFLoader(file_path)
        documents = loader.load_and_split()
        self.vectorstore.add_documents(documents)
        # Update chain
        self.chain = self.create_chain()
        # Summarize
        if summarize:
            return self.query(query=f"return a short summary about the doc {file_path}, try to not exceed 3500 tokens")
        else:
            return f"File {file_path} Parsed and collected into memory vectorstore..."

    # Add text file
    def addTextFileToMemory(self, file_path : str, summarize = True):
        """Load a plain-text file into the vector store and optionally return
        a summary of it."""
        # Fixed: previously used PyPDFLoader, which cannot parse plain-text
        # files; TextLoader is already imported at the top of this module.
        loader = TextLoader(file_path)
        documents = loader.load_and_split()
        self.vectorstore.add_documents(documents)
        # Update chain
        self.chain = self.create_chain()
        # Summarize
        if summarize:
            return self.query(query=f"return a short summary about the file {file_path}, try to not exceed 3500 tokens")
        else:
            return f"File {file_path} Parsed and collected into memory vectorstore..."

    # Add text
    def addTextToMemory(self, text : str, summarize = True):
        """Add a raw text snippet to the vector store and optionally return a
        summary of it."""
        # Add text
        self.vectorstore.add_texts([text])
        # Update chain
        self.chain = self.create_chain()
        # Summarize
        if summarize:
            return self.query(query=f"return a short summary about the text {text[:10]}, try to not exceed 3500 tokens")
        else:
            return "Text Parsed and collected into memory vectorstore..."

    # Run query
    def query(self, query: str, chat_history = []):
        """Answer `query` using the retrieval chain and return the answer text.

        NOTE(review): the mutable default `chat_history=[]` is shared across
        calls; it is never mutated here, so behavior is unaffected.
        """
        res = self.chain({"question" : query, "chat_history" : chat_history})
        return res['answer']
# Class for agent | |
class RBotAgent:
    """Minimal ReAct-style agent loop.

    Prompts the LLM with few-shot examples plus the tool list, parses a tool
    invocation out of the reply, executes the matching tool, feeds the result
    back into the prompt, and repeats up to `max_iterations` times.
    """

    def __init__(self, llm, tools, max_iterations=3, observations_callback=None):
        """
        Args:
            llm: Callable model, invoked as `llm(prompt, stop=[...]) -> str`.
            tools: Objects exposing `.name` and `.func(*args)`.
            max_iterations: Maximum prompt/tool round-trips.
            observations_callback: Optional hook (stored but unused here).
        """
        self.llm = llm
        self.tools = tools
        self.max_iterations = max_iterations
        self.observations_callback = observations_callback

    # Get tools prompt
    def tools_prompt(self):
        """Return one `ToolName(query_params)` line per tool for the prompt."""
        return "\n".join([f"{tool.name}(query_params)" for tool in self.tools])

    # Main handler
    def __call__(self, params):
        """Run the agent loop for one user message.

        Args:
            params: {"input": user message, "chat_history": message list}.

        Returns:
            {"output": final response text, prefixed with tool observations}.
        """
        user_message = params["input"]
        chat_history = params["chat_history"]
        formatted_history = get_buffer_string(chat_history, human_prefix="USER")
        prompt = f"""
### EXAMPLE 1:
USER: Find me a recipe for chocolate chip cookies.
AI: SearchAndReply("chocolate chip cookies recipe", 5)
### EXAMPLE 2:
USER: Show me pictures of cute puppies.
AI: ImageGenerator("cute puppies", 512, 512)
### EXAMPLE 3:
USER: Explain the concept of blockchain.
AI: KnowledgeBaseQuery("Explain blockchain")
### EXAMPLE 4:
USER: Find me recent news about cryptocurrency.
AI: SearchAndReply("recent cryptocurrency news")
### EXAMPLE 5:
USER: Can you calculate the factorial of 5?
AI: Calculator("factorial(5)")
### REAL CONVERSATION:
[INST]
SYSTEM: You are {AI_NAME} a smart and helpful AI assistant with access to external tools and knowledge.
Today is {str(datetime.now().date())}, please reply last user message directly or invoking a valid action from the following list:
{self.tools_prompt()}
[/INST]
{formatted_history}
USER: {user_message}
AI:
"""
        observations = []
        # Try calling tools
        tool_names = [tool.name.lower() for tool in self.tools]
        output = ""
        for iteration in range(self.max_iterations):
            print(f"iteration {iteration+1} - sending prompt:\n" + prompt)
            # Retry up to 3 times on an empty completion. Fixed: the original
            # reused `i` here, clobbering the outer iteration counter and
            # breaking the max-iterations check further down.
            for _retry in range(3):
                output = str(self.llm(prompt, stop=["USER:","AI:","SYS:","[INST]","[/INST]"])).strip()
                if output:
                    break
            return_role = output.split(":")[0]
            return_message = output[len(return_role)+1:].split("[INST]")[0].split("[/INST]")[0].split("User")[0].split("USER")[0].strip()
            # Try to parse a well-formed action request: ToolName(args)
            action_name = None
            action_input = None
            matches = re.findall(r"(\w+)\((.+?)\)", return_message)
            for match in matches:
                if len(match) > 1 and match[0] and match[1]:
                    if match[0].strip().lower() in tool_names:
                        action_name = match[0].strip().lower()
                        action_input = match[1].strip().replace("query_params", "").strip().replace("()","")
                if (action_name and action_input): break
            # Fall back to loosely formatted "tool: args" / "tool(args" lines
            if not action_name or not action_input:
                lines = output.split("\n")
                for line in lines:
                    for tool in tool_names:
                        if f"{tool}:" in line.lower() or f"{tool}(" in line.lower():
                            action_name = tool
                            action_input = line[line.lower().find(tool)+len(tool):].strip().replace("query_params", "").strip().replace("()","")
                            if (len(action_input) < 2):
                                action_input = None
                            else:
                                print(f"Matched unformatted action request. {action_name}:{action_input} from line: {line}")
                            if (action_name and action_input): break
                        elif action_name and not action_input:
                            action_input = line[line.find(":") + 1:].replace("\"","")
                            if (len(action_input) < 2):
                                action_input = None
                            else:
                                print(f"Matched unformatted action request. {action_name}:{action_input} from line: {line}")
                            if (action_name and action_input): break
            # Call tool if found
            if action_name and action_input:
                for tool in self.tools:
                    if tool.name.lower() in action_name:
                        params_list = action_input.split(",")
                        try:
                            print(f"Fixing input for tool {tool.name}, Original params list: {str(params_list)}")
                            params_list = validate_and_fix_params(tool.name, params_list)
                            print(f"Fixed params list: {str(params_list)}")
                            print(f"Calling action:{tool.name} with input:{str(params_list)}")
                            observations.append(f"Calling action:{tool.name} with input:{str(params_list)}")
                            res = tool.func(*params_list)
                        except Exception as ex:
                            res = f"{action_name} execution error: {str(ex)}"
                        print(f"Action Output: {res}")
                        observations.append(f"Action Output: {res}")
                        # Fixed: the original emitted a literal backslash-S
                        # ("\S") into the prompt instead of a newline.
                        prompt = prompt + f"Action: {tool.name}({action_input})\nSYSTEM:{res}"
                if (iteration+1 == self.max_iterations):
                    print(f"MAX ITERATIONS REACHED. PLEASE PROVIDE A FINAL RESPONSE!")
                    prompt = prompt + "\nMAX ITERATIONS REACHED. PLEASE PROVIDE A FINAL RESPONSE!\nAI:"
                    output = str(self.llm(prompt, stop=["USER:","AI:","SYS:","SYSTEM:","[INST]","[/INST]"])).strip()
                    final_response = "\n*Reasoning: ".join(observations) + f"\n{output}" if len(observations) > 0 else f"\n{output}"
                    return { "output": final_response }
                else:
                    prompt = prompt + "\nAI:"
            else:
                # No tool request: treat the model output as the final answer.
                final_response = "\n*Reasoning: ".join(observations) + f"\n{output}" if len(observations) > 0 else f"\n{output}"
                print(f"Final Answer: {final_response}")
                return { "output": final_response }
        return { "output": "Max Iterations reached. Last Output:\n" + output}
# Main agent class | |
class SmartAgent:
    """Top-level assistant: wires an LLM, sentence embeddings, a persistent
    knowledge base, web search, Wikipedia and Stable Diffusion image
    generation into an RBotAgent tool loop."""

    def __init__(self, model_id: str, conversation_model = "", emb_model="all-MiniLM-L6-v2", load_in_4bit=False, load_in_8bit=True, ctx_len=16384, temp=0.1, top_p=0.95, max_iterations=3, observations_callback = None):
        """
        Args:
            model_id: Path / URL / HF id of the main model (see create_llm).
            conversation_model: Placeholder for a secondary model (unused).
            emb_model: SentenceTransformer model name for embeddings.
            load_in_4bit, load_in_8bit: Quantization flags for create_llm.
            ctx_len: Context window length.
            temp, top_p: Sampling parameters.
            max_iterations: Tool-loop iteration cap for the agent.
            observations_callback: Optional observation hook (unused here).
        """
        self.chat_history = []
        self.max_iterations = max_iterations
        self.model = model_id
        self.current_message = ""
        # Load LM
        self.llm = create_llm(model_id, load_4bit=load_in_4bit, load_8bit=load_in_8bit, ctx_len=ctx_len, temperature=temp, top_p=top_p)
        # Load embeddings
        self.embeddings = SentenceTransformerEmbeddings(model_name=emb_model)
        # Initialize memory
        self.memory_chain = StorageRetrievalLLM(stored_pages_folder="./knowledge_base", llm=self.llm, embeddings=self.embeddings)
        # TOOL REQUEST
        self.requests_tool = TextRequestsWrapper()
        # Wikipedia
        self.wikipedia_tool = WikipediaAPIWrapper()
        # Stable Diffusion pipelines are loaded lazily on first use.
        self.image2image_gen_pipe = None
        self.text2image_gen_pipe = None
        # Create agent
        self.smartAgent = self.create_smart_agent()
        print(f"Smart Agent Initialized - CUDA Support:{torch.cuda.is_available()}")

    def reset_context(self):
        """Forget the running conversation history."""
        self.chat_history.clear()

    # Create image
    def createImage(self, prompt : str, width : int=512, height : int=512, denoise_strength : float=0.75, guidance_scale: float=7.5, model_id : str= 'dreamshaper_8.safetensors'):
        """Generate image(s) from `prompt` with Stable Diffusion.

        If ./image_gen_guide.jpg exists it is used as an img2img guide image,
        otherwise plain text2img is used. Pipelines are cached after the first
        load. Returns a human-readable message listing the saved file paths,
        or an error message on failure.
        """
        try:
            init_image = None
            if (os.path.exists("./image_gen_guide.jpg")):
                init_image = Image.open("./image_gen_guide.jpg")
            images = []
            if init_image is None:
                if self.text2image_gen_pipe is None:
                    if torch.cuda.is_available():
                        print(f"Loading Stable model {model_id} into GPU")
                        self.text2image_gen_pipe = StableDiffusionPipeline.from_single_file(f"{MODELS_DIR}/" + model_id, torch_dtype=torch.float16, verbose=True, use_safetensors=True)
                        self.text2image_gen_pipe = self.text2image_gen_pipe.to("cuda")
                    else:
                        print(f"Loading Stable model {model_id} into CPU")
                        self.text2image_gen_pipe = StableDiffusionPipeline.from_single_file(f"{MODELS_DIR}/" + model_id, torch_dtype=torch.float32, verbose=True, use_safetensors=True)
                        self.text2image_gen_pipe = self.text2image_gen_pipe.to("cpu")
                print("generating image from prompt...")
                images = self.text2image_gen_pipe(prompt, width=width, height=height).images
            else:
                if self.image2image_gen_pipe is None:
                    if torch.cuda.is_available():
                        print(f"Loading Stable model {model_id} into GPU")
                        self.image2image_gen_pipe = StableDiffusionImg2ImgPipeline.from_single_file(f"{MODELS_DIR}/" + model_id, torch_dtype=torch.float16, verbose=True, use_safetensors=True)
                        self.image2image_gen_pipe = self.image2image_gen_pipe.to("cuda")
                    else:
                        print(f"Loading Stable model {model_id} into CPU")
                        self.image2image_gen_pipe = StableDiffusionImg2ImgPipeline.from_single_file(f"{MODELS_DIR}/" + model_id, torch_dtype=torch.float32, verbose=True, use_safetensors=True)
                        self.image2image_gen_pipe = self.image2image_gen_pipe.to("cpu")
                print("generating image from prompt+image...")
                init_image = init_image.convert("RGB")
                images = self.image2image_gen_pipe(prompt, image=init_image, width=width, height=height, strength=denoise_strength, guidance_scale=guidance_scale).images
            # Make sure the output folder exists before saving.
            os.makedirs("./generated_images", exist_ok=True)
            paths = []
            for image in (images if images is not None else []):
                # Create a filename based on the current date and time
                filename = f'image_{datetime.now().strftime("%Y%m%d%H%M%S")}{(len(paths)+1)}.jpg'
                # Fixed: the computed filename was previously ignored and a
                # hard-coded path was reused, so every image overwrote the
                # same file.
                file_path = f"./generated_images/{filename}"
                image.save(file_path)
                paths.append(file_path)
            return f"Generated images from prompt \"{prompt}\" saved to files: {', '.join(paths)}"
        except Exception as e:
            print(f"error in createImage: {e}")
            return "Unable to generate file"

    def load_and_split_documents(self, url, max_depth=2):
        """Recursively fetch `url` and return its pages split into chunks."""
        loader = RecursiveUrlLoader(url, max_depth=max_depth, extractor=lambda x: BeautifulSoup(x, "html.parser").text)
        docs = loader.load()
        splitter = RecursiveCharacterTextSplitter()
        return splitter.split_documents(docs)

    def search_and_reply(self, query, max_results=5):
        """Web-search `query`, embed the top result pages, and answer the
        current user message from them with a RetrievalQA chain."""
        vectorstore = None
        sources = ""
        res_cnt = 0
        results = google_free_search.gsearch(query=query)
        urls = []
        for result in results:
            link = result['link']
            title = result['title']
            if (link.startswith("http://") or link.startswith("https://")):
                res_cnt = res_cnt + 1
                if res_cnt > max_results: break
                print(f"- Found Valid Link {title} : {link}")
                sources += f"{title}, "
                urls.append(link)
            else:
                print(f"ERROR! Invalid link: {link} for result: {title}")
        if len(urls) > 0:
            import concurrent.futures
            print(f"Loading {len(urls)} urls into a vector store")
            # Fetch and split all result pages in parallel.
            with concurrent.futures.ThreadPoolExecutor() as executor:
                future_results = [executor.submit(self.load_and_split_documents, url) for url in urls]
                documents = []
                for future in concurrent.futures.as_completed(future_results):
                    documents.extend(future.result())
            if len(documents) > 0:
                vectorstore = FAISS.from_documents(documents, self.embeddings)
        if vectorstore is not None:
            retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
            chain = RetrievalQA.from_chain_type(llm=self.llm, chain_type="stuff", retriever=retriever)
            # Append today's date so time-sensitive questions stay anchored.
            response = chain.run(self.current_message + " " + datetime.now().strftime("%Y/%m/%d"))
            return response
        else:
            return f"Unable to acquire results from web search results:{len(results)} - valids:{res_cnt}"

    # Main handler
    def agent_generate_response(self, user_message):
        """Run one user message through the agent, record the exchange in the
        chat history, and return the reply annotated with elapsed seconds."""
        start_time = time.time()
        self.current_message = user_message
        # Get response
        message_response = self.smartAgent({"input" : user_message, "chat_history" : self.chat_history})
        end_time = time.time()
        elapsed_time = end_time - start_time
        # Format response
        response = message_response['output'] + f" ({round(elapsed_time,2)}s)"
        self.chat_history.append(HumanMessage(content=user_message))
        self.chat_history.append(AIMessage(content=message_response['output']))
        return response

    # Create agent
    def create_smart_agent(self):
        """Assemble the RBotAgent with its tool belt (search, Wikipedia,
        image generation, knowledge base, and llm-math)."""
        tools = [
            Tool(name="SearchAndReply", func=self.search_and_reply, description="Search web and reply"),
            Tool(name="Wikipedia", func=self.wikipedia_tool.run, description="Query Wikipedia"),
            Tool(name="ImageGenerator", func=self.createImage, description="Generate images"),
            Tool(name="KnowledgeBaseQuery", func=self.memory_chain.query, description="Query knowledge base"),
        ]
        tools.extend(load_tools(["llm-math"], llm=self.llm))
        return RBotAgent(llm=self.llm, tools=tools, max_iterations=self.max_iterations)