# --------------------------------libraries-----------------------------------
import streamlit as st
#import torch
import os
import logging
import sys
from llama_index.callbacks import CallbackManager, LlamaDebugHandler
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
from llama_index.embeddings import InstructorEmbedding
from llama_index import ServiceContext, VectorStoreIndex, SimpleDirectoryReader
from tqdm.notebook import tqdm
from dotenv import load_dotenv

# --------------------------------env variables-----------------------------------
# Load environment variables
load_dotenv(dotenv_path=".env")
no_proxy = os.getenv("no_proxy")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_API_BASE = os.getenv("OPENAI_API_BASE")

# --------------------------------cache LLM-----------------------------------
# Log to stdout and trace llama_index calls for debugging
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])

# LLM
@st.cache_resource
def load_llm_model():
    if not os.path.exists("models"):
        st.error("The models directory does not exist. Please download a model and copy it into the models folder.")
        os.makedirs("models")
        return None
    llm = LlamaCPP(
        #model_url="https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q5_K_M.gguf",
        model_path="models/dolphin-2.1-mistral-7b.Q4_K_S.gguf",
        temperature=0.0,
        max_new_tokens=100,
        context_window=1024,
        generate_kwargs={},
        model_kwargs={"n_gpu_layers": 20},
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=completion_to_prompt,
        verbose=True,
    )
    return llm

llm = load_llm_model()
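# Note: load_llm_model() returns None when the models directory was missing and had to be
# created, so the llm.complete() calls further down would fail until a GGUF file is added.
# A minimal guard (hypothetical, not part of the original app) could stop the app early:
# if llm is None:
#     st.stop()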
# --------------------------------cache Embedding model-----------------------------------
@st.cache_resource
def load_emb_model():
    if not os.path.exists("data"):
        st.error("The data directory does not exist. Please upload the data.")
        os.makedirs("data")
        return None
    embed_model_inst = InstructorEmbedding(
        "models/hkunlp_instructor-base"
        #model_name="hkunlp/instructor-base"
    )
    service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, llm=llm)
    documents = SimpleDirectoryReader("data").load_data()
    print(f"Number of documents: {len(documents)}")
    index = VectorStoreIndex.from_documents(
        documents, service_context=service_context, show_progress=True)
    return index.as_query_engine()

query_engine = load_emb_model()
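# The vector index above is rebuilt in memory for every fresh session. As a possible
# enhancement (hypothetical, not part of the original app, assuming the legacy llama_index
# API used here), the index could be persisted inside load_emb_model() and reloaded instead
# of re-embedding the documents each time, e.g.:
# from llama_index import StorageContext, load_index_from_storage
# index.storage_context.persist(persist_dir="storage")
# index = load_index_from_storage(StorageContext.from_defaults(persist_dir="storage"),
#                                 service_context=service_context)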
# ------------------------------------layout----------------------------------------
with st.sidebar:
    api_server_info = st.text_input("Local LLM API server", OPENAI_API_BASE, key="openai_api_base")
    st.title("šŸ¤– Llama Index šŸ“š")
    if st.button('Clear Memory'):
        st.session_state.memory = ""
    st.write("The local LLM API server is not used in this demo: the model is loaded locally through the llama_index integration of llama.cpp.")
    st.write("šŸš€ This app allows you to chat with a local LLM, served through an API server or loaded in cache.")
    st.subheader("šŸ’» System Requirements:")
    st.markdown("- CPU: the faster the better")
    st.markdown("- RAM: 16 GB or higher")
    st.markdown("- GPU: optional, but very useful for CUDA acceleration")
    st.subheader("Developer Information:")
    st.write("This app is developed and maintained by **@mohcineelharras**")

# Define the app's tabs
tab1, tab2, tab3 = st.tabs(["LLM only", "LLM RAG QA with database", "One single document Q&A"])

# -----------------------------------LLM only---------------------------------------------
if 'memory' not in st.session_state:
    st.session_state.memory = ""
#token_count = 0

with tab1:
    st.title("šŸ’¬ LLM only")
    prompt = st.text_input(
        "Ask your question here",
        placeholder="Who is Lionel Messi",
    )
    template = (
        "system\n"
        "You are Dolphin, a helpful AI assistant. Your responses should be based solely on the content of documents you have access to. "
        "Do not provide information that is not contained in the documents. "
        "If a question is asked about content not in the documents, respond with 'I do not have that information.' "
        "Always respond in the same language as the question was asked. Be concise.\n"
        "user\n"
        "{prompt}\n"
        "assistant\n"
    )
    if prompt:
        # Prepend the running conversation memory so the model sees previous turns
        contextual_prompt = st.session_state.memory + "\n" + prompt
        formatted_prompt = template.format(prompt=contextual_prompt)
        response = llm.complete(formatted_prompt, max_tokens=100, temperature=0, top_p=0.95, top_k=10)
        #print(response)
        text_response = response
        #---------------------------------------------
        # text_response = response["choices"][0]["text"]
        # token_count += response["usage"]["total_tokens"]
        # st.write("LLM's Response:\n", text_response)
        # st.write("Token count:\n", token_count)
        #---------------------------------------------
        st.write("LLM's Response:\n", text_response)
        st.session_state.memory = f"Prompt: {contextual_prompt}\nResponse:\n {text_response}"
        #st.write("Memory:\n", memory)
        with open("short_memory.txt", 'w') as file:
            file.write(st.session_state.memory)

# -----------------------------------LLM Q&A-------------------------------------------------
with tab2:
    st.title("šŸ’¬ LLM RAG QA with database")
    st.write("To consult the files that are available in the database, go to https://huggingface.co/spaces/mohcineelharras/llama-index-docs-spaces/blob/main/data")
    prompt = st.text_input(
        "Ask your question here",
        placeholder="How does the blockchain work ?",
    )
    if prompt:
        response = query_engine.query(prompt)
        st.write("Your prompt: ", prompt)
        st.write("LLM's Response:\n" + response.response)
        with st.expander("Document Similarity Search"):
            for i, node in enumerate(response.source_nodes):
                dict_source_i = node.node.metadata
                dict_source_i.update({"Text": node.node.text})
                st.write("Source nĀ°" + str(i+1), dict_source_i)
            st.write()

# -----------------------------------Upload File Q&A-----------------------------------------
def load_emb_uploaded_document(filename):
    # The session-state check prevents the index from being built during initialization.
    if 'init' in st.session_state:
        embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base")
        service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, llm=llm)
        documents = SimpleDirectoryReader(input_files=[filename]).load_data()
        index = VectorStoreIndex.from_documents(
            documents, service_context=service_context, show_progress=True)
        return index.as_query_engine()
    return None

with tab3:
    st.title("šŸ“ One single document Q&A with Llama Index using local open llms")
    uploaded_file = st.file_uploader("Upload a file", type=("txt", "csv", "md", "pdf"))
    question = st.text_input(
        "Ask something about the files",
        placeholder="Can you give me a short summary?",
        disabled=not uploaded_file,
    )
    if 'init' not in st.session_state:
        st.session_state.init = True
    if uploaded_file:
        if not os.path.exists("draft_docs"):
            st.error("The draft_docs directory does not exist. Creating it to store uploaded documents.")
            os.makedirs("draft_docs")
        with open("draft_docs/" + uploaded_file.name, "wb") as f:
            text = uploaded_file.read()
            f.write(text)
        # if load_emb_uploaded_document:
        #     load_emb_uploaded_document.clear()
        #load_emb_uploaded_document.clear()
        query_engine = load_emb_uploaded_document("draft_docs/" + uploaded_file.name)
        st.write("File ", uploaded_file.name, "was loaded successfully")
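    # Unlike load_llm_model() and load_emb_model(), load_emb_uploaded_document() is not
    # cached, so the uploaded document is re-embedded on every Streamlit rerun. A possible
    # (hypothetical) alternative would be to decorate it with @st.cache_resource, which
    # caches the query engine per file path passed as argument.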
    if uploaded_file and question and api_server_info:
        prompt = f"""Based on the context presented, respond to the question below to the best of your ability.\n\n{question}"""
        response = query_engine.query(prompt)
        st.write("### Answer")
        st.write(response.response)
        with st.expander("Document Similarity Search"):
            #st.write(len(response.source_nodes))
            for i, node in enumerate(response.source_nodes):
                dict_source_i = node.node.metadata
                dict_source_i.update({"Text": node.node.text})
                st.write("Source nĀ°" + str(i+1), dict_source_i)
                #st.write("Source nĀ°"+str(i))
                #st.write("Meta Data :", node.node.metadata)
                #st.write("Text :", node.node.text)
                #st.write()

#print("Is file uploaded: ", uploaded_file==True, "Is question asked: ", question==True, "Is api server set: ", api_server_info==True)
st.markdown("""
Repository LinkedIn GitHub

Ā© 2023 Mohcine EL HARRAS
""", unsafe_allow_html=True)

# -----------------------------------end-----------------------------------------